def render(self, mode='human', close=False):
    """Gym hook called at the end of a process.

    Always prints ``"Rendering"``. Unless ``self.validation_process`` is
    truthy, it then:

    * saves ``self.memory`` with ``np.save`` to
      ``<test_folder>/memory_<test_starts_index>.npy``;
    * calls ``plt.plot_actions`` with the memory, the long and short
      actions, the test folder and the title
      ``/test_actions_<test_starts_index>``.

    ``mode`` and ``close`` are accepted for signature compatibility but are
    not read by this implementation.
    """
    print("Rendering")

    # Validation runs produce no artefacts.
    # NOTE(review): the original source had lost its indentation; the save
    # and plot steps are assumed to all live under this guard -- confirm.
    if self.validation_process:
        return

    run_id = str(self.test_starts_index)
    memory_path = self.test_folder + '/memory_' + run_id + '.npy'
    np.save(memory_path, self.memory)

    plt.plot_actions(
        self.memory,
        self.long_actions,
        self.short_actions,
        self.test_folder,
        '/test_actions_' + run_id,
    )
def plot_actions(self):
    """Plot this object's recorded actions through ``plt.plot_actions``.

    Forwards, in this order, ``self.folder``, ``self.memory``,
    ``self.long_actions`` and ``self.short_actions``. Nothing is returned.
    """
    # NOTE(review): ``render`` calls plt.plot_actions with a different
    # argument order (memory first, folder fourth, plus a title) -- confirm
    # which call matches the helper's current signature.
    plotter = plt.plot_actions
    plotter(
        self.folder,
        self.memory,
        self.long_actions,
        self.short_actions,
    )
def main():
    """Parse command-line flags, train the agents, then plot and print results.

    Every key of the default ``flags`` dict is accepted as a ``--name value``
    long option; each supplied value is converted to the type of its default.
    The process exits through ``sys.exit`` with the usage text when a
    positional argument is given, an option is unknown, or a value cannot be
    converted.

    After seeding ``random``, a ``Runner`` is trained for ``num_episodes``
    episodes and per-agent reward/action plots plus a reward histogram are
    written with the ``-train`` suffix. When ``agent_class`` is ``'QAgent'``,
    greedy agents built from the learned ``Q`` attributes are run for
    ``num_test_episodes`` episodes and plotted with the ``-test`` suffix.
    Finally the latest rewards are printed, overall and per agent.
    """
    flags = {
        'players': 2,
        'num_episodes': 1,
        'num_test_episodes': 1,
        'agent_class': 'SimpleAgent',
        'alpha': 0.1,
        'gamma': 0.9,
        'seed': 12345,
    }
    usage = ('usage: rl_env_example.py [options]\n'
             '--players number of players in the game.\n'
             '--num_episodes number of game episodes to run.\n'
             '--num_test_episodes number of test game episodes to run.\n'
             '--alpha step size for Q-learning. \n'
             '--gamma discount rate for Q-learning. \n'
             '--seed random generator seed. \n'
             '--agent_class {}'.format(' or '.join(AGENT_CLASSES.keys())))

    # Derive the accepted long options from the defaults so the two cannot
    # drift apart; a malformed command line yields the usage text instead of
    # a traceback.
    try:
        options, arguments = getopt.getopt(
            sys.argv[1:], '', [name + '=' for name in flags])
    except getopt.GetoptError as err:
        sys.exit('{}\n{}'.format(err, usage))
    if arguments:
        sys.exit(usage)

    for flag, value in options:
        name = flag[2:]  # Strip leading --.
        try:
            # Coerce to the type of the default (int, float or str).
            flags[name] = type(flags[name])(value)
        except ValueError:
            sys.exit('invalid value for {}: {!r}\n{}'.format(
                flag, value, usage))

    random.seed(flags['seed'])

    form_str = ('players_{}-episodes_{}-test_episodes_{}-agent_{}'
                '-alpha_{}-gamma_{}-seed_{}')
    suffix = form_str.format(flags['players'], flags['num_episodes'],
                             flags['num_test_episodes'], flags['agent_class'],
                             flags['alpha'], flags['gamma'], flags['seed'])
    train_suffix = suffix + '-train'
    test_suffix = suffix + '-test'

    runner = Runner(flags)

    print('*' * 100)
    print('TRAINING')
    agents, rewards, agents_rewards, actions = runner.run(
        flags['num_episodes'])
    print('*' * 100)

    for ar in agents_rewards:
        plot_learning_rewards(agents_rewards[ar], ar, train_suffix)
        plot_actions(actions[ar], ar, train_suffix)
    plot_hist(rewards, -1, train_suffix, flags['players'])

    if flags['agent_class'] == 'QAgent':
        # Evaluate the learned Q-values with purely greedy agents.
        greedy_agents = [
            GreedyAgent(runner.agent_config, agent.Q) for agent in agents
        ]
        print('*' * 100)
        print('TESTING')
        _, rewards, agents_rewards, actions = runner.run(
            flags['num_test_episodes'], greedy_agents)
        print('*' * 100)
        plot_hist(rewards, -1, test_suffix, flags['players'])
        for ar in agents_rewards:
            plot_actions(actions[ar], ar, test_suffix)

    # NOTE(review): the original source had lost its indentation; this
    # summary is assumed to run for every agent class, not only QAgent --
    # confirm.
    print('rewards:\n{}'.format(rewards))
    # Report every agent rather than hard-coding agents 0 and 1, so the
    # summary stays correct for any --players value.
    for ar in agents_rewards:
        print('agent {} rewards:\n{}'.format(ar, agents_rewards[ar]))