# (assumed imports for this test method: `PPOAgent` from `tensorforce.agents`
#  and the test suite's `UnittestEnvironment` helper; exact module paths
#  depend on the Tensorforce version)
def test_readme(self):
    environment = UnittestEnvironment(
        states=dict(type='float', shape=(10,)),
        actions=dict(type='int', num_values=5)
    )

    def get_current_state():
        return environment.reset()

    def execute_decision(x):
        # execute() returns (states, terminal, reward); index 2 is the reward
        return environment.execute(actions=x)[2]

    # Instantiate a Tensorforce agent
    agent = PPOAgent(
        states=dict(type='float', shape=(10,)),
        actions=dict(type='int', num_values=5),
        memory=10000,
        network='auto',
        update_mode=dict(unit='episodes', batch_size=10),
        step_optimizer=dict(type='adam', learning_rate=1e-4)
    )

    # Initialize the agent
    agent.initialize()

    # Retrieve the latest (observable) environment state
    state = get_current_state()  # (float array of shape [10])

    # Query the agent for its action decision
    action = agent.act(states=state)  # (scalar between 0 and 4)

    # Execute the decision and retrieve the current performance score
    reward = execute_decision(action)  # (any scalar float)

    # Pass feedback about performance (and termination) to the agent
    agent.observe(reward=reward, terminal=False)

    agent.close()
    environment.close()
    self.assertTrue(expr=True)
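The test above drives a single act/observe step through helper functions. For context, a minimal sketch of the same interface run over full episodes (an illustration, not part of the original test; it assumes the `agent` and `environment` objects constructed above, and that `execute()` returns a `(states, terminal, reward)` tuple, as the `[2]` index in the helper suggests):

# Sketch: full act/observe episode loop over the same interface
for _ in range(100):
    states = environment.reset()
    terminal = False
    episode_reward = 0.0
    while not terminal:
        # Agent picks an action for the current state
        actions = agent.act(states=states)
        # Environment applies it and reports next state, termination, reward
        states, terminal, reward = environment.execute(actions=actions)
        # Feed the reward (and terminal flag) back to the agent
        agent.observe(reward=reward, terminal=terminal)
        episode_reward += reward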
# (assumed context: `runner`, `agent`, and `config` come from the surrounding
#  training script; matplotlib is imported as `plt`, and `SimpleAgent` comes
#  from `pommerman.agents`)

# - format: 'numpy' or 'hdf5' stores only the weights; 'checkpoint' stores the full TensorFlow model
runner.agent.save(directory="C:\\Users\\ali_k\\Desktop\\my_model", format='checkpoint')
# (a model saved as 'checkpoint' can later be restored via Agent.load with the
#  same directory and format arguments)

# Print resulting stats and plot reward / episode-length curves and histograms
print("Stats: ", runner.episode_rewards, runner.episode_timesteps)
episode_rewards, episode_timesteps = runner.episode_rewards, runner.episode_timesteps
plt.plot(runner.episode_rewards, label='reward')
plt.hist(runner.episode_rewards, label='reward')
plt.plot(runner.episode_timesteps, label='life time')
plt.hist(runner.episode_timesteps, label='life time')

try:
    agent.close()
    runner.close()
except AttributeError as e:
    print("Error:", e)

# Test the agent against SimpleAgent opponents
test_agents = []
for agent_id in range(3):
    test_agents.append(
        SimpleAgent(config["agent"](agent_id, config["game_type"])))

# Add TensorforceAgent
agent_id += 1
test_agents.append(