def test_readme(self):
        """Smoke-test the README quick-start example end to end.

        Runs one act/observe cycle of a PPO agent against a dummy
        unit-test environment and asserts nothing beyond "it ran".
        """
        # Dummy environment matching the README's state/action spec.
        environment = UnittestEnvironment(
            states=dict(type='float', shape=(10,)),
            actions=dict(type='int', num_values=5))

        def get_current_state():
            # A fresh episode start yields the observable state.
            return environment.reset()

        def execute_decision(decision):
            # execute(...) returns a tuple; index 2 carries the reward.
            return environment.execute(actions=decision)[2]

        # Instantiate a Tensorforce agent (spec mirrors the environment above)
        agent = PPOAgent(
            states=dict(type='float', shape=(10,)),
            actions=dict(type='int', num_values=5),
            memory=10000,
            network='auto',
            update_mode=dict(unit='episodes', batch_size=10),
            step_optimizer=dict(type='adam', learning_rate=1e-4))

        # Initialize the agent
        agent.initialize()

        # Retrieve the latest (observable) environment state
        observed_state = get_current_state()  # (float array of shape [10])

        # Query the agent for its action decision
        chosen_action = agent.act(states=observed_state)  # (scalar between 0 and 4)

        # Execute the decision and retrieve the current performance score
        score = execute_decision(chosen_action)  # (any scalar float)

        # Pass feedback about performance (and termination) to the agent
        agent.observe(reward=score, terminal=False)

        agent.close()
        environment.close()
        self.assertTrue(expr=True)
# - format: 'numpy' or 'hdf5' store only weights, 'checkpoint' stores full TensorFlow model
# Persist the trained model as a full TensorFlow checkpoint so it can be restored later.
runner.agent.save(directory="C:\\Users\\ali_k\\Desktop\\my_model",
                  format='checkpoint')

# Print resulting stats and graphs
print("Stats: ", runner.episode_rewards, runner.episode_timesteps)

# Keep per-episode metrics around for any downstream analysis.
episode_rewards, episode_timesteps = runner.episode_rewards, runner.episode_timesteps

# NOTE(review): plot() and hist() are drawn onto the same current axes, and the
# label= kwargs have no effect unless plt.legend() / plt.show() is called later
# in the script — TODO confirm that happens outside this chunk.
plt.plot(runner.episode_rewards, label='reward')
plt.hist(runner.episode_rewards, label='reward')
plt.plot(runner.episode_timesteps, label='life time')
plt.hist(runner.episode_timesteps, label='life time')

# Close each resource independently: previously a failure in agent.close()
# would have skipped runner.close() entirely. The redundant trailing `pass`
# after print() is also gone.
for closeable in (agent, runner):
    try:
        closeable.close()
    except AttributeError as e:
        # Best-effort cleanup; report ("Hata" = error) but keep going.
        print("Hata:", e)

# Test the agent with RandomAgent opponents

# Build three scripted opponents; config["agent"] is presumably a factory
# callable taking (agent_id, game_type) — verify against the config schema.
test_agents = []
for agent_id in range(3):
    test_agents.append(
        SimpleAgent(config["agent"](agent_id, config["game_type"])))

# Add TensorforceAgent
# NOTE: the loop variable agent_id intentionally leaks out of the loop above
# (final value 2) and is bumped here so the learned agent gets the next id (3).
agent_id += 1
test_agents.append(