コード例 #1
0
from ddpg import Agent
import gym
import numpy as np

# Training script: runs a DDPG agent on the continuous Lunar Lander task for
# 1000 episodes, logging per-episode scores and checkpointing periodically.
env = gym.make('LunarLanderContinuous-v2')

agent = Agent(
    alpha=0.000025,
    beta=0.00025,
    input_dims=[8],
    tau=0.001,
    env=env,
    batch_size=64,
    layer1_size=400,
    layer2_size=300,
    n_actions=2,
)

# NOTE(review): the NumPy seed is set after the agent has been constructed, so
# any NumPy randomness consumed inside Agent.__init__ is not covered by it --
# confirm this ordering is intended.
np.random.seed(42)
score_history = []

for i in range(1000):
    obs = env.reset()
    score = 0
    done = False

    # Step the environment until it reports the episode is over, storing each
    # transition and running one learning update per step.
    while not done:
        action = agent.choose_action(obs)
        next_obs, reward, done, _info = env.step(action)
        agent.remember(obs, action, reward, next_obs, int(done))
        agent.learn()
        score += reward
        obs = next_obs

    score_history.append(score)
    # Mean over at most the last 100 episode scores.
    recent_avg = np.mean(score_history[-100:])
    print("Episode - {} Score - {} 100 game average {}".format(i, score, recent_avg))

    # Checkpoint on every 25th episode index (this includes episode 0).
    if i % 25 == 0:
        agent.save_models()

filename = l
コード例 #2
0
                normalized.append(reward)
            # Record a new best whenever this step's reward beats the stored
            # best reward (slot 1 of best_parameters).
            if reward > best_parameters[1]:
                best_parameters[0] = action
                best_parameters[1] = reward
                best_parameters[2] = step*(episode + 1)
                # step*episode + step is algebraically the same value as
                # step*(episode + 1) stored just above.
                all_steps[episode] = (step*episode+step)
                # NOTE(review): this rebinds the name `exit`, hiding the
                # builtin of the same name in this scope; how the flag is
                # consumed is not visible here.
                exit = True
            rewards.append(reward)
            # Only rewards above -10000 feed the "inlook" series and its
            # running mean over the last (up to) 10 entries.
            # NOTE(review): -10000 is presumably a penalty/sentinel reward --
            # confirm against the environment.
            if reward > -10000:
                inlook.append(reward)
                avg_rewards.append(np.mean(inlook[-10:]))
            # A strictly positive reward sets the done flag and renders the
            # environment.
            if reward > 0:
                done = True
                env.render()
    
            # One learning update per step, using batch_size.
            agent.learn(batch_size)
    
            # Advance to the next state for the following iteration.
            state = new_state

# Empty accumulators for a second run; they are not filled within the visible
# part of this loop (presumably counterparts of inlook / avg_rewards -- confirm).
inlook2 = []
avg_rewards2 = []
# range(1): the outer loop body executes exactly once, with episode == 0.
for episode in range(1):
    noise.reset()
    eps_reward = 0
    print(episode)
    # Three-slot record initialised to a zero tuple and two zeros.
    # NOTE(review): presumably [best action, best reward, step marker] --
    # confirm against the code that updates it.
    best_parameters = [(0,0,0), 0, 0]
    step = 0
    for i in range(total_steps):
        # Prints the step counter before it is incremented, so the printed
        # values start at 0.
        print(step)
        step += 1
        # Fixed setpoint of 20 when `random` is False; otherwise a uniform
        # draw scaled to [0, 100).
        # NOTE(review): `random` must be a flag defined elsewhere; if it were
        # the stdlib module, the comparison would never be True -- confirm.
        setpoint = 20 if random == False else np.random.random()*100