# --- Hyperparameters for the racing-game DDQN run (resumes a saved model) ---
TOTAL_GAMETIME = 10000   # max ticks per episode
N_EPISODES = 10000       # number of training episodes
REPLACE_TARGET = 10      # sync target network every 10 episodes

# NOTE(review): GameEnv is not imported in this excerpt — presumably a
# project-local module providing the racing environment; confirm.
game = GameEnv.RacingEnv()
game.fps = 60  # cap the simulation/render rate

GameTime = 0       # elapsed ticks in the current episode
GameHistory = []   # per-episode bookkeeping (usage not visible in this excerpt)
renderFlag = True  # draw the game while running

# Double-DQN agent: 19-dim observation, 5 discrete actions.
# NOTE(review): epsilon=0.02 starts BELOW epsilon_end=0.01's intended floor
# ordering for a fresh run — consistent with resuming a trained model
# (load_model below), but worth confirming.
ddqn_agent = DDQNAgent(alpha=0.0005,
                       gamma=0.99,
                       n_actions=5,
                       epsilon=0.02,
                       epsilon_end=0.01,
                       epsilon_dec=0.999,
                       replace_target=REPLACE_TARGET,
                       batch_size=64,
                       input_dims=19,
                       fname='ddqn_model.h5')

# Resume from saved weights, then copy them into the target network.
ddqn_agent.load_model()
ddqn_agent.update_network_parameters()

ddqn_scores = []   # score per episode
eps_history = []   # epsilon per episode (for plotting)

def run():
    """Main evaluation/training loop.

    NOTE(review): the body is truncated in this excerpt — only the
    commented-out deque line survives.
    """
    #scores = deque(maxlen=100)
Example #2
import gym
from gym import wrappers
import numpy as np
from ddqn_keras import DDQNAgent
from utils import plotLearning

if __name__ == '__main__':
    # Train a Double-DQN agent on Gym's LunarLander-v2
    # (8-dim observation, 4 discrete actions).
    env = gym.make('LunarLander-v2')
    ddqn_agent = DDQNAgent(alpha=0.0005,
                           gamma=0.99,
                           n_actions=4,
                           epsilon=1.0,
                           batch_size=64,
                           input_dims=8)
    n_games = 500
    #ddqn_agent.load_model()
    ddqn_scores = []   # total reward per episode
    eps_history = []   # epsilon per episode
    #env = wrappers.Monitor(env, "tmp/lunar-lander-ddqn-2",
    #                         video_callable=lambda episode_id: True, force=True)

    for i in range(n_games):
        done = False
        score = 0
        observation = env.reset()
        while not done:
            # epsilon-greedy action from the online network
            action = ddqn_agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            # store the transition; done is cast to int for the replay buffer
            ddqn_agent.remember(observation, action, reward, observation_,
                                int(done))
            # NOTE(review): the loop is truncated here in this excerpt —
            # the learn() call and `observation = observation_` update
            # that normally follow are missing.
from ddqn_keras import DDQNAgent
from collections import deque
import random, math

# --- Hyperparameters for the racing-game DDQN run (fresh training) ---
TOTAL_GAMETIME = 1000 # Max game time for one episode
N_EPISODES = 10000    # number of training episodes
REPLACE_TARGET = 50   # sync target network every 50 episodes

# NOTE(review): GameEnv is not imported in this excerpt — presumably a
# project-local racing environment module; confirm.
game = GameEnv.RacingEnv()
game.fps = 60  # cap the simulation/render rate

GameTime = 0       # elapsed ticks in the current episode
GameHistory = []   # per-episode bookkeeping (usage not visible in this excerpt)
renderFlag = False # train headless; flip to True to watch

# Fresh agent: exploration decays from 1.00 toward 0.10 (factor 0.9995).
ddqn_agent = DDQNAgent(alpha=0.0005, gamma=0.99, n_actions=5, epsilon=1.00, epsilon_end=0.10, epsilon_dec=0.9995, replace_target= REPLACE_TARGET, batch_size=512, input_dims=19)

# if you want to load the existing model uncomment this line.
# careful an existing model might be overwritten
#ddqn_agent.load_model()

ddqn_scores = []   # score per episode
eps_history = []   # epsilon per episode (for plotting)

def run():
    """Train the agent for N_EPISODES episodes on the racing env.

    NOTE(review): the body is truncated in this excerpt — it breaks
    off right after resetting the episode state.
    """

    for e in range(N_EPISODES):
        
        game.reset() #reset env 

        done = False
        # NOTE(review): remainder of the episode loop is missing here.
import gym
from gym import wrappers
import numpy as np
import random
from ddqn_keras import DDQNAgent
from utils import plotLearning
import ipdb

SHOW_EVERY = 10  # presumably render/report every 10th episode — TODO confirm

if __name__ == '__main__':
    # Run a Double-DQN agent on Gym's MountainCar-v0; action and
    # observation sizes are taken from the env's spaces.
    env = gym.make('MountainCar-v0')
    # ipdb.set_trace()
    ddqn_agent = DDQNAgent(alpha=0.0005,
                           gamma=0.99,
                           n_actions=env.action_space.n,
                           epsilon=.01,
                           batch_size=64,
                           input_dims=env.observation_space.shape[0])
    n_games = 500
    #ddqn_agent.load_model()
    ddqn_scores = []   # total reward per episode
    eps_history = []   # epsilon per episode
    #env = wrappers.Monitor(env, "tmp/lunar-lander-ddqn-2",
    #                         video_callable=lambda episode_id: True, force=True)

    for i in range(n_games):
        done = False
        score = 0
        observation = env.reset()
        while not done:
            # true on episode 0 and on every SHOW_EVERY-th episode
            # NOTE(review): the body of this `if` is truncated in this
            # excerpt — the script is cut off mid-statement here.
            if not i or not i % SHOW_EVERY:
    def close(self):
        """Shut down pygame, uninitializing all of its modules."""
        pygame.quit()


# Clock to limit speed
clock = pygame.time.Clock()

# Exit the program?
exit_program = False

# NOTE(review): Arcanoid, GAME_WIDTH, GAME_HEIGHT, np and time come from
# parts of the file not visible in this excerpt — confirm their origins.
env = Arcanoid()
# Double-DQN agent for the Arkanoid env; input_dims * 2 suggests the state
# stacks two observations (current + previous) — TODO confirm.
ddqn_agent = DDQNAgent(alpha=0.005,
                       gamma=0.99,
                       n_actions=env.action_space.n,
                       epsilon=1.0,
                       batch_size=64,
                       input_dims=env.observation_space.shape[0] * 2,
                       replace_target=1000)
ddqn_scores = []   # total reward per episode
eps_history = []   # epsilon per episode
history = []       # extra bookkeeping (usage not visible in this excerpt)
n_games = 500_000
start = time.time()  # wall-clock start, for timing the run
for i in range(n_games):
    done = False
    observation = env.reset()
    # presumably scales positions by the playfield dimensions — confirm units
    observation = observation / [GAME_WIDTH, GAME_WIDTH, GAME_HEIGHT]
    # prepend -1 placeholders where the "previous" observation would go
    observation = np.append([-1, -1, -1], observation)
    while not done:
        action = ddqn_agent.choose_action(observation)
        # NOTE(review): the loop is truncated here in this excerpt.