Example 1
import gym
import numpy as np
from dqnClass import DQN


class DQNLunar:
    def __init__(self):
        self.env = gym.make('LunarLander-v2')
        self.nn = DQN(8, 4)
        self.avgReward = np.zeros(100)

    def runEp(self, env, printT=False):
        state = env.reset()
        state = np.reshape(state, [1, 8])
        totalreward = 0
        for _ in range(500):
            if printT:
                env.render()
            action = self.nn.chooseAction(state)
            next_state, reward, done, _ = env.step(action)
            totalreward += reward
            next_state = np.reshape(next_state, [1, 8])
            self.nn.storePastResults(state, action, reward, next_state, done)
            state = next_state
            if done:
                break
            self.nn.train(6)
        return totalreward

    def run(self):
        for i in range(20000):  # Number of episodes
            if i % 100 == 0:
                # Render every 100th episode and report the rolling average.
                self.avgReward[i % 100] = self.runEp(self.env, True)
                avg = np.average(self.avgReward)
                print('Episode %d/20000 Reward: %f' % (i, avg))
            else:
                self.avgReward[i % 100] = self.runEp(self.env)
                print(i, self.avgReward[i % 100])
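
A minimal entry point for the class above, assuming it lives in its own script (the original snippet does not show how DQNLunar is launched):

if __name__ == '__main__':
    agent = DQNLunar()
    agent.run()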
Example 2
    # Huber loss between the Q-values of the chosen actions and their targets.
    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values.unsqueeze(1))

    optimizer.zero_grad()
    loss.backward()
    # Clamp every gradient element to [-1, 1] to stabilize training.
    for param in policy_net.parameters():
        param.grad.data.clamp_(-1, 1)
    optimizer.step()
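
The step above updates policy_net only; in the standard DQN scheme the target network that supplies expected_state_action_values is refreshed from it periodically. A sketch of that sync (TARGET_UPDATE is an assumed hyperparameter, not shown in this excerpt):

# Elsewhere, once every TARGET_UPDATE episodes in the training loop:
if i_episode % TARGET_UPDATE == 0:
    target_net.load_state_dict(policy_net.state_dict())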
######################################################################

init_screen = get_screen()
_, _, screen_height, screen_width = init_screen.shape

# Number of actions (hardcoded here rather than read from the gym action space)
n_actions = 4
policy_net = DQN(screen_height, screen_width, n_actions, layers=20).to(device)
target_net = DQN(screen_height, screen_width, n_actions, layers=20).to(device)
PATH = 'C:/Users/sagau/Google Drive/smaller1.pth'
optimizer = optim.Adam(policy_net.parameters(), lr=1e-4)

load_mode = False
if load_mode:
    model_dict = torch.load(PATH, map_location=torch.device('cpu'))
    i_episode = model_dict['epoch']
    optimizer.load_state_dict(model_dict['optimizer'])
    policy_net.load_state_dict(model_dict['state_dict'])
    target_net.load_state_dict(model_dict['state_dict'])
    episode_durations = model_dict['episode_durations']
    total_reward_list = model_dict['total_reward_list']
    point_list = model_dict['point_list']
    plot_durations()
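
The load block implies a matching save step. A sketch of the torch.save call that would produce such a checkpoint, reusing the exact key names the load code expects (the save routine itself is not shown in the original):

def save_checkpoint(path):
    # Mirrors the load block above: same dictionary keys.
    torch.save({
        'epoch': i_episode,
        'optimizer': optimizer.state_dict(),
        'state_dict': policy_net.state_dict(),
        'episode_durations': episode_durations,
        'total_reward_list': total_reward_list,
        'point_list': point_list,
    }, path)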
Example 3
        # (tail of select_action: the epsilon-greedy random-exploration branch)
        return torch.tensor([[random.randrange(n_actions)]], device=device, dtype=torch.long)


######################################################################
# Training loop

######################################################################

init_screen = get_screen()
_, _, screen_height, screen_width = init_screen.shape

# Number of actions (hardcoded here rather than read from the gym action space)
n_actions = 4
#PATH = 'C:/Users/sagau/Desktop/Kaggle/TetrisRepo/models/model1_2.pth'
policy_net = DQN(screen_height, screen_width, n_actions, layers=20).to(device)

PATH = 'C:/Users/sagau/Google Drive/transfersmaller.pth'
model_dict = torch.load(PATH, map_location=torch.device('cpu'))
policy_net.load_state_dict(model_dict['state_dict'])
policy_net.eval()  # inference only: switch to eval mode after the weights are loaded
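
Only the random-exploration branch of select_action survives at the top of this example; play mode also needs the greedy branch. A sketch following the standard PyTorch DQN idiom (the helper name select_greedy_action is chosen here for illustration):

def select_greedy_action(state):
    # No exploration at play time: pick the action with the largest Q-value.
    with torch.no_grad():
        return policy_net(state).max(1)[1].view(1, 1)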
######################################################################
# Play with the model!
sleep_time = 0.2
game = Tetris(nb_rows=8, nb_cols=6)
done = game.generate_block(choice=random.randint(0, 3))
rows = 0
# for t in range(200):  # bounded alternative to the endless loop below
for t in count():
Example 4
import gym
import numpy as np
from dqnClass import DQN

# Set up the environment
env = gym.make('CartPole-v0')
nn = DQN(4, 2)


def runEp(env, printT=False):
    state = env.reset()
    state = np.reshape(state, [1, 4])
    totalreward = 0
    for t in range(500):
        if printT:
            env.render()
        action = nn.chooseAction(state)
        next_state, reward, done, _ = env.step(action)
        totalreward += reward
        next_state = np.reshape(next_state, [1, 4])
        nn.storePastResults(state, action, reward, next_state, done)
        state = next_state
        if done:
            break
        nn.train(32)
    return totalreward


avgReward = np.zeros(200)
for i in range(10000):  # Number of episodes
    score = runEp(env, True)
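
The snippet is cut off here. Judging from the parallel LunarLander script in Example 5, the bookkeeping presumably continues along these lines (a sketch, not the original code):

    avgReward[i % 200] = score  # rolling window over the last 200 episodes
    if i % 200 == 0:
        print('Episode %d/10000 Average: %f' % (i, np.average(avgReward)))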
Example 5
import gym
import numpy as np
from dqnClass import DQN

# Set up the environment
env = gym.make('LunarLander-v2')
nn = DQN(8, 4)


def runEp(env, printT=False):
    state = env.reset()
    state = np.reshape(state, [1, 8])
    totalreward = 0
    for _ in range(500):
        if printT:
            env.render()
        action = nn.chooseAction(state)
        next_state, reward, done, _ = env.step(action)
        totalreward += reward
        next_state = np.reshape(next_state, [1, 8])
        nn.storePastResults(state, action, reward, next_state, done)
        state = next_state
        if done:
            break
        nn.train(6)
    return totalreward


avgReward = np.zeros(100)
for i in range(20000):  # Number of episodes
    if i % 100 == 0:
        # Render every 100th episode and report the rolling average.
        avgReward[i % 100] = runEp(env, True)
        avg = np.average(avgReward)
        print('Episode %d/20000 Reward: %f' % (i, avg))
    else:
        avgReward[i % 100] = runEp(env)
        print(i, avgReward[i % 100])