Beispiel #1
0
USE_CUDA = torch.cuda.is_available()
from dqn import QLearner, compute_td_loss, ReplayBuffer

env_id = "PongNoFrameskip-v4"  # established environment that will be played
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 1000000  # total frames that will be learning from
batch_size = 32  # the number of samples that are provided to the model for update services at a given time
gamma = 0.99  # the discount of future rewards
record_idx = 10000  #

replay_initial = 10000  # number frames that are held
replay_buffer = ReplayBuffer(100000)
model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
model.load_state_dict(
    torch.load("model_pretrained.pth",
               map_location='cpu'))  #loading in the pretrained model

target_model = QLearner(env, num_frames, batch_size, gamma,
                        replay_buffer)  #load in model
target_model.copy_from(model)

optimizer = optim.Adam(model.parameters(),
                       lr=0.0001)  #learning rate set and optimizing the model
if USE_CUDA:
    model = model.cuda()  # sends model to gpu
    target_model = target_model.cuda()
    print("Using cuda")
import os.path as op
import matplotlib.pyplot as plt

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 1000000
batch_size = 32
gamma = 0.99

replay_initial = 10000
replay_buffer = ReplayBuffer(100000)

model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
model.load_state_dict(torch.load('trained_model.pth'))
model.eval()
if USE_CUDA:
    model = model.cuda()

epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000

losses = []
all_rewards = []
episode_reward = 0

loss_list = []
reward_list = []
Beispiel #3
0
USE_CUDA = torch.cuda.is_available()
from dqn import QLearner, compute_td_loss, ReplayBuffer

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 1000000
batch_size = 32
gamma = 0.99
record_idx = 10000

replay_initial = 10000
replay_buffer = ReplayBuffer(100000)
model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
model.load_state_dict(torch.load("model_pretrained.pth", map_location='cpu'))

target_model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
target_model.copy_from(model)

optimizer = optim.Adam(model.parameters(), lr=0.00001)
if USE_CUDA:
    model = model.cuda()
    target_model = target_model.cuda()
    print("Using cuda")

epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000
epsilon_by_frame = lambda frame_idx: epsilon_final + (
Beispiel #4
0
USE_CUDA = torch.cuda.is_available()
from dqn import QLearner, compute_td_loss, ReplayBuffer

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 1000000
batch_size = 32
gamma = 0.99

replay_initial = 10000
replay_buffer = ReplayBuffer(100000)
model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
model.load_state_dict(torch.load(sys.argv[1], map_location='cpu'))
model.eval()
if USE_CUDA:
    model = model.cuda()
    print("Using cuda")

model.load_state_dict(torch.load(pthname, map_location='cpu'))

env.seed(1)
state = env.reset()
done = False

games_won = 0

while not done:
Beispiel #5
0
USE_CUDA = torch.cuda.is_available()
from dqn import QLearner, compute_td_loss, ReplayBuffer
import sys

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 1500000
batch_size = 32
gamma = 0.9

replay_initial = 10000
replay_buffer = ReplayBuffer(1000000)
model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
optimizer = optim.Adam(model.parameters(), lr=0.00001)
if USE_CUDA:
    model = model.cuda()

epsilon_start = 0.1
epsilon_final = 0.1
epsilon_decay = 30000
epsilon_by_frame = lambda frame_idx: epsilon_final + (
    epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)

losses = []
all_rewards = []
episode_reward = 0

state = env.reset()
env = wrap_pytorch(env)

train_num_frames = 5000000
sample_num_frames = 50000
batch_size = 32
gamma = 0.99
target_update = 50000
epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 1000000
replay_initial = 10000
learning_rate = 1e-5
train_replay_buffer = ReplayBuffer(100000)
analysis_replay_buffer = ReplayBuffer(100000)

policy_model = QLearner(env, train_num_frames, batch_size, gamma,
                        train_replay_buffer)
target_model = QLearner(env, train_num_frames, batch_size, gamma,
                        train_replay_buffer)
target_model.load_state_dict(policy_model.state_dict())
target_model.eval()

optimizer = optim.Adam(policy_model.parameters(), lr=learning_rate)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if USE_CUDA:
    policy_model = policy_model.to(device)
    target_model = target_model.to(device)

epsilon_by_frame = lambda frame_idx: epsilon_final + (
    epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)

Beispiel #7
0
USE_CUDA = torch.cuda.is_available()
from dqn import QLearner, compute_td_loss, ReplayBuffer

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 1000000
batch_size = 32
gamma = 0.99
record_idx = 10000

replay_initial = 10000
replay_buffer = ReplayBuffer(100000)
model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
model.load_state_dict(torch.load("model_pretrained.pth", map_location='cpu'))

target_model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
target_model.copy_from(model)

optimizer = optim.Adam(model.parameters(), lr=0.00001)
if USE_CUDA:
    model = model.cuda()
    target_model = target_model.cuda()
    print("Using cuda")

epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000
epsilon_by_frame = lambda frame_idx: epsilon_final + (
Beispiel #8
0
    plt.figure(2)
    plt.plot([reward[0] for reward in all_rewards], [reward[1] for reward in all_rewards])
    plt.xlabel('Frame #')
    plt.ylabel('Episode Reward')
    plt.savefig(f'rewards_lr={lr}.pdf')

USE_CUDA = torch.cuda.is_available()

# Set up game
env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

replay_buffer = ReplayBuffer(replay_buff_size)                                  # Buffer size
model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)             # Create model
model.load_state_dict(torch.load("model_pretrained.pth", map_location='cpu'))
model.eval()

target_model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)      # Create target model
target_model.copy_from(model)

# Optimize model's parameters
optimizer = optim.Adam(model.parameters(), lr=lr)
if USE_CUDA:
    model = model.cuda()
    target_model = target_model.cuda()
    print("Using cuda")

# Neg exp func. Start exploring then exploiting according to frame_indx
epsilon_by_frame = lambda frame_idx: epsilon_final + (epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)
Beispiel #9
0
import torch.nn.functional as F
USE_CUDA = torch.cuda.is_available()
from dqn import QLearner, compute_td_loss, ReplayBuffer

env_id = "PongNoFrameskip-v4"
env    = make_atari(env_id)
env    = wrap_deepmind(env)
env    = wrap_pytorch(env)

num_frames = 1400000
batch_size = 32
gamma = 0.99
    
replay_initial = 10000
replay_buffer = ReplayBuffer(100000)
model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
optimizer = optim.Adam(model.parameters(), lr=0.00001)
if USE_CUDA:
    model = model.cuda()

epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000
epsilon_by_frame = lambda frame_idx: epsilon_final + (epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)

losses = []
all_rewards = []
episode_reward = 0

state = env.reset()
Beispiel #10
0
USE_CUDA = torch.cuda.is_available()
from dqn import QLearner, compute_td_loss, ReplayBuffer

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 1000000  # total frames will be lerning from
batch_size = 32  # num samples provided to for update ppurposes
gamma = 0.99
record_idx = 10000

replay_initial = 10000  # num frames it will hold??
replay_buffer = ReplayBuffer(100000)
model = QLearner(env, num_frames, batch_size, gamma,
                 replay_buffer)  #from dpn.py; pth file??v
model.load_state_dict(torch.load("model_pretrained.pth", map_location='cpu'))

target_model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
target_model.copy_from(model)

optimizer = optim.Adam(model.parameters(), lr=0.00001)  #lr == learning rate
if USE_CUDA:  # gpu to use
    model = model.cuda()
    target_model = target_model.cuda()
    print("Using cuda")

epsilon_start = 1.0  # in textbook, figure out what it is used fro
epsilon_final = 0.01  # go towards this from above
epsilon_decay = 30000
epsilon_by_frame = lambda frame_idx: epsilon_final + (
import sys
Variable = lambda *args, **kwargs: autograd.Variable(*args, **kwargs).cuda(
) if USE_CUDA else autograd.Variable(*args, **kwargs)
import hashlib

# initialize the game environment
env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

# initialize learner with the same parameters that it was trained with
batch_size = 32
gamma = 0.99
replay_buffer = ReplayBuffer(100000)
model = QLearner(env, 5000000, batch_size, gamma, replay_buffer)
if USE_CUDA:
    model = model.cuda()

losses = []
all_rewards = []
episode_reward = 0

# load the saved model
if len(sys.argv) > 1:
    checkpoint = torch.load(sys.argv[1])
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

# no random actions during this eval stage
epsilon = -1
Beispiel #12
0
env = wrap_pytorch(env)
num_frames = 100000
#num_frames = 20000
batch_size = 32
gamma = 0.99
replay_initial = 10000
replay_buffer = ReplayBuffer(100000)
epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000
epsilon_by_frame = lambda frame_idx: epsilon_final + (epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)


"""loading the saved model"""
device = torch.device("cuda")
model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
filename = 'newdqnModel.pt'
model.load_state_dict(torch.load(filename))
model.to(device)
model.eval()

"""choosing 1000 frames randomly!"""
frame_range = 50000
frame_list = set(random.sample(range(1, frame_range), 1000))
vis_feature_matrix = []
vis_rewards = []
vis_actions = []

state = env.reset()
indx = 0
episode_reward = 0
Beispiel #13
0
USE_CUDA = torch.cuda.is_available()
from dqn import QLearner, compute_td_loss, ReplayBuffer
import pickle as pkl

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 2000000
batch_size = 32
gamma = 0.99

replay_initial = 20000
replay_buffer = ReplayBuffer(200000)
model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
optimizer = optim.Adam(model.parameters(), lr=0.00001)
if USE_CUDA:
    model = model.cuda()

epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000
epsilon_by_frame = lambda frame_idx: epsilon_final + (
    epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)

losses = []
all_rewards = []
episode_reward = 0

state = env.reset()
Beispiel #14
0
USE_CUDA = torch.cuda.is_available()
from dqn import QLearner, compute_td_loss, ReplayBuffer

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 1000000
batch_size = 32  #num samples provided to model at a time to update
gamma = 0.99
record_idx = 10000

replay_initial = 10000
replay_buffer = ReplayBuffer(100000)
model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
model.load_state_dict(torch.load("modelsave.pth", map_location='cpu'))

target_model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
target_model.copy_from(model)  #prevents too many radical changes

optimizer = optim.Adam(model.parameters(), lr=0.0001)
if USE_CUDA:  #GPU stuff
    model = model.cuda()
    target_model = target_model.cuda()
    print("Using cuda")

epsilon_start = 0.5  # Q-learning "error"
epsilon_final = 0.01
epsilon_decay = 30000
epsilon_by_frame = lambda frame_idx: epsilon_final + (
Beispiel #15
0
USE_CUDA = torch.cuda.is_available()
from dqn import QLearner, compute_td_loss, ReplayBuffer

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 1000000
batch_size = 32
gamma = 0.99

replay_initial = 10000
replay_buffer = ReplayBuffer(100000)
t_replay_buffer = ReplayBuffer(100000)
model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
target_model = QLearner(env, num_frames, batch_size, gamma, t_replay_buffer)
target_model.load_state_dict(model.state_dict())

optimizer = optim.Adam(model.parameters(), lr=0.00001)
if USE_CUDA:
    model = model.cuda()
    target_model = target_model.cuda()

epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000
epsilon_by_frame = lambda frame_idx: epsilon_final + (
    epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)

losses = []
Beispiel #16
0
# DISCOUNTED REWARD CONSTANT
gamma = 0.99

# ...
record_idx = 10000

# ...
replay_initial = 10000

# STORES MOMENTS FROM TRAINING EXPERIENCES, RANDOMLY SAMPLED?
# STORES INPUTS AND TRANSITIONS / REWARDS FOR ANY GIVEN FRAME
replay_buffer = ReplayBuffer(100000)

# CONTAINS THE NETWORK THAT APPROXIMATES THE Q-TABLE
model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)

# LOADS THE SAVED NN
model.load_state_dict(torch.load(file_name, map_location='cpu'))

# CONTAINS THE TARGET NETWORK THAT APPROXIMATES THE Q-TABLE
target_model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)

# DUPLICATE THE POLICY NETWORK INTO THE TARGET (APPROXIMATES Q*)
target_model.copy_from(model)

# INITIALIZE OPTIMIZER, USING ADAM METHOD TO UPDATE GRADIENTS
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# CUDA (GFX CARD) SETUP
if USE_CUDA: