env_id = "PongNoFrameskip-v4" # established environment that will be played env = make_atari(env_id) env = wrap_deepmind(env) env = wrap_pytorch(env) num_frames = 1000000 # total frames that will be learning from batch_size = 32 # the number of samples that are provided to the model for update services at a given time gamma = 0.99 # the discount of future rewards record_idx = 10000 # replay_initial = 10000 # number frames that are held replay_buffer = ReplayBuffer(100000) model = QLearner(env, num_frames, batch_size, gamma, replay_buffer) model.load_state_dict( torch.load("model_pretrained.pth", map_location='cpu')) #loading in the pretrained model target_model = QLearner(env, num_frames, batch_size, gamma, replay_buffer) #load in model target_model.copy_from(model) optimizer = optim.Adam(model.parameters(), lr=0.0001) #learning rate set and optimizing the model if USE_CUDA: model = model.cuda() # sends model to gpu target_model = target_model.cuda() print("Using cuda") epsilon_start = 1.0 epsilon_final = 0.01
# Evaluation setup: load a trained model and prepare bookkeeping lists
# for losses and episode rewards.
import matplotlib.pyplot as plt
import torch

from dqn import QLearner, compute_td_loss, ReplayBuffer
from wrappers import make_atari, wrap_deepmind, wrap_pytorch

USE_CUDA = torch.cuda.is_available()

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 1000000
batch_size = 32
gamma = 0.99
replay_initial = 10000
replay_buffer = ReplayBuffer(100000)

model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
model.load_state_dict(torch.load('trained_model.pth'))
model.eval()  # evaluation mode: no gradient-dependent layer updates

if USE_CUDA:
    model = model.cuda()

epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000

losses = []
all_rewards = []
episode_reward = 0
loss_list = []
reward_list = []
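# The matplotlib import and the loss/reward lists above suggest plotting
# training curves. A minimal sketch of how they might be used once the lists
# are populated; this helper is not in the original source, and it assumes
# each entry is a scalar logged once per episode or logging interval.
def plot_training_curves(loss_list, reward_list):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
    ax1.plot(loss_list)
    ax1.set_title("TD loss")
    ax1.set_xlabel("logging step")
    ax2.plot(reward_list)
    ax2.set_title("Episode reward")
    ax2.set_xlabel("episode")
    fig.tight_layout()
    plt.show()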
# Training setup with an exponentially decaying epsilon-greedy schedule.
import math

import torch
import torch.optim as optim

from dqn import QLearner, compute_td_loss, ReplayBuffer
from wrappers import make_atari, wrap_deepmind, wrap_pytorch

USE_CUDA = torch.cuda.is_available()

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 1000000
batch_size = 32
gamma = 0.99
record_idx = 10000
replay_initial = 10000
replay_buffer = ReplayBuffer(100000)

model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
model.load_state_dict(torch.load("model_pretrained.pth", map_location='cpu'))

target_model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
target_model.copy_from(model)

optimizer = optim.Adam(model.parameters(), lr=0.00001)

if USE_CUDA:
    model = model.cuda()
    target_model = target_model.cuda()
    print("Using cuda")

# Anneal epsilon from epsilon_start toward epsilon_final with an
# exponential decay controlled by epsilon_decay.
epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000
epsilon_by_frame = lambda frame_idx: epsilon_final + \
    (epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)
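# Sanity check of the schedule above (illustrative only; the values follow
# directly from the formula): epsilon starts at 1.0, falls to roughly 0.37
# at frame_idx == epsilon_decay, and approaches epsilon_final afterwards.
for f in (0, 10000, 30000, 100000):
    print(f, round(epsilon_by_frame(f), 3))
# prints approximately: 0 1.0, 10000 0.719, 30000 0.374, 100000 0.045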
# Evaluation script: load a trained model from the command line and play
# one episode, optionally rendering to the screen.
import sys

import torch

from dqn import QLearner, compute_td_loss, ReplayBuffer
from wrappers import make_atari, wrap_deepmind, wrap_pytorch

USE_CUDA = torch.cuda.is_available()

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 1000000
batch_size = 32
gamma = 0.99
replay_initial = 10000
replay_buffer = ReplayBuffer(100000)

model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
model.load_state_dict(torch.load(sys.argv[1], map_location='cpu'))
model.eval()

if USE_CUDA:
    model = model.cuda()
    print("Using cuda")

use_gui = "--gui" in sys.argv  # hypothetical flag; not defined in the original snippet

env.seed(1)
state = env.reset()
done = False
games_won = 0

while not done:
    if use_gui:
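        env.render()
    # The rest of the loop body was cut off in the source. A minimal
    # reconstruction under the obvious assumptions: act greedily
    # (epsilon = 0), step the environment, and count points won.
    action = model.act(state, 0.0)
    state, reward, done, _ = env.step(action)
    if reward > 0:
        games_won += 1  # in Pong, a +1 reward marks a point won by the agent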
# Continued-training script: fine-tune with a constant epsilon of 0.1,
# optionally restoring model/optimizer state from a checkpoint. Assumes
# env, model, target_model, optimizer, replay_buffer, replay_initial, and
# num_frames are set up as in the snippets above.
import math
import sys

import torch

epsilon_start = 0.1
epsilon_final = 0.1  # start == final, so epsilon stays constant at 0.1
epsilon_decay = 30000
epsilon_by_frame = lambda frame_idx: epsilon_final + \
    (epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)

losses = []
all_rewards = []
episode_reward = 0
state = env.reset()

if len(sys.argv) > 1:
    checkpoint = torch.load(sys.argv[1])
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    #frame_start = checkpoint['frame_idx']
    #losses = checkpoint['losses']
    #all_rewards = checkpoint['all_rewards']
    #replay_buffer = checkpoint['replay_buffer']

frame_start = 1300000  # hard-coded resume point instead of checkpoint['frame_idx']

for frame_idx in range(frame_start, frame_start + num_frames + 1):
    epsilon = epsilon_by_frame(frame_idx)
    # given our state (received from the env), the model chooses an action
    action = model.act(state, epsilon)
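    # The loop body was cut off in the source. What follows is a minimal
    # reconstruction of the standard DQN step, assuming compute_td_loss takes
    # (model, target_model, batch_size, gamma, replay_buffer) and returns the
    # TD loss tensor; adjust to the actual definition in dqn.py.
    next_state, reward, done, _ = env.step(action)
    replay_buffer.push(state, action, reward, next_state, done)
    state = next_state
    episode_reward += reward

    if done:
        state = env.reset()
        all_rewards.append(episode_reward)
        episode_reward = 0

    if len(replay_buffer) > replay_initial:
        loss = compute_td_loss(model, target_model, batch_size, gamma, replay_buffer)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.append(loss.item())

    if frame_idx % 50000 == 0:
        # periodic hard sync of the target network (interval assumed)
        target_model.copy_from(model)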
env_id = "PongNoFrameskip-v4" env = make_atari(env_id) env = wrap_deepmind(env) env = wrap_pytorch(env) num_frames = 1000000 batch_size = 32 gamma = 0.99 record_idx = 10000 filename = "2model.pth" replay_initial = 10000 replay_buffer = ReplayBuffer(100000) model = QLearner(env, num_frames, batch_size, gamma, replay_buffer) model.load_state_dict(torch.load(filename, map_location='cpu')) target_model = QLearner(env, num_frames, batch_size, gamma, replay_buffer) target_model.copy_from(model) optimizer = optim.Adam(model.parameters(), lr=0.00001) if USE_CUDA: model = model.cuda() target_model = target_model.cuda() print("Using cuda") epsilon_start = 1.0 epsilon_final = 0.01 epsilon_decay = 30000 epsilon_by_frame = lambda frame_idx: epsilon_final + (epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)
# Variant with explicit policy/target models, separate train/analysis replay
# buffers, and a hard target update every target_update frames. Assumes env
# is created as in the snippets above.
import math

import torch
import torch.optim as optim

from dqn import QLearner, compute_td_loss, ReplayBuffer

USE_CUDA = torch.cuda.is_available()

batch_size = 32
gamma = 0.99
target_update = 50000
epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 1000000
replay_initial = 10000
learning_rate = 1e-5
train_num_frames = 1000000  # assumed value; the snippet uses this name without defining it

train_replay_buffer = ReplayBuffer(100000)
analysis_replay_buffer = ReplayBuffer(100000)

policy_model = QLearner(env, train_num_frames, batch_size, gamma, train_replay_buffer)
target_model = QLearner(env, train_num_frames, batch_size, gamma, train_replay_buffer)
target_model.load_state_dict(policy_model.state_dict())
target_model.eval()  # the target network is never trained directly

optimizer = optim.Adam(policy_model.parameters(), lr=learning_rate)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if USE_CUDA:
    policy_model = policy_model.to(device)
    target_model = target_model.to(device)

epsilon_by_frame = lambda frame_idx: epsilon_final + \
    (epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)

def play_to_train(num_frames, policy_model, target_model, buffer):
    losses = []
    all_rewards = []
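    # The rest of the body was cut off in the source. It presumably mirrors
    # the training loop reconstructed after the resume script above, acting
    # with policy_model, pushing transitions into `buffer`, and optimizing
    # via compute_td_loss, with one addition specific to this variant: a
    # hard target-network sync every target_update frames, e.g.
    #
    #     if frame_idx % target_update == 0:
    #         target_model.load_state_dict(policy_model.state_dict())
    #
    return losses, all_rewards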
# Training setup resuming from the "model1.pth" checkpoint.
import math

import torch
import torch.optim as optim

from dqn import QLearner, compute_td_loss, ReplayBuffer
from wrappers import make_atari, wrap_deepmind, wrap_pytorch

USE_CUDA = torch.cuda.is_available()

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 1000000
batch_size = 32
gamma = 0.99
record_idx = 10000
replay_initial = 10000
replay_buffer = ReplayBuffer(100000)

model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
model.load_state_dict(torch.load("model1.pth", map_location='cpu'))

target_model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
target_model.copy_from(model)

optimizer = optim.Adam(model.parameters(), lr=0.00001)

if USE_CUDA:
    model = model.cuda()
    target_model = target_model.cuda()
    print("Using cuda")

epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000
epsilon_by_frame = lambda frame_idx: epsilon_final + \
    (epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)
# Visualization script: load a saved model, roll it out, and collect
# features/actions/rewards on 1000 randomly chosen frames.
import math
import random

import torch

from dqn import QLearner, compute_td_loss, ReplayBuffer
from wrappers import make_atari, wrap_deepmind, wrap_pytorch

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 20000  # was commented out in the source, but is used below
batch_size = 32
gamma = 0.99
replay_initial = 10000
replay_buffer = ReplayBuffer(100000)

epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000
epsilon_by_frame = lambda frame_idx: epsilon_final + \
    (epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)

# Loading the saved model.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
filename = 'newdqnModel.pt'
model.load_state_dict(torch.load(filename, map_location=device))
model.to(device)
model.eval()

# Choosing 1000 frames randomly out of the first frame_range frames.
frame_range = 50000
frame_list = set(random.sample(range(1, frame_range), 1000))

vis_feature_matrix = []
vis_rewards = []
vis_actions = []

state = env.reset()
indx = 0
episode_reward = 0

for frame_idx in range(0, frame_range):
    #print(frame_idx)
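    # The loop body was cut off in the source. A minimal reconstruction: act
    # with the scheduled epsilon, and on the randomly chosen frames record
    # the model's features for later visualization. `model.features(...)` is
    # a hypothetical hook; substitute whatever layer output QLearner exposes.
    epsilon = epsilon_by_frame(frame_idx)
    action = model.act(state, epsilon)
    next_state, reward, done, _ = env.step(action)
    if frame_idx in frame_list:
        with torch.no_grad():
            feats = model.features(torch.FloatTensor(state).unsqueeze(0).to(device))
        vis_feature_matrix.append(feats.cpu().numpy().flatten())
        vis_actions.append(action)
        vis_rewards.append(reward)
        indx += 1
    state = next_state
    episode_reward += reward
    if done:
        state = env.reset()
        episode_reward = 0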
env_id = "PongNoFrameskip-v4" env = make_atari(env_id) env = wrap_deepmind(env) env = wrap_pytorch(env) num_frames = 1000000 batch_size = 32 gamma = 0.99 replay_initial = 10000 replay_buffer = ReplayBuffer(100000) t_replay_buffer = ReplayBuffer(100000) model = QLearner(env, num_frames, batch_size, gamma, replay_buffer) target_model = QLearner(env, num_frames, batch_size, gamma, t_replay_buffer) target_model.load_state_dict(model.state_dict()) optimizer = optim.Adam(model.parameters(), lr=0.00001) if USE_CUDA: model = model.cuda() target_model = target_model.cuda() epsilon_start = 1.0 epsilon_final = 0.01 epsilon_decay = 30000 epsilon_by_frame = lambda frame_idx: epsilon_final + ( epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay) losses = [] all_rewards = [] episode_reward = 0