def __init__(self, state_space, action_space, seed, opts):
    self.state_space = state_space
    self.action_space = action_space
    random.seed(seed)   # seed the RNG; note random.seed() returns None
    self.seed = seed    # keep the seed value itself for the replay memory
    self.opt = opts
    self.batch_size = opts.batch
    self.memory_size = opts.memory_size
    self.transfer_rate = opts.transfer_rate
    self.gamma = opts.discount_rate
    self.lr = opts.lr
    self.update_freq = opts.update_freq

    '''DQNetwork'''
    self.local_model = DuelingQNetwork(state_space, action_space, seed).to(device)
    self.target_model = DuelingQNetwork(state_space, action_space, seed).to(device)
    self.optimizer = Adam(self.local_model.parameters(), lr=self.lr)

    '''Replay Memory'''
    self.memory = replayMemory(action_space, self.memory_size, self.batch_size, self.seed)

    '''How often to update the model'''
    self.update_every = 0
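This init assumes a DuelingQNetwork module defined elsewhere. For reference, a minimal sketch of such a network; the hidden width and layer layout here are illustrative choices, not taken from the source:

import torch
import torch.nn as nn
import torch.nn.functional as F

class DuelingQNetwork(nn.Module):
    """Dueling architecture: separate value and advantage streams,
    recombined as Q(s, a) = V(s) + A(s, a) - mean_a A(s, a)."""
    def __init__(self, state_space, action_space, seed, hidden=64):
        super().__init__()
        self.seed = torch.manual_seed(seed)
        self.feature = nn.Linear(state_space, hidden)
        self.value_head = nn.Linear(hidden, 1)                 # state value V(s)
        self.advantage_head = nn.Linear(hidden, action_space)  # advantages A(s, a)

    def forward(self, state):
        x = F.relu(self.feature(state))
        value = self.value_head(x)
        advantage = self.advantage_head(x)
        # subtract the mean advantage so V and A are identifiable
        return value + advantage - advantage.mean(dim=1, keepdim=True)

Subtracting the mean advantage is the standard identifiability trick from the dueling-DQN paper; without it, V and A are only determined up to a constant.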
def __init__(self):
    self.env = gym.make(Params['GAME'])

    # setting up parameters
    self.batch_size = Params['BATCH_SIZE']
    self.buffer_size = Params['MEMORY_BUFFER_SIZE']
    self.random_action_prob = Params['RANDOM_ACTION_PROB_START']
    self.random_action_prob_end = Params['RANDOM_ACTION_PROB_END']
    self.frame_skip = Params['FRAME_SKIP']
    self.update_freq = Params['UPDATE_FREQ']
    self.sync_freq = Params['SYNC_FREQ']
    self.rand_prob_step = (self.random_action_prob - self.random_action_prob_end) / Params['ANNEALING_STEP']
    self.reward_discount = Params['REWARD_DISCOUNT']
    self.IMG_X = Params['IMG_X']
    self.IMG_Y = Params['IMG_Y']
    self.action_space = self.env.action_space.n
    self.updates = 0

    # setting up utilities
    self.memory_buffer = replayMemory(self.IMG_X, self.IMG_Y, self.buffer_size)
    self.nn = DDQNet(self.action_space)

    # initialize variables
    self.sess = tf.Session()
    self.saver = tf.train.Saver()
    self.sess.run(tf.global_variables_initializer())

    # restore variables
    self.logger = Logger(self.sess, self.saver)
    self.random_action_prob = self.random_action_prob_end if self.logger.restore() else self.random_action_prob
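rand_prob_step implies the exploration probability is annealed linearly over ANNEALING_STEP steps. The training loop itself is not shown; a hedged sketch of how that decrement would typically be applied once per environment step (the loop context is an assumption, not the source's code):

# Hypothetical per-step annealing, called from the training loop,
# clamped so the probability never drops below the configured floor.
if self.random_action_prob > self.random_action_prob_end:
    self.random_action_prob -= self.rand_prob_step
    self.random_action_prob = max(self.random_action_prob,
                                  self.random_action_prob_end)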
def __init__(self, state_space, action_space, seed, batch_size):
    self.state_space = state_space
    self.action_space = action_space
    random.seed(seed)   # seed the RNG; note random.seed() returns None
    self.seed = seed    # keep the seed value itself, not the None return value
    self.batch_size = batch_size

    '''DQNetwork'''
    self.local_model = pytorch_DQNetwork(state_space, action_space, seed).to(device)
    self.target_model = pytorch_DQNetwork(state_space, action_space, seed).to(device)
    self.optimizer = Adam(self.local_model.parameters(), lr=LR)

    '''Replay Memory'''
    self.memory = replayMemory(action_space, MEMORY_SIZE, batch_size, seed)

    '''How often to update the model'''
    self.update_every = 0
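A minimal usage sketch for this agent; the class name Agent, the CartPole environment, and the constants LR and MEMORY_SIZE being defined at module level are all assumptions for illustration:

import gym

# Hypothetical wiring: assumes this __init__ belongs to a class named Agent.
env = gym.make("CartPole-v1")
agent = Agent(state_space=env.observation_space.shape[0],
              action_space=env.action_space.n,
              seed=0,
              batch_size=64)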
# import util module (expected to provide gym, deque, opts, and Policy)
from util import *
import torch
torch.manual_seed(0)  # set random seed
import torch.optim as optim

env = gym.make(opts.env)
env.seed(opts.env_seed)

# import memory for ppo
from memory import replayMemory
memory = replayMemory(env.action_space.n, memory_size=opts.max_iteration,
                      batch_size=opts.batch, seed=0)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

state_size = env.observation_space.shape[0]
policy = Policy(s_size=state_size, h_size=opts.hidden).to(device)
if opts.print_model:
    print("The model: ", policy)
optimizer = optim.Adam(policy.parameters(), lr=opts.lr)

def reinforce(n_episodes=1000, max_t=1000, gamma=1.0, print_every=100):
    scores_deque = deque(maxlen=100)
    scores = []
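The reinforce function is cut off above after its bookkeeping lines. For orientation, a hedged sketch of how a standard REINFORCE body usually continues from that point, shown at function-body indentation; the Policy.act helper and the old 4-tuple gym step API are assumptions, not the author's code:

    # Assumed continuation of reinforce(), sketching the classic algorithm:
    # collect one episode, compute the discounted return, and ascend
    # the policy gradient -log pi(a|s) * G.
    for i_episode in range(1, n_episodes + 1):
        saved_log_probs = []
        rewards = []
        state = env.reset()
        for t in range(max_t):
            # assumes Policy.act returns (action, log_prob) with a 1-D log_prob
            action, log_prob = policy.act(state)
            saved_log_probs.append(log_prob)
            state, reward, done, _ = env.step(action)
            rewards.append(reward)
            if done:
                break
        scores_deque.append(sum(rewards))
        scores.append(sum(rewards))

        # discounted return G = sum_k gamma^k * r_k for the whole episode
        discounts = [gamma ** i for i in range(len(rewards) + 1)]
        G = sum(d * r for d, r in zip(discounts, rewards))

        # policy gradient loss: -log pi(a|s) * G, summed over the episode
        policy_loss = torch.cat([-lp * G for lp in saved_log_probs]).sum()

        optimizer.zero_grad()
        policy_loss.backward()
        optimizer.step()

        if i_episode % print_every == 0:
            print('Episode {}\tAverage Score: {:.2f}'.format(
                i_episode, sum(scores_deque) / len(scores_deque)))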