def create_model(self):
    # Critic network: maps a state vector to a single scalar value estimate V(s).
    model = nn.Sequential(
        nn.Linear(self.state_space.shape[0], 64),
        nn.ReLU(),
        nn.Linear(64, 32),
        nn.ReLU(),
        nn.Linear(32, 1),
    )
    a3c_logger.info(model)
    return model
def create_model(self):
    # Actor network: maps a state vector to model_output_dim outputs;
    # the final Softplus keeps them strictly positive (e.g. parameters
    # of the action distribution).
    model = nn.Sequential(
        nn.Linear(self.state_space.shape[0], 32),
        nn.Sigmoid(),
        nn.Linear(32, 16),
        nn.Sigmoid(),
        nn.Linear(16, self.model_output_dim),
        nn.Softplus(),
    )
    a3c_logger.info(model)
    return model
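# render() and run() below pass states through a helper t(...), which is not
# defined in this section. A minimal sketch, assuming it merely wraps the
# NumPy state returned by env.reset()/env.step() as a float32 torch tensor:
import numpy as np
import torch


def t(x):
    # Hypothetical implementation: convert a NumPy array (or list) into a
    # float32 tensor so the nn.Sequential models above can consume it.
    return torch.from_numpy(np.asarray(x, dtype=np.float32))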
def render(self):
    for e in range(10):
        state = self.env.reset()
        done = False
        score = 0
        while not done:
            self.env.render()
            action = self.Actor.get_best_action(t(state))
            state, reward, done, _ = self.env.step(action)
            score += reward
            if done:
                a3c_logger.info("episode: {}, score: {}".format(e, score))
                break
    self.env.close()
def run(self):
    if self.globalA3C is None:
        raise Exception("Global model is not set! "
                        "Please call set_global_model(global_model) to set the parent model.")
    state = self.env.reset()  # reset env and get initial state
    episode = 0
    while episode < self.max_episodes:
        # reset per-rollout bookkeeping
        is_terminal = False
        states, actions, rewards = [], [], []
        step_start = self.step
        # collect up to step_max transitions, or fewer if the episode ends
        while not is_terminal and self.step - step_start < self.step_max:
            states.append(state)                                             # register current state
            action = self.Actor.draw_action(t(state))                        # draw action
            next_state, reward, is_terminal, info = self.env.step(action)    # perform action
            actions.append(action)                                           # register action
            rewards.append(reward)                                           # register reward
            state = next_state
            self.step += 1
        # replay experience backwards and compute gradients
        self.replay_steps(states, actions, rewards, state, is_terminal)
        self.lock.acquire()
        self.update_global_models()
        self.sync_models()
        self.globalA3C.episode += 1
        episode = self.globalA3C.episode
        self.lock.release()
        if episode % self.measure_step == 0 and self.eval_repeats != 0:
            self.lock.acquire()
            mean, _ = self.evaluate(self.eval_repeats)
            self.globalA3C.performance.append([episode, mean])
            self.lock.release()
            if self.log_info:
                a3c_logger.info(f"\nEpisode: {episode}\nMean accumulated rewards: {mean}")
        if is_terminal:
            self.update_local_results()
            state = self.env.reset()  # reset env and get initial state
            self.local_episode += 1
    self.env.close()
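# run() delegates learning to replay_steps(), whose body is not shown in this
# section. A minimal sketch of the backward replay it presumably performs;
# the name replay_steps_sketch, the discount attribute self.gamma, and the
# critic handle self.Critic.model are assumptions, not the repository's
# exact code:
def replay_steps_sketch(self, states, actions, rewards, last_state, is_terminal):
    # Bootstrap from the critic's value of the last state unless the
    # episode genuinely terminated there.
    R = 0.0 if is_terminal else self.Critic.model(t(last_state)).item()
    returns = []
    for reward in reversed(rewards):   # replay the rollout backwards
        R = reward + self.gamma * R    # n-step discounted return
        returns.append(R)
    returns.reverse()                  # re-align with states/actions
    # Actor and critic losses (and hence gradients) would then be built from
    # the advantages: returns[i] minus the critic's estimate for states[i].
    return returns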
def test(self):
    a3c_logger.info(f"Starting test of A3C after {self.max_episodes} episodes of training.")
    mean, performance = self.evaluate(50)
    a3c_logger.info(f"Mean accumulated score: {mean}")
    return np.array(performance)
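# Because run() synchronizes on self.lock and raises unless
# set_global_model(...) was called, workers are evidently meant to execute
# concurrently against one shared global model. A hypothetical launch helper
# (only set_global_model() and run() are confirmed by the code above; the
# helper itself is an illustration, not part of the repository):
import threading


def launch_workers(global_model, workers):
    threads = []
    for worker in workers:
        worker.set_global_model(global_model)        # mirrors the check in run()
        thread = threading.Thread(target=worker.run)
        thread.start()
        threads.append(thread)
    for thread in threads:
        thread.join()                                # wait for every worker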