Example #1
    def create_model(self):
        model = nn.Sequential(nn.Linear(self.state_space.shape[0], 64),
                              nn.ReLU(), nn.Linear(64, 32), nn.ReLU(),
                              nn.Linear(32, 1))
        a3c_logger.info(model)

        return model
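The single output unit makes this look like the Critic's state-value head of the A3C pair. A minimal usage sketch under that assumption, using the classic Gym API the examples themselves rely on (the CartPole-v1 environment is only a placeholder for self.state_space):

    import gym
    import torch
    import torch.nn as nn

    env = gym.make("CartPole-v1")
    state_dim = env.observation_space.shape[0]   # plays the role of self.state_space.shape[0]

    critic = nn.Sequential(nn.Linear(state_dim, 64), nn.ReLU(),
                           nn.Linear(64, 32), nn.ReLU(),
                           nn.Linear(32, 1))

    state = env.reset()                          # old-style Gym reset returning the state only
    value = critic(torch.as_tensor(state, dtype=torch.float32))
    print(value.item())                          # estimated V(s) for the initial state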
Example #2
    def create_model(self):
        model = nn.Sequential(
            nn.Linear(self.state_space.shape[0], 32),
            nn.Sigmoid(),
            nn.Linear(32, 16),
            nn.Sigmoid(),
            nn.Linear(16, self.model_output_dim),
            nn.Softplus()
        )
        a3c_logger.info(model)

        return model
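The closing Softplus keeps every network output strictly positive (softplus(x) = log(1 + exp(x))), which is the usual reason to pick it when the outputs must parametrize a probability distribution. A quick standalone check of that property:

    import torch
    import torch.nn as nn

    sp = nn.Softplus()
    x = torch.tensor([-3.0, 0.0, 3.0])
    print(sp(x))   # tensor([0.0486, 0.6931, 3.0486]) -- strictly positive for any input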
Example #3
    def render(self):
        for e in range(10):
            state = self.env.reset()
            done = False
            score = 0
            while not done:
                self.env.render()
                action = self.Actor.get_best_action(t(state))  # greedy action from the Actor
                state, reward, done, _ = self.env.step(action)
                score += reward
                if done:
                    a3c_logger.info("episode: {}, score: {}".format(e, score))
                    break
        self.env.close()
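Both this loop and the training loop in the next example feed states through a helper t(...) that is not part of the snippet. A plausible minimal definition, assumed here rather than taken from the source, simply wraps the NumPy state as a float tensor:

    import numpy as np
    import torch

    def t(x):
        # hypothetical helper: convert a NumPy state (or list) to a float32 tensor
        return torch.as_tensor(np.asarray(x), dtype=torch.float32)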
Example #4
    def run(self):
        if self.globalA3C is None:
            raise Exception("Global model is not set! Please call set_global_model(global_model) to set the parent model.")

        state = self.env.reset()  # reset env and get initial state
        episode = 0
        while episode < self.max_episodes:
            # reset stuff
            is_terminal = False
            states, actions, rewards = [], [], []
            step_start = self.step

            while not is_terminal and self.step - step_start < self.step_max:
                states.append(state)  # register current state
                action = self.Actor.draw_action(t(state))  # draw action
                next_state, reward, is_terminal, info = self.env.step(action)  # perform action
                actions.append(action)  # register action
                rewards.append(reward)  # register reward
                state = next_state
                self.step += 1

            # replay experience backwards and compute gradients
            self.replay_steps(states, actions, rewards, state, is_terminal)
            self.lock.acquire()
            self.update_global_models()
            self.sync_models()
            self.globalA3C.episode += 1
            episode = self.globalA3C.episode
            self.lock.release()

            if episode % self.measure_step == 0 and self.eval_repeats != 0:
                self.lock.acquire()
                mean, _ = self.evaluate(self.eval_repeats)
                self.globalA3C.performance.append([episode, mean])
                self.lock.release()
                if self.log_info:
                    a3c_logger.info(f"\nEpisode: {episode}\nMean accumulated rewards: {mean}")

            if is_terminal:
                self.update_local_results()
                state = self.env.reset()  # reset env and get initial state
                self.local_episode += 1

        self.env.close()
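replay_steps is not shown in this example; in A3C it typically walks the collected rollout backwards and builds n-step bootstrapped returns, bootstrapping from the Critic's value of the last state whenever the rollout was cut off before reaching a terminal state. A sketch of that return computation under those assumptions (n_step_returns and critic_value are hypothetical names, not identifiers from the source):

    def n_step_returns(rewards, last_state, is_terminal, critic_value, gamma=0.99):
        # bootstrap from V(s_last) unless the rollout ended in a terminal state
        R = 0.0 if is_terminal else critic_value(last_state)
        returns = []
        for r in reversed(rewards):      # replay the rollout backwards
            R = r + gamma * R
            returns.append(R)
        returns.reverse()                # re-align with the order of states/actions
        return returns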
Example #5
    def test(self):
        a3c_logger.info(f"Starting test of A3C after {self.max_episodes} episodes of training.")
        mean, performance = self.evaluate(50)  # evaluate the trained policy over 50 episodes
        a3c_logger.info(f"Mean accumulated score: {mean}")
        return np.array(performance)