Example #1
def evaluate_actor_critic(params, path):
    # Rebuild the network and load the trained weights from the checkpoint file.
    model = ActorCritic(params.stack_size, get_action_space())
    model.load_state_dict(torch.load(path))
    model.eval()

    env = gym.make('CarRacing-v0')
    env_wrapper = EnvironmentWrapper(env, params.stack_size)

    total_reward = 0
    num_of_episodes = 100

    for episode in range(num_of_episodes):
        state = env_wrapper.reset()
        state = torch.Tensor([state])
        done = False
        score = 0
        while not done:
            # Ask the policy head for action probabilities and select an action from them.
            probs, _, _ = model(state)
            action = get_actions(probs)
            state, reward, done = env_wrapper.step(action[0])
            print(probs.detach().numpy(), "\n", action, reward)
            state = torch.Tensor([state])
            score += reward
            env_wrapper.render()
        print('Episode: {0} Score: {1:.2f}'.format(episode, score))
        total_reward += score
    return total_reward / num_of_episodes
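A minimal invocation sketch for the evaluation routine above; the stack_size value and the checkpoint filename are placeholders for whatever configuration the surrounding project actually uses, not values taken from it:

from types import SimpleNamespace

# Hypothetical configuration object: evaluate_actor_critic only reads params.stack_size.
params = SimpleNamespace(stack_size=4)
average_score = evaluate_actor_critic(params, 'actor_critic_checkpoint.pt')
print('Average score over 100 episodes: {:.2f}'.format(average_score))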
Example #2
    def __init__(self, params, model_path):
        self.params = params
        self.model_path = model_path
        # One training process per available CPU core.
        self.num_of_processes = mp.cpu_count()
        self.global_model = ActorCritic(self.params.stack_size,
                                        get_action_space())
        # Keep the global model's parameters in shared memory so every
        # worker process reads and updates the same weights.
        self.global_model.share_memory()
Example #3
    def __init__(self,
                 process_num,
                 global_model,
                 params,
                 autosave=False):  # CHANGE
        super().__init__()

        self.process_num = process_num
        self.global_model = global_model
        self.params = params
        env = gym.make('CarRacing-v0')
        self.environment = EnvironmentWrapper(env, self.params.stack_size)
        self.model = ActorCritic(self.params.stack_size, get_action_space())
        self.optimizer = Adam(self.global_model.parameters(),
                              lr=self.params.lr)
        self.storage = Storage(self.params.steps_per_update)
        self.current_observation = torch.zeros(
            1, *self.environment.get_state_shape())

        # NEW: extra bookkeeping for the learning rate, optional autosaving,
        # and loss/reward logging.
        self.lr = self.params.lr
        self.autosave = autosave
        self.log_loss = []
        self.log_tmp = np.array([])
        self.log_reward = np.array([])
Example #4
    def __init__(self, process_num, global_model, params):
        super().__init__()

        self.process_num = process_num
        self.global_model = global_model
        self.params = params
        env = gym.make('CarRacing-v0')
        self.environment = EnvironmentWrapper(env, self.params.stack_size)
        self.model = ActorCritic(self.params.stack_size, get_action_space())
        self.optimizer = Adam(self.global_model.parameters(),
                              lr=self.params.lr)
        self.storage = Storage(self.params.steps_per_update)
        self.current_observation = torch.zeros(
            1, *self.environment.get_state_shape())
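For context, the trainer in Example #2 and the workers in Examples #3 and #4 are typically tied together with an A3C-style launch loop. The sketch below assumes the worker class is named Worker, subclasses torch.multiprocessing.Process, and does its training inside run(); those names are assumptions, not taken from the listings above.

import torch

def train(self):
    # Assumed method on the trainer from Example #2: spawn one Worker per CPU core,
    # let each process update the shared global model, then persist the weights.
    workers = [Worker(process_num, self.global_model, self.params)
               for process_num in range(self.num_of_processes)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    torch.save(self.global_model.state_dict(), self.model_path)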
Example #5
    def __init__(self, params, model_path):
        self.params = params
        self.model_path = model_path
        self.num_of_processes = multiprocessing.cpu_count()
        # One environment instance per process, stepped in parallel.
        self.parallel_environments = ParallelEnvironments(
            self.params.stack_size, number_of_processes=self.num_of_processes)
        self.actor_critic = ActorCritic(self.params.stack_size,
                                        get_action_space())
        self.optimizer = Adam(self.actor_critic.parameters(),
                              lr=self.params.lr)
        self.storage = Storage(self.params.steps_per_update,
                               self.num_of_processes)
        self.current_observations = torch.zeros(
            self.num_of_processes,
            *self.parallel_environments.get_state_shape())
Example #6
def actor_critic_inference(params, path):
    model = ActorCritic(params.stack_size, get_action_space())
    model.load_state_dict(torch.load(path))
    model.eval()

    env = gym.make('CarRacing-v0')
    env_wrapper = EnvironmentWrapper(env, params.stack_size)

    state = env_wrapper.reset()
    state = torch.Tensor([state])
    done = False
    total_score = 0
    # Play a single episode with the trained policy, accumulating the total score.
    while not done:
        probs, _, _ = model(state)
        action = get_actions(probs)
        print(action)
        state, reward, done = env_wrapper.step(action[0])
        state = torch.Tensor([state])
        total_score += reward
        env_wrapper.render()
    return total_score