    def __init__(self, config):
        Base_Agent.__init__(self, config)
        self.policy_output_size = self.calculate_policy_output_size()
        self.policy_new = Neural_Network(self.state_size,
                                         self.policy_output_size,
                                         self.random_seed,
                                         self.hyperparameters,
                                         "VANILLA_NN").to(self.device)

        self.policy_old = Neural_Network(self.state_size,
                                         self.policy_output_size,
                                         self.random_seed,
                                         self.hyperparameters,
                                         "VANILLA_NN").to(self.device)
        self.max_steps_per_episode = config.environment.get_max_steps_per_episode()
        self.policy_new_optimizer = optim.Adam(
            self.policy_new.parameters(),
            lr=self.hyperparameters["learning_rate"])
        self.episode_number = 0
        self.many_episode_states = []
        self.many_episode_actions = []
        self.many_episode_rewards = []
        self.experience_generator = Parallel_Experience_Generator(
            self.environment, self.policy_new, self.random_seed,
            self.hyperparameters)
Example #2
    def run(self):
        """Starts the worker"""
        torch.set_num_threads(1)
        for ep_ix in range(self.episodes_to_run):
            with self.optimizer_lock:
                Base_Agent.copy_model_over(self.shared_model, self.local_model)
            epsilon_exploration = self.calculate_new_exploration()
            state = self.reset_game_for_worker()
            done = False
            self.episode_states = []
            self.episode_actions = []
            self.episode_rewards = []
            self.episode_log_action_probabilities = []
            self.critic_outputs = []

            # Roll out one full episode with the worker's local copy of the model
            while not done:
                action, action_log_prob, critic_outputs = self.pick_action_and_get_critic_values(
                    self.local_model, state, epsilon_exploration)
                next_state, reward, done, _ = self.environment.step(action)
                self.episode_states.append(state)
                self.episode_actions.append(action)
                self.episode_rewards.append(reward)
                self.episode_log_action_probabilities.append(action_log_prob)
                self.critic_outputs.append(critic_outputs)
                state = next_state

            # Turn the episode into a loss and push the resulting gradients to the
            # queue that feeds the shared optimizer
            total_loss = self.calculate_total_loss()
            self.put_gradients_in_queue(total_loss)
            self.episode_number += 1
            with self.counter.get_lock():
                self.counter.value += 1
                self.results_queue.put(np.sum(self.episode_rewards))
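
The worker above computes a loss locally and then hands gradients to a shared optimizer through a queue. A minimal sketch of what that hand-off step typically looks like in this pattern; the function name, the clipping norm and the queue argument are assumptions for illustration, not code from the snippet above:

import torch

def push_local_gradients(local_model, total_loss, gradient_queue, max_grad_norm=5.0):
    """Hypothetical sketch: back-propagate on the worker's local copy, clip, and
    hand the gradients to the process that owns the shared optimizer."""
    local_model.zero_grad()
    total_loss.backward()
    # Clip so one bad episode cannot destabilise the shared model
    torch.nn.utils.clip_grad_norm_(local_model.parameters(), max_grad_norm)
    gradients = [param.grad.clone() for param in local_model.parameters()
                 if param.grad is not None]
    gradient_queue.put(gradients)
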
Example #3
    def __init__(self, config):
        Base_Agent.__init__(self, config)
        self.policy = self.create_NN(input_dim=self.state_size,
                                     output_dim=self.action_size)
        self.optimizer = optim.Adam(self.policy.parameters(),
                                    lr=self.hyperparameters["learning_rate"])
        self.episode_rewards = []
        self.episode_log_probabilities = []
Example #4
    def __init__(self, config):
        Base_Agent.__init__(self, config)
        self.policy = Neural_Network(self.state_size, self.action_size,
                                     config.seed, self.hyperparameters,
                                     "VANILLA_NN").to(self.device)
        self.optimizer = optim.Adam(self.policy.parameters(),
                                    lr=self.hyperparameters["learning_rate"])
        self.episode_rewards = []
        self.episode_log_probabilities = []
Example #5
    def __init__(self, config):
        Base_Agent.__init__(self, config)
        self.agent_name = "REINFORCE"
        self.policy = self.create_NN(config.state_size, config.action_size,
                                     config.learning_rate)
        self.optimizer = optim.Adam(
            self.policy.parameters(),
            lr=self.config.hyperparameters["learning_rate"])
        self.episode_rewards = []
        self.episode_log_probabilities = []
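
The three REINFORCE-style constructors above (Examples #3–#5) all collect per-step rewards and log-probabilities and feed them into the same kind of policy-gradient update. A minimal sketch of how those two lists are typically turned into a REINFORCE loss; the helper name and the discount factor are assumptions, not code from the snippets above:

import torch

def reinforce_loss(episode_log_probabilities, episode_rewards, discount_rate=0.99):
    """Hypothetical helper: one episode's rewards and log-probs -> policy-gradient loss."""
    # Discounted return G_t for every timestep, computed backwards through the episode
    returns, running_return = [], 0.0
    for reward in reversed(episode_rewards):
        running_return = reward + discount_rate * running_return
        returns.insert(0, running_return)
    returns = torch.tensor(returns, dtype=torch.float32)
    # REINFORCE maximises E[log pi(a_t|s_t) * G_t], so minimise the negative
    return -(torch.stack(episode_log_probabilities) * returns).sum()

The Adam optimizer built in each __init__ would then be stepped on a loss of this form once per episode.
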
Example #6
    def __init__(self, config):
        Base_Agent.__init__(self, config)
        self.memory = Replay_Buffer(self.hyperparameters["buffer_size"],
                                    self.hyperparameters["batch_size"],
                                    config.seed)
        self.q_network_local = Neural_Network(self.state_size,
                                              self.action_size, config.seed,
                                              self.hyperparameters,
                                              "VANILLA_NN").to(self.device)
        self.q_network_optimizer = optim.Adam(
            self.q_network_local.parameters(),
            lr=self.hyperparameters["learning_rate"])
Example #7
    def __init__(self, config):
        Base_Agent.__init__(self, config)
        self.policy_output_size = self.calculate_policy_output_size()
        self.policy_new = self.create_NN(input_dim=self.state_size, output_dim=self.policy_output_size)
        self.policy_old = self.create_NN(input_dim=self.state_size, output_dim=self.policy_output_size)
        self.policy_old.load_state_dict(copy.deepcopy(self.policy_new.state_dict()))
        self.policy_new_optimizer = optim.Adam(self.policy_new.parameters(), lr=self.hyperparameters["learning_rate"], eps=1e-4)
        self.episode_number = 0
        self.many_episode_states = []
        self.many_episode_actions = []
        self.many_episode_rewards = []
        self.experience_generator = Parallel_Experience_Generator(self.environment, self.policy_new, self.config.seed,
                                                                  self.hyperparameters, self.action_size)
        self.exploration_strategy = Epsilon_Greedy_Exploration(self.config)
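
This constructor and the first one in the listing both keep a policy_new / policy_old pair, which is the pair PPO's clipped surrogate objective is built from. A minimal sketch of that objective, assuming per-action log-probabilities from both networks and precomputed advantages; the helper name and clip value are illustrative, not taken from the snippets above:

import torch

def ppo_clipped_loss(new_log_probs, old_log_probs, advantages, clip_epsilon=0.2):
    """Hypothetical helper: standard PPO clipped surrogate loss for one batch."""
    # Probability ratio pi_new(a|s) / pi_old(a|s), computed in log space for stability
    ratio = torch.exp(new_log_probs - old_log_probs.detach())
    unclipped = ratio * advantages
    clipped = torch.clamp(ratio, 1.0 - clip_epsilon, 1.0 + clip_epsilon) * advantages
    # Pessimistic (minimum) objective, negated so the optimizer can minimise it
    return -torch.min(unclipped, clipped).mean()

After each round of updates, policy_old would be refreshed from policy_new with the same load_state_dict call shown in the constructor above.
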
Example #8
    def __init__(self, config):

        Base_Agent.__init__(self, config)

        if self.hyperparameters["policy_network_type"] == "Linear":
            self.policy = Linear_Model(self.state_size, self.action_size)
            self.best_weights_seen = self.policy.weights

        self.best_episode_score_seen = float("-inf")

        self.stochastic_action_decision = self.hyperparameters[
            "stochastic_action_decision"]
        self.noise_scale = self.hyperparameters["noise_scale_start"]
        self.noise_scale_min = self.hyperparameters["noise_scale_min"]
        self.noise_scale_max = self.hyperparameters["noise_scale_max"]
        self.noise_scale_growth_factor = self.hyperparameters[
            "noise_scale_growth_factor"]
Example #9
    def __init__(self, config):

        Base_Agent.__init__(self, config)

        self.num_policies = self.hyperparameters["num_policies"]

        if self.hyperparameters["policy_network_type"] == "Linear":
            self.policies = [Linear_Model(self.state_size, self.action_size) for _ in range(self.num_policies)]

            self.weight_rows = self.policies[0].weights.shape[0]
            self.weight_cols = self.policies[0].weights.shape[1]

        self.stochastic_action_decision = self.hyperparameters["stochastic_action_decision"]
        self.episodes_per_policy = self.hyperparameters["episodes_per_policy"]
        self.num_policies_to_keep = self.hyperparameters["num_policies_to_keep"]

        self.policy_to_use_this_episode = 0
        self.policy_scores_this_round = [0] * self.num_policies
Example #10
    def __init__(self, config):
        Base_Agent.__init__(self, config)
        initial_state = self.environment.reset()
        assert isinstance(initial_state, (int, np.int64)) or \
            initial_state.dtype == np.int64, "only works for discrete states currently"
        self.num_skills = self.hyperparameters["SKILL_AGENT"]["num_skills"]
        self.episodes_for_pretraining = self.hyperparameters["SKILL_AGENT"][
            "episodes_for_pretraining"]
        self.timesteps_before_changing_skill = self.hyperparameters["MANAGER"][
            "timesteps_before_changing_skill"]

        # Separate config for the skill agent, which pre-trains the skills first
        self.skill_agent_config = copy.deepcopy(config)
        self.skill_agent_config.hyperparameters = self.skill_agent_config.hyperparameters[
            "SKILL_AGENT"]
        self.skill_agent_config.num_episodes_to_run = self.episodes_for_pretraining

        # Separate config for the manager, which runs for the remaining episodes
        self.manager_config = copy.deepcopy(config)
        self.manager_config.hyperparameters = self.manager_config.hyperparameters[
            "MANAGER"]
        self.manager_config.num_episodes_to_run = self.config.num_episodes_to_run - self.skill_agent_config.num_episodes_to_run