def __init__(self, config):
    Base_Agent.__init__(self, config)
    self.policy_output_size = self.calculate_policy_output_size()
    self.policy_new = Neural_Network(self.state_size, self.policy_output_size, self.random_seed,
                                     self.hyperparameters, "VANILLA_NN").to(self.device)
    self.policy_old = Neural_Network(self.state_size, self.policy_output_size, self.random_seed,
                                     self.hyperparameters, "VANILLA_NN").to(self.device)
    self.max_steps_per_episode = config.environment.get_max_steps_per_episode()
    self.policy_new_optimizer = optim.Adam(self.policy_new.parameters(),
                                           lr=self.hyperparameters["learning_rate"])
    self.episode_number = 0
    self.many_episode_states = []
    self.many_episode_actions = []
    self.many_episode_rewards = []
    self.experience_generator = Parallel_Experience_Generator(self.environment, self.policy_new,
                                                              self.random_seed, self.hyperparameters)
def run(self): """Starts the worker""" torch.set_num_threads(1) for ep_ix in range(self.episodes_to_run): with self.optimizer_lock: Base_Agent.copy_model_over(self.shared_model, self.local_model) epsilon_exploration = self.calculate_new_exploration() state = self.reset_game_for_worker() done = False self.episode_states = [] self.episode_actions = [] self.episode_rewards = [] self.episode_log_action_probabilities = [] self.critic_outputs = [] while not done: action, action_log_prob, critic_outputs = self.pick_action_and_get_critic_values( self.local_model, state, epsilon_exploration) next_state, reward, done, _ = self.environment.step(action) self.episode_states.append(state) self.episode_actions.append(action) self.episode_rewards.append(reward) self.episode_log_action_probabilities.append(action_log_prob) self.critic_outputs.append(critic_outputs) state = next_state total_loss = self.calculate_total_loss() self.put_gradients_in_queue(total_loss) self.episode_number += 1 with self.counter.get_lock(): self.counter.value += 1 self.results_queue.put(np.sum(self.episode_rewards))
def __init__(self, config):
    Base_Agent.__init__(self, config)
    self.policy = self.create_NN(input_dim=self.state_size, output_dim=self.action_size)
    self.optimizer = optim.Adam(self.policy.parameters(), lr=self.hyperparameters["learning_rate"])
    self.episode_rewards = []
    self.episode_log_probabilities = []
def __init__(self, config):
    Base_Agent.__init__(self, config)
    self.policy = Neural_Network(self.state_size, self.action_size, config.seed,
                                 self.hyperparameters, "VANILLA_NN").to(self.device)
    self.optimizer = optim.Adam(self.policy.parameters(), lr=self.hyperparameters["learning_rate"])
    self.episode_rewards = []
    self.episode_log_probabilities = []
def __init__(self, config): Base_Agent.__init__(self, config) self.agent_name = "REINFORCE" self.policy = self.create_NN(config.state_size, config.action_size, config.learning_rate) self.optimizer = optim.Adam( self.policy.parameters(), lr=self.config.hyperparameters["learning_rate"]) self.episode_rewards = [] self.episode_log_probabilities = []
def __init__(self, config):
    Base_Agent.__init__(self, config)
    self.memory = Replay_Buffer(self.hyperparameters["buffer_size"],
                                self.hyperparameters["batch_size"], config.seed)
    self.q_network_local = Neural_Network(self.state_size, self.action_size, config.seed,
                                          self.hyperparameters, "VANILLA_NN").to(self.device)
    self.q_network_optimizer = optim.Adam(self.q_network_local.parameters(),
                                          lr=self.hyperparameters["learning_rate"])
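# A minimal sketch of the Q-learning step this setup implies. The constructor above builds only a
# single network, so the sketch bootstraps from q_network_local itself rather than a target
# network; the Replay_Buffer.sample() output format (column tensors of shape [batch, 1]) and the
# discount_rate value are assumptions.
import torch
import torch.nn.functional as F

def dqn_learning_step(q_network_local, optimizer, memory, discount_rate=0.99):
    states, actions, rewards, next_states, dones = memory.sample()
    with torch.no_grad():                                          # no gradient through the bootstrap target
        max_next_q = q_network_local(next_states).max(1)[0].unsqueeze(1)
        q_targets = rewards + discount_rate * max_next_q * (1 - dones)
    q_expected = q_network_local(states).gather(1, actions.long())  # Q(s, a) for the actions taken
    loss = F.mse_loss(q_expected, q_targets)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()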
def __init__(self, config):
    Base_Agent.__init__(self, config)
    self.policy_output_size = self.calculate_policy_output_size()
    self.policy_new = self.create_NN(input_dim=self.state_size, output_dim=self.policy_output_size)
    self.policy_old = self.create_NN(input_dim=self.state_size, output_dim=self.policy_output_size)
    self.policy_old.load_state_dict(copy.deepcopy(self.policy_new.state_dict()))
    self.policy_new_optimizer = optim.Adam(self.policy_new.parameters(),
                                           lr=self.hyperparameters["learning_rate"], eps=1e-4)
    self.episode_number = 0
    self.many_episode_states = []
    self.many_episode_actions = []
    self.many_episode_rewards = []
    self.experience_generator = Parallel_Experience_Generator(self.environment, self.policy_new,
                                                              self.config.seed, self.hyperparameters,
                                                              self.action_size)
    self.exploration_strategy = Epsilon_Greedy_Exploration(self.config)
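# A minimal sketch (not this repository's exact update) of the clipped surrogate loss that a
# new/old policy pair like the one above is normally used for: the probability ratio between
# policy_new and policy_old is clipped to [1 - eps, 1 + eps] before being weighted by the
# advantage. The clip_epsilon value and the advantage estimates are assumptions.
import torch

def ppo_clipped_loss(new_log_probs, old_log_probs, advantages, clip_epsilon=0.2):
    ratio = torch.exp(new_log_probs - old_log_probs.detach())      # pi_new(a|s) / pi_old(a|s)
    unclipped = ratio * advantages
    clipped = torch.clamp(ratio, 1.0 - clip_epsilon, 1.0 + clip_epsilon) * advantages
    return -torch.min(unclipped, clipped).mean()                   # maximise the clipped objective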
def __init__(self, config): Base_Agent.__init__(self, config) if self.hyperparameters["policy_network_type"] == "Linear": self.policy = Linear_Model(self.state_size, self.action_size) self.best_weights_seen = self.policy.weights self.best_episode_score_seen = float("-inf") self.stochastic_action_decision = self.hyperparameters[ "stochastic_action_decision"] self.noise_scale = self.hyperparameters["noise_scale_start"] self.noise_scale_min = self.hyperparameters["noise_scale_min"] self.noise_scale_max = self.hyperparameters["noise_scale_max"] self.noise_scale_growth_factor = self.hyperparameters[ "noise_scale_growth_factor"]
def __init__(self, config): Base_Agent.__init__(self, config) self.num_policies = self.hyperparameters["num_policies"] if self.hyperparameters["policy_network_type"] == "Linear": self.policies = [Linear_Model(self.state_size, self.action_size) for _ in range(self.num_policies)] self.weight_rows = self.policies[0].weights.shape[0] self.weight_cols = self.policies[0].weights.shape[1] self.stochastic_action_decision = self.hyperparameters["stochastic_action_decision"] self.episodes_per_policy = self.hyperparameters["episodes_per_policy"] self.num_policies_to_keep = self.hyperparameters["num_policies_to_keep"] self.policy_to_use_this_episode = 0 self.policy_scores_this_round = [0] * self.num_policies
def __init__(self, config):
    Base_Agent.__init__(self, config)
    assert isinstance(self.environment.reset(), int) or isinstance(self.environment.reset(), np.int64) \
           or self.environment.reset().dtype == np.int64, "only works for discrete states currently"
    self.num_skills = self.hyperparameters["SKILL_AGENT"]["num_skills"]
    self.episodes_for_pretraining = self.hyperparameters["SKILL_AGENT"]["episodes_for_pretraining"]
    self.timesteps_before_changing_skill = self.hyperparameters["MANAGER"]["timesteps_before_changing_skill"]
    self.skill_agent_config = copy.deepcopy(config)
    self.skill_agent_config.hyperparameters = self.skill_agent_config.hyperparameters["SKILL_AGENT"]
    self.skill_agent_config.num_episodes_to_run = self.episodes_for_pretraining
    self.manager_config = copy.deepcopy(config)
    self.manager_config.hyperparameters = self.manager_config.hyperparameters["MANAGER"]
    self.manager_config.num_episodes_to_run = self.config.num_episodes_to_run - self.skill_agent_config.num_episodes_to_run
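# A minimal sketch of the hierarchical control loop the configs above set up: after the skill
# agent has pretrained for episodes_for_pretraining episodes, the manager chooses a skill index
# and holds it for timesteps_before_changing_skill environment steps at a time while the
# skill-conditioned low-level policy acts. The callables manager_pick_skill and skill_agent_act
# are hypothetical stand-ins for the two agents.
def hierarchical_episode(environment, manager_pick_skill, skill_agent_act,
                         timesteps_before_changing_skill):
    state, done, timestep = environment.reset(), False, 0
    skill = None
    while not done:
        if timestep % timesteps_before_changing_skill == 0:        # manager re-decides periodically
            skill = manager_pick_skill(state)
        action = skill_agent_act(state, skill)                     # low-level policy conditioned on the skill
        state, reward, done, _ = environment.step(action)
        timestep += 1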