def __init__(self, game, name, s_size, a_size, number_of_agents, trainer, model_path,
             global_episodes, amount_of_agents_to_send_message_to, display=False,
             comm=False, comm_size_per_agent=0, spread_messages=True,
             comm_delivery_failure_chance=0, comm_gaussian_noise=0,
             comm_jumble_chance=0):
    """Worker thread for the communicating-agents pursuit environment.

    Builds a local copy of the actor-critic network under this worker's
    variable scope and the op that syncs it from the 'global' network,
    then records all environment/communication settings on the instance.
    """
    self.name = "worker_" + str(name)
    self.is_chief = self.name == 'worker_0'
    print(self.name)

    self.number = name
    self.number_of_agents = number_of_agents
    self.model_path = model_path
    self.trainer = trainer
    self.global_episodes = global_episodes
    self.amount_of_agents_to_send_message_to = amount_of_agents_to_send_message_to

    # Per-episode statistics accumulated during training.
    self.episode_rewards = []
    self.episode_lengths = []
    self.episode_mean_values = []

    with tf.variable_scope(self.name):
        self.increment = self.global_episodes.assign_add(1)
        self.summary_writer = tf.summary.FileWriter("train_" + str(self.number))

        # Incoming channel always carries one message slot per sender;
        # outgoing size depends on whether one message is broadcast or
        # a distinct message is produced per recipient.
        comm_in_size = amount_of_agents_to_send_message_to * comm_size_per_agent
        comm_out_size = comm_in_size if spread_messages else comm_size_per_agent

        # Local network copy plus the op to pull the global parameters in.
        self.local_AC = AC_Network(s_size, a_size, comm_in_size, comm_out_size,
                                   self.name, trainer)
    self.update_local_ops = update_target_graph('global', self.name)

    # Pursuit environment and communication configuration.
    self.env = game
    self.s_size = s_size
    self.comm = comm
    self.display = display
    self.message_size = comm_size_per_agent
    self.spread_messages = spread_messages
    self.spread_rewards = False
    self.comm_delivery_failure_chance = comm_delivery_failure_chance
    self.comm_gaussian_noise = comm_gaussian_noise
    self.comm_jumble_chance = comm_jumble_chance
def __init__(self, game, name, s_size, a_size, trainer, model_path, global_episodes):
    """Worker thread for a single-agent A3C setup.

    Creates a local copy of the A3C network, the op syncing it from the
    'global' network, per-episode statistic buffers, and a one-hot
    boolean action table of size `a_size`. `game` is stored as the
    worker's environment; how it is driven is still TBD (see TODOs).
    """
    self.name = "worker_" + str(name)
    self.number = name
    self.model_path = model_path
    self.trainer = trainer
    self.global_episodes = global_episodes
    self.increment = self.global_episodes.assign_add(1)

    # Per-episode statistics accumulated during training.
    self.episode_rewards = []
    self.episode_lengths = []
    self.episode_mean_values = []
    self.summary_writer = tf.summary.FileWriter("train_" + str(self.number))

    # TODO Each worker: Select random track, select random car.
    # TODO Screen: Resolution, colour? Capture screens.
    # TODO Features: Render game features? Mini-map, speedometer? Camera-angle?
    # TODO Looping: How and when to timeout? How to restart (when off track, when going backwards etc.)?
    # TODO Rewards: How given?
    # TODO Interact: Do action (steering/speed).
    # TODO Interface:
    #   Constructor.
    #   env.new_episode()
    #   env.get_state().screen_buffer
    #   env.is_episode_finished()
    #   env.make_action(self.actions[a]) / 100.0

    # Create the local copy of the network and the tensorflow op to copy
    # global parameters to the local network.
    self.local_AC = A3CNetwork(s_size, a_size, self.name, trainer)
    self.update_local_ops = update_target_graph('global', self.name)

    # NOTE(review): the commented-out ViZDoom scenario configuration that
    # previously lived here was dead code and has been removed; the active
    # Doom variant of this worker retains the full setup.
    # Fixed: the original had a duplicated `self.actions = self.actions = ...`
    # assignment — a harmless but clearly accidental copy-paste defect.
    self.actions = np.identity(a_size, dtype=bool).tolist()
    self.env = game
def __init__(self, game, name, s_size, s_size_central, a_size, number_of_agents,
             trainer, model_path, global_episodes, display=False, comm=False,
             comm_size_per_agent=0, spread_messages=True, critic_action=False,
             critic_comm=False, comm_delivery_failure_chance=0,
             comm_gaussian_noise=0, comm_jumble_chance=0):
    """Worker thread for the multi-agent pursuit setup with a centralized critic.

    Builds one local actor-critic network per agent (each synced from its
    own 'global_agent<i>' copy), precomputes per-agent action index lists
    and one-hot action tables, and records the environment/communication
    configuration. `s_size`, `s_size_central` and `a_size` are per-agent
    sequences indexed by agent number.
    """
    self.name = "worker_" + str(name)
    self.is_chief = self.name == 'worker_0'
    print(self.name)

    self.number = name
    self.number_of_agents = number_of_agents
    self.model_path = model_path
    self.trainer = trainer
    self.global_episodes = global_episodes

    # Per-episode statistics accumulated during training.
    self.episode_rewards = []
    self.episode_lengths = []
    self.episode_mean_values = []

    with tf.variable_scope(self.name):
        self.increment = self.global_episodes.assign_add(1)
        self.summary_writer = tf.summary.FileWriter("train_" + str(self.number))
        self.critic_action = critic_action
        self.critic_comm = critic_comm

        # One local network per agent; incoming comm carries one slot per
        # other agent, outgoing size depends on broadcast vs per-recipient
        # messaging. Each syncs from its matching global agent network.
        self.local_AC = \
            [AC_Network(s_size[i], s_size_central[i], number_of_agents, a_size[i],
                        (number_of_agents - 1) * comm_size_per_agent,
                        (number_of_agents - 1) * comm_size_per_agent if spread_messages else comm_size_per_agent,
                        self.name, trainer, "_agent" + str(i),
                        critic_action=critic_action, critic_comm=critic_comm)
             for i in range(self.number_of_agents)]
    self.update_local_ops = [update_target_graph('global_agent' + str(i),
                                                 self.name + "_agent" + str(i))
                             for i in range(self.number_of_agents)]

    # Env Pursuit set-up
    self.env = game
    self.s_size = s_size
    self.number_of_actions = a_size
    self.action_indexes = [list(range(noa)) for noa in self.number_of_actions]
    # np.identity replaces the original hand-rolled zeros-plus-nested-loop
    # one-hot construction; result is the same float64 identity matrix.
    self.actions_one_hot = [np.identity(noa) for noa in self.number_of_actions]

    self.comm = comm
    self.display = display
    self.message_size = comm_size_per_agent
    self.spread_messages = spread_messages
    self.spread_rewards = True
    self.comm_delivery_failure_chance = comm_delivery_failure_chance
    self.comm_gaussian_noise = comm_gaussian_noise
    self.comm_jumble_chance = comm_jumble_chance
def __init__(self, game, name, s_size, a_size, trainer, model_path, global_episodes):
    """Worker thread for the ViZDoom 'basic' scenario.

    Creates the local A3C network and its sync op from the 'global'
    network, configures the passed-in Doom `game` instance (scenario,
    screen, buttons, game variables, rewards) and initializes it, and
    builds a one-hot boolean action table of size `a_size`.
    """
    self.name = "worker_" + str(name)
    self.number = name
    self.model_path = model_path
    self.trainer = trainer
    self.global_episodes = global_episodes
    self.increment = self.global_episodes.assign_add(1)

    # Per-episode statistics accumulated during training.
    self.episode_rewards = []
    self.episode_lengths = []
    self.episode_mean_values = []
    self.summary_writer = tf.summary.FileWriter("train_" + str(self.number))

    # Create the local copy of the network and the tensorflow op to copy
    # global parameters to the local network.
    self.local_AC = A3CNetwork(s_size, a_size, self.name, trainer)
    self.update_local_ops = update_target_graph('global', self.name)

    # The below code is related to setting up the Doom environment.
    game.set_doom_scenario_path("basic.wad")  # This corresponds to the simple task we will pose our agent
    game.set_doom_map("map01")
    game.set_screen_resolution(ScreenResolution.RES_160X120)
    game.set_screen_format(ScreenFormat.GRAY8)
    game.set_render_hud(False)
    game.set_render_crosshair(False)
    game.set_render_weapon(True)
    game.set_render_decals(False)
    game.set_render_particles(False)
    game.add_available_button(Button.MOVE_LEFT)
    game.add_available_button(Button.MOVE_RIGHT)
    game.add_available_button(Button.ATTACK)
    game.add_available_game_variable(GameVariable.AMMO2)
    game.add_available_game_variable(GameVariable.POSITION_X)
    game.add_available_game_variable(GameVariable.POSITION_Y)
    game.set_episode_timeout(300)
    game.set_episode_start_time(10)
    game.set_window_visible(False)
    game.set_sound_enabled(False)
    game.set_living_reward(-1)
    game.set_mode(Mode.PLAYER)
    game.init()

    # Fixed: the original had a duplicated `self.actions = self.actions = ...`
    # assignment — a harmless but clearly accidental copy-paste defect.
    self.actions = np.identity(a_size, dtype=bool).tolist()
    # End Doom set-up
    self.env = game