def train_ale_environment_lstm():
    """Train an A3C-LSTM agent on Atari Pong (episode terminates on life loss)."""
    env = ALEEnvironment(ALEEnvironment.PONG, loss_of_life_termination=True)
    # Gym alternative:
    # env = GymEnvironment("Breakout-v0", state_processor=AtariProcessor())

    # Network configuration for the Atari A3C-LSTM architecture.
    config = AtariA3CLSTMConfig(env, initial_learning_rate=0.001)

    # Shared policy network used by all learner threads (GPU-backed).
    policy = PolicyNetwork(config, num_of_checkpoints=40, using_gpu=True)

    # Checkpoint every 30 minutes; stop training at the 40th checkpoint.
    learner = A3CLSTMAgent(
        policy, env,
        save_time_based=30,
        checkpoint_stop=40,
        log_dir='./train/Pong/a3c_lstm_gpu_8_threads_pong_time_based_30_20',
        num_of_threads=8)
    learner.train()
def train_ale_environment():
    """Train an A3C agent on Atari Breakout with 8 learner threads."""
    env = ALEEnvironment(ALEEnvironment.BREAKOUT)
    # Gym alternative:
    # env = GymEnvironment("Breakout-v0", state_processor=AtariProcessor())

    # A3C network configuration for Atari.
    config = AtariA3CConfig(env, initial_learning_rate=0.004)

    # Shared policy network for all A3C workers (GPU-backed).
    policy = PolicyNetwork(config, num_of_checkpoints=40, using_gpu=True)

    # Checkpoint every 30 minutes; stop at the 20th checkpoint.
    learner = A3CAgent(
        policy, env,
        save_time_based=30,
        checkpoint_stop=20,
        log_dir='./train/Breakout/a3c_gpu_8_threads_breakout_time_based_30_20',
        num_of_threads=8)
    learner.train()
def eval_ale_environment(model_path, render, num_of_epochs, steps_per_epoch,
                         initial_epsilon, log_dir):
    """Evaluate a prioritized-DQN model on Atari Riverraid.

    Returns whatever ``DQNAgent.evaluate`` yields (evaluation statistics).
    """
    env = ALEEnvironment(ALEEnvironment.RIVERRAID,
                         is_render=render,
                         max_episode_steps=5000)

    # Prioritized-replay DQN configuration for Atari.
    config = PrioritizedAtariDQNConfig(env)

    # Load the pre-trained weights into the policy network.
    policy = PolicyNetwork(config, load_model_path=model_path)

    evaluator = DQNAgent(policy, env,
                         initial_epsilon=initial_epsilon,
                         report_frequency=1,
                         num_of_threads=8,
                         num_of_epochs=num_of_epochs,
                         steps_per_epoch=steps_per_epoch,
                         log_dir=log_dir,
                         prioritized=True)
    return evaluator.evaluate()
def composite_agents():
    """Evaluate a divide-and-conquer (two-model) A3C agent on rendered Breakout."""
    env = ALEEnvironment(ALEEnvironment.BREAKOUT,
                         is_render=True,
                         max_episode_steps=10000)

    config = AtariDQA3CConfig(env)

    # Composite policy: blends a main model with an auxiliary "lifetime" model.
    policy = DQPolicyNetwork(
        config,
        load_model_path=
        "./train/Breakout/a3c_breakout_time_based_30_32_02-02-2018-15-53/model-49861769",
        load_model_path_2=
        "./train/Lifetime/Breakout/a3c_gpu_8_threads_breakout_time_based_30_40_03-09-2018-16-24/model-49002411",
        alpha=0.4,
        epsilon=0.02)

    # Single thread because the environment window is rendered.
    evaluator = A3CAgent(policy, env,
                         num_of_threads=1,
                         report_frequency=1,
                         num_of_epochs=1,
                         steps_per_epoch=10000,
                         log_dir="./thi_test/Lifetime/Breakout/a3c")
    evaluator.evaluate()
def train_ale_environment():
    """Train a prioritized-replay DQN (with importance sampling) on Breakout."""
    env = ALEEnvironment(ALEEnvironment.BREAKOUT)
    # Gym alternative:
    # env = GymEnvironment("Breakout-v0", state_processor=AtariProcessor())

    # Prioritized DQN configuration; beta anneals importance-sampling weights.
    config = PrioritizedAtariDQNConfig(env,
                                       initial_beta=0.4,
                                       initial_learning_rate=0.00025,
                                       debug_mode=True)

    policy = PolicyNetwork(config, max_num_of_checkpoints=100)

    # Replay buffer of 2**19 transitions; save every 5e5 steps.
    learner = DQNAgent(policy, env,
                       save_frequency=5e5,
                       steps_per_epoch=1e6,
                       num_of_epochs=50,
                       exp_replay_size=2**19,
                       importance_sampling=True,
                       log_dir="./train/breakout/pdqn_check_point",
                       prioritized_alpha=0.6,
                       prioritized=True)
    learner.train()
def train_ale_environment():
    """Train A3C on Breakout with life-loss penalty and a blacken state processor."""
    # Negative reward on each life lost; AtariBlackenProcessor masks part of
    # the screen before the frame reaches the network.
    env = ALEEnvironment(ALEEnvironment.BREAKOUT,
                         loss_of_life_negative_reward=True,
                         state_processor=AtariBlackenProcessor())

    config = AtariA3CConfig(env, initial_learning_rate=0.004)

    # Shared policy network for all workers (GPU-backed).
    policy = PolicyNetwork(config, num_of_checkpoints=40, using_gpu=True)

    # Checkpoint every 30 minutes; stop at the 40th checkpoint.
    learner = A3CAgent(
        policy, env,
        save_time_based=30,
        checkpoint_stop=40,
        log_dir=
        './train/smc/Breakout/a3c_gpu_8_threads_breakout_time_based_30_40',
        num_of_threads=8)
    learner.train()
def train_atari_sea_quest():
    """Train an A3C learner on Seaquest (frame skip of 8)."""
    env = ALEEnvironment(ALEEnvironment.SEAQUEST, frame_skip=8)
    config = AtariA3CConfig(env, initial_learning_rate=0.004)
    policy = PolicyNetwork(config, max_num_of_checkpoints=40)

    # 40 epochs of 1e6 steps, checkpointing once per epoch worth of steps.
    learner = AgentFactory.create(A3CLearner, policy, env,
                                  num_of_epochs=40,
                                  steps_per_epoch=1e6,
                                  checkpoint_frequency=1e6,
                                  log_dir='./train/sea_quest/a3c_checkpoints')
    learner.train()
def eval_ale_environment(game, model_path, render, num_of_epochs,
                         steps_per_epoch, stochastic, initial_epsilon, log_dir,
                         human_interaction=False):
    """Evaluate a trained A3C policy on an ALE game.

    :param game: ALE rom identifier (e.g. ``ALEEnvironment.BREAKOUT``)
    :param model_path: checkpoint to load into the policy network
    :param render: show the game window (forces a single thread)
    :param stochastic: if True, sample actions from the policy head;
        if False, act greedily with e-greedy exploration at ``initial_epsilon``
    :param human_interaction: a human plays alongside (forces a single thread)
    :return: result of ``A3CAgent.evaluate``
    """
    # A GUI (rendering or human play) requires a single worker thread.
    num_of_threads = 1 if render or human_interaction else 8

    environment = ALEEnvironment(game, is_render=render,
                                 max_episode_steps=200000)

    # The two modes differed only in the network config flag and the
    # e-greedy agent options; build those differences once instead of
    # duplicating the whole construction path.
    if stochastic:
        network_config = AtariA3CConfig(environment)
        exploration_args = {}
    else:
        network_config = AtariA3CConfig(environment, stochastic=False)
        exploration_args = {'using_e_greedy': True,
                            'initial_epsilon': initial_epsilon}

    # Shared network loaded from the checkpoint.
    network = PolicyNetwork(network_config, load_model_path=model_path)

    agent = A3CAgent(network, environment,
                     num_of_threads=num_of_threads,
                     report_frequency=1,
                     num_of_epochs=num_of_epochs,
                     steps_per_epoch=steps_per_epoch,
                     log_dir=log_dir,
                     **exploration_args)
    return agent.evaluate()
def compatible_1():
    """Smoke-test three environment wrappers by printing their spaces and steps."""
    banner = '+++++++++++++++++++++++++++++++++++++++++++++++++'

    # FruitAPI-native ALE wrapper.
    print(banner)
    fruit_env = ALEEnvironment(rom_name=ALEEnvironment.BREAKOUT,
                               state_processor=None)
    state_space = fruit_env.get_state_space()
    print(state_space.get_range())
    print(tuple(state_space.get_shape()))
    print(fruit_env.get_action_space().get_range())
    print(fruit_env.reset())
    print(fruit_env.get_state())
    print(banner)

    # Raw Arcade Learning Environment loaded from a rom file.
    print(banner)
    ale_env = ArcadeLearningEnvironment('../envs/roms/breakout.bin')
    print(ale_env.states())
    print(ale_env.actions())
    print(ale_env.reset())
    print(ale_env.get_states())
    print(ale_env.execute(0))
    print(banner)

    # TF-style wrapper around the FruitAPI environment.
    print(banner)
    tf_env = TFEnvironment(fruit_environment=fruit_env)
    print(tf_env.states())
    print(tf_env.actions())
    print(tf_env.get_states())
    print(tf_env.execute(0))
    print(banner)
def train_ale_environment():
    """Train an A3C learner on Breakout with debug output enabled."""
    env = ALEEnvironment(ALEEnvironment.BREAKOUT)

    config = AtariA3CConfig(env, initial_learning_rate=0.004, debug_mode=True)

    policy = PolicyNetwork(config, max_num_of_checkpoints=40)

    # 40 epochs of 1e6 steps, one checkpoint per 1e6 steps.
    learner = AgentFactory.create(A3CLearner, policy, env,
                                  num_of_epochs=40,
                                  steps_per_epoch=1e6,
                                  checkpoint_frequency=1e6,
                                  log_dir='./train/breakout/a3c_checkpoints')
    learner.train()
def evaluate_ale_environment():
    """Evaluate a saved A3C model on rendered Breakout with a single learner."""
    env = ALEEnvironment(ALEEnvironment.BREAKOUT, is_render=True)

    config = AtariA3CConfig(env)

    # Restore the trained weights from the checkpoint directory.
    policy = PolicyNetwork(
        config,
        load_model_path='./train/breakout/a3c_checkpoints_10-23-2019-02-13/model-39030506')

    # One learner only: a rendered GUI cannot be shared across threads.
    evaluator = AgentFactory.create(A3CLearner, policy, env,
                                    num_of_epochs=1,
                                    steps_per_epoch=10000,
                                    num_of_learners=1,
                                    log_dir='./test/breakout/a3c_checkpoints')
    evaluator.evaluate()
def train_ale_environment():
    """Train a plain DQN agent on Breakout with default settings."""
    env = ALEEnvironment(ALEEnvironment.BREAKOUT)
    config = AtariDQNConfig(env)
    policy = PolicyNetwork(config)
    learner = DQNAgent(policy, env,
                       log_dir="./train/breakout/dqn_breakout")
    learner.train()
def composite_agents(main_model_path, auxiliary_model_path, alpha, epsilon):
    """Evaluate a divide-and-conquer A3C learner built from two saved models.

    Returns the result of ``agent.evaluate``.
    """
    # Plain Breakout environment (no life-loss penalty).
    env = ALEEnvironment(ALEEnvironment.BREAKOUT)

    # Divide-and-conquer A3C configuration.
    config = DQAtariA3CConfig(env)

    # Main policy comes from the shared network; the auxiliary model and
    # the mixing parameters are handed to the learner below.
    policy = PolicyNetwork(config, load_model_path=main_model_path)

    evaluator = AgentFactory.create(DQA3CLearner, policy, env,
                                    num_of_epochs=1,
                                    steps_per_epoch=10000,
                                    checkpoint_frequency=1e5,
                                    learner_report_frequency=1,
                                    auxiliary_model_path=auxiliary_model_path,
                                    alpha=alpha,
                                    epsilon=epsilon)
    return evaluator.evaluate()
def train_breakout_with_a3c_remove_immutable_objects():
    """Train A3C on Breakout with part of the screen blackened by a processor."""
    # Life loss gives a negative reward; AtariBlackenProcessor masks the
    # immutable portion of the frame before it reaches the network.
    env = ALEEnvironment(ALEEnvironment.BREAKOUT,
                         loss_of_life_negative_reward=True,
                         state_processor=AtariBlackenProcessor())

    config = AtariA3CConfig(env, initial_learning_rate=0.004, debug_mode=True)

    policy = PolicyNetwork(config, max_num_of_checkpoints=50)

    # 50 epochs of 1e6 steps, one checkpoint per 1e6 steps.
    learner = AgentFactory.create(A3CLearner, policy, env,
                                  num_of_epochs=50,
                                  steps_per_epoch=1e6,
                                  checkpoint_frequency=1e6,
                                  log_dir='./train/breakout/a3c_smc_1_checkpoints')
    learner.train()
def train_atari_sea_quest_with_map():
    """Train an A3C-with-map learner on Seaquest using a composite reward."""

    def combine_rewards(rewards):
        # rewards[3] flags low oxygen (assumed from the reward layout —
        # TODO confirm against SeaquestMapProcessor). When set, use channel 2
        # only; otherwise sum channels 0 and 1.
        if rewards[3] == 1:
            return rewards[2]
        return rewards[0] + rewards[1]

    env = ALEEnvironment(ALEEnvironment.SEAQUEST,
                         state_processor=SeaquestMapProcessor(),
                         frame_skip=8)

    config = A3CMapConfig(env, initial_learning_rate=0.004)
    policy = PolicyNetwork(config, max_num_of_checkpoints=40)

    learner = AgentFactory.create(A3CMapLearner, policy, env,
                                  num_of_epochs=40,
                                  steps_per_epoch=1e6,
                                  checkpoint_frequency=1e6,
                                  log_dir='./train/sea_quest/a3c_map_checkpoints',
                                  network_update_steps=12,
                                  update_reward_fnc=combine_rewards)
    learner.train()
def train_ale_environment():
    """Train a DQN learner on Breakout with debug output enabled."""
    env = ALEEnvironment(ALEEnvironment.BREAKOUT)
    config = AtariDQNConfig(env, debug_mode=True)
    policy = PolicyNetwork(config, max_num_of_checkpoints=40)

    # 20 epochs of 1e6 steps; checkpoint every 5e5 steps.
    learner = AgentFactory.create(DQNLearner, policy, env,
                                  num_of_epochs=20,
                                  steps_per_epoch=1e6,
                                  checkpoint_frequency=5e5,
                                  log_dir='./train/breakout/dqn_checkpoints')
    learner.train()
def composite_agents(path_1, path_2, alpha, epsilon):
    """Evaluate a two-model composite A3C policy on Breakout.

    Returns the result of ``A3CAgent.evaluate``.
    """
    env = ALEEnvironment(ALEEnvironment.BREAKOUT)
    config = AtariDQA3CConfig(env)

    # Composite network blending two checkpoints with the given alpha/epsilon.
    policy = DQPolicyNetwork(config,
                             load_model_path=path_1,
                             load_model_path_2=path_2,
                             alpha=alpha,
                             epsilon=epsilon)

    evaluator = A3CAgent(policy, env,
                         num_of_threads=8,
                         report_frequency=1,
                         num_of_epochs=12,
                         steps_per_epoch=10000,
                         log_dir="./thi_test/Lifetime/Breakout/a3c")
    return evaluator.evaluate()
def train_breakout_with_a3c_normal():
    """Train A3C on an unmodified Breakout environment (baseline run)."""
    env = ALEEnvironment(ALEEnvironment.BREAKOUT)

    config = AtariA3CConfig(env, initial_learning_rate=0.004)

    policy = PolicyNetwork(config, num_of_checkpoints=20)

    # 70 epochs of 1e6 steps, saving every 5e6 steps, 8 worker threads.
    learner = A3CAgent(
        policy, env,
        num_of_epochs=70,
        steps_per_epoch=1e6,
        save_frequency=5e6,
        log_dir=
        './train/smc/Breakout/a3c_gpu_8_threads_breakout_time_based_normal',
        num_of_threads=8)
    learner.train()
def train_ale_environment():
    """Train a Dueling-DQN agent on Atari Pong."""
    env = ALEEnvironment(ALEEnvironment.PONG)
    # Gym alternative:
    # env = GymEnvironment("Breakout-v0", state_processor=AtariProcessor())

    # Dueling-network DQN configuration for Atari.
    config = AtariDuelDQNConfig(env)

    # Keep at most 40 checkpoints.
    policy = PolicyNetwork(config, num_of_checkpoints=40)

    # Save a checkpoint every 30 minutes; stop at the 40th checkpoint.
    learner = DQNAgent(policy, env,
                       save_time_based=30,
                       checkpoint_stop=40,
                       log_dir="./train/Pong/dueldqn_pong_time_based_30_40")
    learner.train()
def train_ale_environment_human_interaction(human_interaction=True):
    """Train an A3C agent on Riverraid, optionally with a human in the loop.

    :param human_interaction: when True, render the game, disable the
        combined fire-direction actions (indices 10-17) so the human can
        take them over (1: fast shoot, 2: slow shoot, 3: right, 4: left),
        and run a single learner thread; when False, train headless with
        8 threads and the full action set.
    """
    # The two modes previously duplicated the whole construction path;
    # only the environment options, the thread count and the log dir differ.
    if human_interaction:
        environment = ALEEnvironment(
            ALEEnvironment.RIVERRAID,
            is_render=True,
            disable_actions=[10, 11, 12, 13, 14, 15, 16, 17])
        num_of_threads = 1
        log_dir = './train/Human/Riverraid/a3c_gpu_8_threads_river_disable_fire_time_based_30_40'
    else:
        environment = ALEEnvironment(ALEEnvironment.RIVERRAID, is_render=False)
        num_of_threads = 8
        log_dir = './train/Human/Riverraid/a3c_gpu_8_threads_breakout_time_based_30_40'

    # A3C configuration with debug output.
    network_config = AtariA3CConfig(environment,
                                    initial_learning_rate=0.001,
                                    debug_mode=True)

    # Shared policy network for the learner thread(s) (GPU-backed).
    network = PolicyNetwork(network_config,
                            num_of_checkpoints=40,
                            using_gpu=True)

    # Checkpoint every 30 minutes; stop at the 40th checkpoint.
    agent = A3CAgent(network, environment,
                     save_time_based=30,
                     checkpoint_stop=40,
                     log_dir=log_dir,
                     num_of_threads=num_of_threads)
    agent.train()