def declare_trainer():
    ## Initializing our StarCraft 2 environment
    env = SC2Env(map_name="Simple64", render=False, step_mul=16, player_race="terran",
                 enemy_race="terran", difficulty="very_easy")

    # action_wrapper = MOspatialTerranWrapper(10, 10, env.env_instance._interface_formats[0]._raw_resolution)
    action_wrapper = SimpleMOTerranWrapper(10, 10, env.env_instance._interface_formats[0]._raw_resolution)

    # state_builder = Simple64GridState_SimpleTerran(grid_size=7)  # with grid_size=7 this state_builder ends up with a total size of 110 ((7*7)*2 + 12)
    state_builder = TVTUnitStackingEnemyGridState(grid_size=4)  # total size of 92

    helper = ModelBuilder()
    helper.add_input_layer()
    helper.add_fullyconn_layer(nodes=82)
    helper.add_output_layer()

    dq_network = DDQNKerasMO(action_wrapper=action_wrapper, state_builder=state_builder,
                             build_model=helper.get_model_layout(), per_episode_epsilon_decay=False,
                             learning_rate=0.001, epsilon_decay=0.99999, epsilon_min=0.005,
                             memory_maxlen=100000, min_memory_size=2000, batch_size=32)

    # Terran agent
    agent = SC2Agent(dq_network, KilledUnitsReward())

    # trainer = Trainer(env, agent, save_path='/home/lpdcalves/', file_name="terran_ddqn_mo_v_easy",
    #                   save_every=200, enable_save=True, relative_path=False, reset_epsilon=False,
    #                   max_training_episodes=3000, max_steps_training=1200,
    #                   max_test_episodes=100, max_steps_testing=1200, rolling_avg_window_size=50)

    trainer = Trainer(env, agent, save_path='urnai/models/saved',
                      file_name="terran_ddqn_mostackgridstate_atkgrouping4",
                      save_every=20, enable_save=True, relative_path=True, reset_epsilon=False,
                      max_training_episodes=10, max_steps_training=1200,
                      max_test_episodes=2, max_steps_testing=1200, rolling_avg_window_size=5)
    return trainer

def declare_trainer(): env = SC2Env(map_name="Simple64", render=False, step_mul=16, player_race="terran", enemy_race="terran", difficulty="very_easy") action_wrapper = SimpleTerranWrapper(atk_grid_x=4, atk_grid_y=4) state_builder = MultipleUnitGridState(4) helper = ModelBuilder() helper.add_input_layer() helper.add_fullyconn_layer(nodes=50) helper.add_output_layer() dq_network = DoubleDeepQLearning(action_wrapper=action_wrapper, state_builder=state_builder, build_model=helper.get_model_layout(), use_memory=True, gamma=0.99, learning_rate=0.001, memory_maxlen=100000, min_memory_size=2000, lib="keras", epsilon_decay=0.9994, epsilon_start=0.6, epsilon_min=0.005, epsilon_linear_decay=True, per_episode_epsilon_decay=True) agent = SC2Agent(dq_network, KilledUnitsReward()) # trainer = Trainer(env, agent, save_path='/home/lpdcalves/', file_name="tvt_veasy_newactwrapper_t1", # save_every=200, enable_save=True, relative_path=False, reset_epsilon=False, # max_training_episodes=3000, max_steps_training=1500, # max_test_episodes=100, max_steps_testing=1500, rolling_avg_window_size=50) trainer = Trainer(env, agent, save_path='urnai/models/saved', file_name="terran_ddql_newgridstate7", save_every=6, enable_save=True, relative_path=True, reset_epsilon=False, max_training_episodes=2, max_steps_training=1000, max_test_episodes=2, max_steps_testing=100, rolling_avg_window_size=5) return trainer
def declare_trainer(): env = GymEnv(id="Breakout-ram-v0", render=True) action_wrapper = env.get_action_wrapper() #state_builder = PureState(env.env_instance.observation_space) state_builder = GymState(env.env_instance.observation_space.shape[0]) helper = ModelBuilder() # helper.add_input_layer(nodes=32) # helper.add_fullyconn_layer(8) helper.add_input_layer(nodes=66) helper.add_output_layer() # dq_network = DQNKeras(action_wrapper=action_wrapper, state_builder=state_builder, # gamma=0.99, learning_rate=0.001, epsilon_decay=0.9995, epsilon_min=0.01, # build_model=helper.get_model_layout(), memory_maxlen=5000) # dq_network = DDQNKeras(action_wrapper=action_wrapper, state_builder=state_builder, build_model=helper.get_model_layout(), use_memory=False, # gamma=0.99, learning_rate=0.001, epsilon_decay=0.999997, epsilon_min=0.01, memory_maxlen=100000, min_memory_size=2000) dq_network = DeepQLearning(action_wrapper=action_wrapper, state_builder=state_builder, build_model=helper.get_model_layout(), gamma=0.99, learning_rate=0.01, epsilon_decay=0.998, epsilon_min=0.01, memory_maxlen=100000, min_memory_size=4000, per_episode_epsilon_decay=True, lib="pytorch") #dq_network = PGKeras(action_wrapper, state_builder, learning_rate=0.001, gamma=0.99, build_model=helper.get_model_layout()) agent = GenericAgent(dq_network, PureReward()) trainer = Trainer(env, agent, save_path='urnai/models/saved', file_name="breakout-ram-v0_dql_66_pytorch", save_every=300, enable_save=True, relative_path=True, max_training_episodes=1200, max_steps_training=1800, max_test_episodes=100, max_steps_testing=1800) return trainer
def declare_trainer(): env = GymEnv(id="CartPole-v1", render=False) action_wrapper = env.get_action_wrapper() state_builder = GymState(env.env_instance.observation_space.shape[0]) helper = ModelBuilder() helper.add_input_layer(nodes=50) #helper.add_fullyconn_layer(50) helper.add_output_layer() dq_network = DeepQLearning(action_wrapper=action_wrapper, state_builder=state_builder, build_model=helper.get_model_layout(), gamma=0.99, use_memory=True, learning_rate=0.001, epsilon_decay=0.9997, epsilon_min=0.01, memory_maxlen=50000, min_memory_size=100, batch_size=32, lib="pytorch") # dq_network = DQNPytorch(action_wrapper=action_wrapper, state_builder=state_builder, build_model=helper.get_model_layout(), # gamma=0.99, learning_rate=0.001, epsilon_decay=0.9997, epsilon_min=0.01, memory_maxlen=50000, min_memory_size=100) # dq_network = DDQNKeras(action_wrapper=action_wrapper, state_builder=state_builder, build_model=helper.get_model_layout(), use_memory=False, # gamma=0.99, learning_rate=0.001, epsilon_decay=0.999997, epsilon_min=0.01, memory_maxlen=100000, min_memory_size=2000) agent = GenericAgent(dq_network, PureReward()) # Cartpole-v1 is solved when avg. reward over 100 episodes is greater than or equal to 475 trainer = Trainer(env, agent, save_path='urnai/models/saved', file_name="cartpole_v1_dql_pytorch1", save_every=100, enable_save=True, relative_path=True, max_training_episodes=1000, max_steps_training=1000, max_test_episodes=100, max_steps_testing=1000) return trainer
def declare_trainer(): env = GymEnv(id="Breakout-v0", render=True) img = env.reset() action_wrapper = env.get_action_wrapper() state_builder = GymState(env.env_instance.observation_space.shape) helper = ModelBuilder() helper.add_convolutional_layer( filters=8, kernel_size=4, input_shape=env.env_instance.observation_space.shape) helper.add_maxpooling_layer() helper.add_flatten_layer() helper.add_fullyconn_layer(10) helper.add_output_layer() dq_network = DeepQLearning(action_wrapper=action_wrapper, state_builder=state_builder, build_model=helper.get_model_layout(), gamma=0.99, learning_rate=0.01, epsilon_decay=0.9999, epsilon_min=0.005, memory_maxlen=50000, min_memory_size=2000, lib="keras") agent = GenericAgent(dq_network, PureReward()) trainer = Trainer(env, agent, save_path='urnai/models/saved', file_name="breakout-v0_keras", save_every=100, enable_save=True, relative_path=True, max_training_episodes=50, max_steps_training=800, max_test_episodes=5, max_steps_testing=800) return trainer
def declare_trainer(): env = SC2Env(map_name="Simple64", render=False, step_mul=16, player_race="terran", enemy_race="random", difficulty="very_easy") action_wrapper = SimpleTerranWrapper() state_builder = Simple64GridState(grid_size=4) helper = ModelBuilder() helper.add_input_layer(nodes=50) helper.add_fullyconn_layer(50) helper.add_output_layer() dq_network = DDQNKeras(action_wrapper=action_wrapper, state_builder=state_builder, build_model=helper.get_model_layout(), per_episode_epsilon_decay=False, gamma=0.99, learning_rate=0.001, epsilon_decay=0.99999, epsilon_min=0.005, memory_maxlen=100000, min_memory_size=2000) agent = SC2Agent(dq_network, KilledUnitsReward()) trainer = Trainer(env, agent, save_path='urnai/models/saved', file_name="terran_ddqn_vs_random_v_easy", save_every=50, enable_save=True, relative_path=True, max_training_episodes=3000, max_steps_training=1200, max_test_episodes=100, max_steps_testing=1200) return trainer
def declare_trainer(): env = GymEnv(id="CartPole-v1") action_wrapper = env.get_action_wrapper() state_builder = GymState(env.env_instance.observation_space.shape[0]) helper = ModelBuilder() helper.add_input_layer(nodes=50) helper.add_fullyconn_layer(50) helper.add_output_layer() dq_network = DDQNKeras(action_wrapper=action_wrapper, state_builder=state_builder, build_model=helper.get_model_layout(), use_memory=False, gamma=0.99, learning_rate=0.001, epsilon_decay=0.9997, epsilon_min=0.01, memory_maxlen=50000, min_memory_size=1000) # dq_network = PGKeras(action_wrapper, state_builder, learning_rate=0.001, gamma=0.99, build_model=helper.get_model_layout()) agent = GenericAgent(dq_network, PureReward()) # Cartpole-v1 is solved when avg. reward over 100 episodes is greater than or equal to 475 #test_params = TestParams(num_matches=100, steps_per_test=100, max_steps=500, reward_threshold=500) trainer = Trainer(env, agent, save_path='urnai/models/saved', file_name="cartpole_v1_ddqn_50x50_test", save_every=100, enable_save=True, relative_path=True, max_training_episodes=1000, max_steps_training=500, max_test_episodes=100, max_steps_testing=500) return trainer
def declare_trainer(): env = GymEnv(id="CartPole-v0") action_wrapper = env.get_action_wrapper() state_builder = GymState(env.env_instance.observation_space.shape[0]) helper = ModelBuilder() helper.add_input_layer() helper.add_fullyconn_layer(25) helper.add_output_layer() dq_network = DDQNKeras(action_wrapper=action_wrapper, state_builder=state_builder, build_model=helper.get_model_layout(), use_memory=False, gamma=0.99, learning_rate=0.001, epsilon_decay=0.9997, epsilon_min=0.01, memory_maxlen=50000, min_memory_size=1000) agent = GenericAgent(dq_network, PureReward()) # Cartpole-v0 is solved when avg. reward over 100 episodes is greater than or equal to 195 trainer = Trainer(env, agent, save_path='urnai/models/saved', file_name="cartpole-v0-unified-step-play3", save_every=100, enable_save=True, relative_path=True, max_training_episodes=1000, max_steps_training=500, max_test_episodes=100, max_steps_testing=500) return trainer
def declare_trainer(): env = GymEnv(id="FrozenLakeNotSlippery-v0") action_wrapper = env.get_action_wrapper() state_builder = FrozenLakeState() training_date = str(datetime.now()).replace(" ", "_").replace(":", "_").replace( ".", "_") helper = ModelBuilder() helper.add_input_layer() helper.add_fullyconn_layer(256) helper.add_fullyconn_layer(256) helper.add_fullyconn_layer(256) helper.add_fullyconn_layer(256) helper.add_output_layer() dq_network = DDQNKeras(action_wrapper=action_wrapper, state_builder=state_builder, learning_rate=0.005, gamma=0.90, use_memory=False, per_episode_epsilon_decay=True, build_model=helper.get_model_layout()) agent = GenericAgent(dq_network, FrozenlakeReward()) trainer = Trainer(env, agent, file_name=training_date + os.path.sep + "frozenlake_test_ddqnKeras", save_every=1000, enable_save=True, max_training_episodes=1000, max_steps_training=500, max_test_episodes=10, max_steps_testing=500) return trainer