def dqn_per_gridworld():
    """Train a DQN with prioritized experience replay on the 4x4 GridWorld."""
    hp = DictConfig({})
    hp.steps = 1000
    hp.batch_size = 500
    hp.replay_batch = 100
    hp.replay_size = 1000
    # NOTE(review): original comment said "every 100 steps" — with these values
    # this is 100 * 1500 = 150000; confirm the intended unit of delete_freq.
    hp.delete_freq = 100 * (hp.batch_size + hp.replay_size)
    hp.env_record_freq = 100
    hp.env_record_duration = 25
    hp.max_steps = 50
    hp.grid_size = 4
    hp.lr = 1e-3
    hp.epsilon_exploration = 0.1
    hp.gamma_discount = 0.9

    model = (
        GenericConvModel(height=4, width=4, in_channels=4, channels=[50], out_size=4)
        .float()
        .to(device)
    )
    train_dqn_per(
        GridWorldEnvWrapper,
        model,
        hp,
        project_name="SimpleGridWorld",
        run_name="dqn_per",
    )
def test_dqn_vanilla(self, *_):
    """Smoke-test vanilla DQN training on every registered test environment.

    Runs a tiny 2-step training loop per case; `*_` absorbs mock-patch
    arguments injected by test decorators.
    """
    from dqn.dqn import train_dqn

    hp = DictConfig({})
    hp.steps = 2
    hp.batch_size = 2
    hp.env_record_freq = 0
    hp.env_record_duration = 0
    hp.max_steps = 50
    hp.grid_size = 4
    hp.lr = 1e-3
    hp.epsilon_exploration = 0.1
    hp.gamma_discount = 0.9

    for case in env_cases:
        print(case["env"].__name__)
        model = GenericLinearModel(
            in_size=case["input"],
            units=[10],
            out_size=case["output"],
            flatten=case.get("flatten", False),
        )
        train_dqn(case["env"], model, hp)
def breakout_double_dqn():
    """Train a double DQN with a conv model on Breakout (42x42x3 observations)."""
    hp = DictConfig({})
    hp.steps = 2000
    hp.batch_size = 50
    hp.replay_batch = 50
    hp.replay_size = 1000
    # NOTE(review): original comment said "every 100 steps" but the factor is
    # 50 — confirm which is intended.
    hp.delete_freq = 50 * (hp.batch_size + hp.replay_size)
    hp.delete_percentage = 0.2
    hp.env_record_freq = 100
    hp.env_record_duration = 50
    hp.lr = 1e-3
    hp.gamma_discount = 0.9
    # Linear epsilon decay (1 -> 0.1 by step 1500) replaces a fixed
    # epsilon_exploration here.
    hp.epsilon_flatten_step = 1500
    hp.epsilon_start = 1
    hp.epsilon_end = 0.1
    hp.epsilon_decay_function = decay_functions.LINEAR
    hp.target_model_sync_freq = 50

    model = GenericConvModel(42, 42, 3, [50, 50, 50], [100], 4)
    train_dqn_double(
        BreakoutEnvWrapper, model, hp, project_name="Breakout", run_name="double_dqn"
    )
def breakout_dqn():
    """Train a vanilla DQN with a linear model on flattened Breakout frames."""
    hp = DictConfig({})
    hp.steps = 2000
    hp.batch_size = 32
    hp.env_record_freq = 500
    hp.env_record_duration = 100
    hp.max_steps = 1000
    hp.lr = 1e-3
    hp.epsilon_exploration = 0.1
    hp.gamma_discount = 0.9

    # 42*42*3 flattened pixels in, 4 actions out.
    model = GenericLinearModel(42 * 42 * 3, [100, 100], 4, flatten=True)
    train_dqn(
        BreakoutEnvWrapper, model, hp, project_name="Breakout", run_name="vanilla_dqn"
    )
def dqn_double():
    """Train a double (target-network) DQN on the 4x4 GridWorld."""
    hp = DictConfig({})
    hp.steps = 1000
    hp.batch_size = 500
    hp.replay_batch = 100
    hp.replay_size = 1000
    # NOTE(review): original comment said "every 100 steps" — with these values
    # this is 100 * 1500; confirm the intended unit of delete_freq.
    hp.delete_freq = 100 * (hp.batch_size + hp.replay_size)
    hp.env_record_freq = 100
    hp.env_record_duration = 25
    hp.max_steps = 50
    hp.grid_size = 4
    hp.lr = 1e-3
    hp.gamma_discount = 0.9
    # Linear epsilon decay (1 -> 0.001 by step 700) replaces a fixed
    # epsilon_exploration here.
    hp.epsilon_flatten_step = 700
    hp.epsilon_start = 1
    hp.epsilon_end = 0.001
    hp.epsilon_decay_function = decay_functions.LINEAR
    hp.target_model_sync_freq = 50

    model = (
        GenericConvModel(height=4, width=4, in_channels=4, channels=[50], out_size=4)
        .float()
        .to(device)
    )
    train_dqn_double(
        GridWorldEnvWrapper,
        model,
        hp,
        project_name="SimpleGridWorld",
        run_name="dqn_target",
    )
max_steps = 500
reward_range = (-10, 10)  # TODO: Fix this

# NOTE(review): `max_steps`, `reward_range` and `__init__` appear to be the
# interior of an env-wrapper class whose `class` header is outside this chunk;
# they belong at class indentation under that header — confirm when merging.
def __init__(self):
    super().__init__()
    self.env = gym.make(
        "GDY-Sokoban---2-v0",
        global_observer_type=gd.ObserverType.VECTOR,
        player_observer_type=gd.ObserverType.VECTOR,
        level=0,
    )


if __name__ == "__main__":
    hp = DictConfig({})
    hp.steps = 10000
    hp.batch_size = 1000
    hp.env_record_freq = 500
    hp.env_record_duration = 50
    hp.max_steps = 200
    hp.lr = 1e-3
    hp.epsilon_exploration = 0.1
    hp.gamma_discount = 0.9

    model = GenericLinearModel(5 * 7 * 8, [10], 5, flatten=True).float().to(device)
    # NOTE(review): other call sites pass project_name=/run_name= — confirm
    # this train_dqn overload accepts a bare `name=` keyword.
    train_dqn(SokobanV2L0EnvWrapper, model, hp, name="SokobanV2L0")