Example #1
0
def dqn_per_gridworld():
    """Train a DQN with prioritized experience replay on the simple grid world."""
    cfg = DictConfig({})

    # Training schedule.
    cfg.steps = 1000
    cfg.batch_size = 500

    # Replay buffer sizing and pruning cadence.
    cfg.replay_batch = 100
    cfg.replay_size = 1000
    # Prune once per 100 * (batch + buffer) transitions collected.
    cfg.delete_freq = 100 * (cfg.batch_size + cfg.replay_size)

    # Periodic environment video recording.
    cfg.env_record_freq = 100
    cfg.env_record_duration = 25

    # Environment limits.
    cfg.max_steps = 50
    cfg.grid_size = 4

    # Optimization / exploration.
    cfg.lr = 1e-3
    cfg.epsilon_exploration = 0.1
    cfg.gamma_discount = 0.9

    net = GenericConvModel(
        height=4,
        width=4,
        in_channels=4,
        channels=[50],
        out_size=4,
    ).float().to(device)

    train_dqn_per(
        GridWorldEnvWrapper,
        net,
        cfg,
        project_name="SimpleGridWorld",
        run_name="dqn_per",
    )
Example #2
0
    def test_dqn_vanilla(self, *_):
        """Smoke-test vanilla DQN training across every registered env case."""
        from dqn.dqn import train_dqn

        config = DictConfig({})

        # Keep the run tiny: two steps, batch of two, recording disabled.
        config.steps = 2
        config.batch_size = 2
        config.env_record_freq = 0
        config.env_record_duration = 0

        config.max_steps = 50
        config.grid_size = 4

        config.lr = 1e-3
        config.epsilon_exploration = 0.1
        config.gamma_discount = 0.9

        for case in env_cases:
            print(case["env"].__name__)

            net = GenericLinearModel(
                in_size=case["input"],
                units=[10],
                out_size=case["output"],
                flatten=case.get("flatten", False),
            )

            train_dqn(case["env"], net, config)
Example #3
0
def breakout_double_dqn():
    """Train a double DQN with a target network on the Breakout wrapper."""
    cfg = DictConfig({})

    # Training schedule.
    cfg.steps = 2000
    cfg.batch_size = 50

    # Replay buffer sizing and pruning.
    cfg.replay_batch = 50
    cfg.replay_size = 1000
    # Prune after 50 * (batch + buffer) transitions; drop 20% of the buffer.
    cfg.delete_freq = 50 * (cfg.batch_size + cfg.replay_size)
    cfg.delete_percentage = 0.2

    # Periodic environment video recording.
    cfg.env_record_freq = 100
    cfg.env_record_duration = 50

    # Optimization.
    cfg.lr = 1e-3
    cfg.gamma_discount = 0.9

    # Linearly annealed exploration instead of a fixed epsilon.
    cfg.epsilon_flatten_step = 1500
    cfg.epsilon_start = 1
    cfg.epsilon_end = 0.1
    cfg.epsilon_decay_function = decay_functions.LINEAR

    # Copy online weights into the target network every 50 steps.
    cfg.target_model_sync_freq = 50

    net = GenericConvModel(42, 42, 3, [50, 50, 50], [100], 4)

    train_dqn_double(
        BreakoutEnvWrapper, net, cfg, project_name="Breakout", run_name="double_dqn"
    )
Example #4
0
def breakout_dqn():
    """Train a vanilla DQN with a flat linear model on the Breakout wrapper."""
    cfg = DictConfig({})

    # Training schedule and environment limits.
    cfg.steps = 2000
    cfg.batch_size = 32
    cfg.max_steps = 1000

    # Periodic environment video recording.
    cfg.env_record_freq = 500
    cfg.env_record_duration = 100

    # Optimization / exploration.
    cfg.lr = 1e-3
    cfg.epsilon_exploration = 0.1
    cfg.gamma_discount = 0.9

    # 42x42 RGB frames flattened into a linear network with two hidden layers.
    net = GenericLinearModel(42 * 42 * 3, [100, 100], 4, flatten=True)

    train_dqn(
        BreakoutEnvWrapper, net, cfg, project_name="Breakout", run_name="vanilla_dqn"
    )
Example #5
0
def dqn_double():
    """Train a double DQN with a target network on the simple grid world."""
    cfg = DictConfig({})

    # Training schedule.
    cfg.steps = 1000
    cfg.batch_size = 500

    # Replay buffer sizing and pruning cadence.
    cfg.replay_batch = 100
    cfg.replay_size = 1000
    # Prune once per 100 * (batch + buffer) transitions collected.
    cfg.delete_freq = 100 * (cfg.batch_size + cfg.replay_size)

    # Periodic environment video recording.
    cfg.env_record_freq = 100
    cfg.env_record_duration = 25

    # Environment limits.
    cfg.max_steps = 50
    cfg.grid_size = 4

    # Optimization.
    cfg.lr = 1e-3
    cfg.gamma_discount = 0.9

    # Linearly annealed exploration instead of a fixed epsilon.
    cfg.epsilon_flatten_step = 700
    cfg.epsilon_start = 1
    cfg.epsilon_end = 0.001
    cfg.epsilon_decay_function = decay_functions.LINEAR

    # Copy online weights into the target network every 50 steps.
    cfg.target_model_sync_freq = 50

    net = GenericConvModel(
        height=4,
        width=4,
        in_channels=4,
        channels=[50],
        out_size=4,
    ).float().to(device)

    train_dqn_double(
        GridWorldEnvWrapper,
        net,
        cfg,
        project_name="SimpleGridWorld",
        run_name="dqn_target",
    )
Example #6
0
    # Hard episode-length cap; presumably consumed by the wrapper base class — TODO confirm.
    max_steps = 500
    # NOTE(review): placeholder bounds (see TODO) — actual reward range not established here.
    reward_range = (-10, 10)  # TODO: Fix this

    def __init__(self):
        """Create the Sokoban level-0 env with VECTOR observers for both views."""
        super().__init__()
        # Both global and player observers use gd.ObserverType.VECTOR, so
        # observations are numeric grids rather than rendered pixels.
        self.env = gym.make(
            "GDY-Sokoban---2-v0",
            global_observer_type=gd.ObserverType.VECTOR,
            player_observer_type=gd.ObserverType.VECTOR,
            level=0,
        )


if __name__ == "__main__":

    hp = DictConfig({})

    hp.steps = 10000
    hp.batch_size = 1000
    hp.env_record_freq = 500
    hp.env_record_duration = 50
    hp.max_steps = 200
    hp.lr = 1e-3
    hp.epsilon_exploration = 0.1
    hp.gamma_discount = 0.9

    model = GenericLinearModel(5 * 7 * 8, [10], 5,
                               flatten=True).float().to(device)

    train_dqn(SokobanV2L0EnvWrapper, model, hp, name="SokobanV2L0")