logging.basicConfig(level=logging.INFO)

    # --- Parse parameters ---
    parameters = process_args(sys.argv[1:], Defaults)
    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    # --- Instantiate environment ---
    env = ALE_env(rng,
                  frame_skip=parameters.frame_skip,
                  ale_options=[{
                      "key": "random_seed",
                      "value": rng.randint(9999)
                  }, {
                      "key": "color_averaging",
                      "value": True
                  }, {
                      "key": "repeat_action_probability",
                      "value": 0.
                  }])

    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(env, parameters.rms_decay, parameters.rms_epsilon,
                          parameters.momentum, parameters.clip_delta,
                          parameters.freeze_interval, parameters.batch_size,
                          parameters.update_rule, rng)

    test_policy = EpsilonGreedyPolicy(qnetwork, env.nActions(), rng, 0.05)

    # --- Instantiate agent ---
Exemple #2
0
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # --- Parse parameters ---
    parameters = process_args(sys.argv[1:], Defaults)
    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    # --- Instantiate environment ---
    env = ALE_env(
        rng,
        frame_skip=parameters.frame_skip,
        ale_options=[
            {"key": "random_seed", "value": rng.randint(9999)},
            {"key": "color_averaging", "value": True},
            {"key": "repeat_action_probability", "value": 0.0},
        ],
    )

    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(
        env,
        parameters.rms_decay,
        parameters.rms_epsilon,
        parameters.momentum,
        parameters.clip_delta,
        parameters.freeze_interval,
        parameters.batch_size,
        parameters.update_rule,
Exemple #3
0
    logging.basicConfig(level=logging.INFO)

    # --- Parse parameters ---
    parameters = process_args(sys.argv[1:], Defaults)
    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    # --- Instantiate environment ---
    env = ALE_env(rng,
                  frame_skip=parameters.frame_skip,
                  ale_options=[{
                      "key": "random_seed",
                      "value": rng.randint(9999)
                  }, {
                      "key": "color_averaging",
                      "value": True
                  }, {
                      "key": "repeat_action_probability",
                      "value": 0.
                  }])

    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(env, parameters.rms_decay, parameters.rms_epsilon,
                          parameters.momentum, parameters.clip_delta,
                          parameters.freeze_interval, parameters.batch_size,
                          parameters.network_type, parameters.update_rule,
                          parameters.batch_accumulator, rng)

    # --- Instantiate agent ---
    agent = ALEAgent(
Exemple #4
0


if __name__ == "__main__":
    # Script entry point: set up logging, read the run parameters from the
    # command line, then build the random generator, the ALE environment and
    # the Q-network, in that order.
    logging.basicConfig(level=logging.INFO)

    # --- Parse parameters ---
    parameters = process_args(sys.argv[1:], Defaults)

    # A fixed seed (123456) is used when parameters.deterministic is truthy;
    # otherwise the generator is created without an explicit seed.
    rng = (
        np.random.RandomState(123456)
        if parameters.deterministic
        else np.random.RandomState()
    )

    # --- Instantiate environment ---
    # The "random_seed" option value is drawn from rng, so it follows the
    # fixed seed above whenever the run is deterministic.
    env = ALE_env(
        rng,
        frame_skip=parameters.frame_skip,
        ale_options=[
            {"key": "random_seed", "value": rng.randint(9999)},
            {"key": "color_averaging", "value": True},
            {"key": "repeat_action_probability", "value": 0.0},
        ],
    )

    # --- Instantiate qnetwork ---
    # Arguments are positional, so their order must be kept as is.
    qnetwork = MyQNetwork(
        env,
        parameters.rms_decay,
        parameters.rms_epsilon,
        parameters.momentum,
        parameters.clip_delta,
        parameters.freeze_interval,
        parameters.batch_size,
        parameters.network_type,
        parameters.update_rule,
        parameters.batch_accumulator,
        rng,
    )