# Script-level switches: HIGHER_DIM_OBS is forwarded to the environment as
# `higher_dim_obs`, HIGH_INT_DIM to the learning algorithm as `high_int_dim`.
HIGHER_DIM_OBS = True
HIGH_INT_DIM = False

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # --- Parse parameters ---
    parameters = process_args(sys.argv[1:], Defaults)

    # Seed the generator from the parsed parameters only when a deterministic
    # run was requested; otherwise let NumPy pick its own entropy.
    rng = (
        np.random.RandomState(parameters.seed)
        if parameters.deterministic
        else np.random.RandomState()
    )

    # --- Instantiate environment ---
    env = catcher_env(
        rng,
        higher_dim_obs=HIGHER_DIM_OBS,
        reverse=False,
        show_game=True,
    )

    # --- Instantiate learning algorithm ---
    learning_algo = CRAR(
        env,
        rng,
        double_Q=True,
        high_int_dim=HIGH_INT_DIM,
        internal_dim=3,
    )

    # NOTE(review): the trailing 0.1 is presumably the epsilon of the
    # epsilon-greedy policy (an alternative value of 1. was left behind in a
    # comment in the original) -- confirm against EpsilonGreedyPolicy.
    test_policy = EpsilonGreedyPolicy(
        learning_algo,
        env.nActions(),
        rng,
        0.1,
    )

    # --- Instantiate agent ---
    agent = NeuralAgent(env,
                        learning_algo,
Exemple #2
0
# Forwarded to the learning algorithm below as its `high_int_dim` option.
HIGH_INT_DIM = False

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # --- Parse parameters ---
    parameters = process_args(sys.argv[1:], Defaults)

    # A deterministic run uses the fixed seed 123456; passing None instead
    # leaves the generator unseeded, exactly like calling RandomState().
    rng = np.random.RandomState(123456 if parameters.deterministic else None)

    # --- Instantiate environment ---
    # Observation dimensionality comes from the parsed parameters here.
    env = catcher_env(
        rng,
        higher_dim_obs=parameters.high_dim_obs,
        reverse=False,
        show_game=False,
    )

    # --- Instantiate learning algorithm ---
    # The optimiser settings are passed positionally, so their order must
    # match the CRAR constructor signature.
    learning_algo = CRAR(
        env,
        parameters.rms_decay,
        parameters.rms_epsilon,
        parameters.momentum,
        parameters.clip_norm,
        parameters.freeze_interval,
        parameters.batch_size,
        parameters.update_rule,
        rng,
        double_Q=True,
        high_int_dim=HIGH_INT_DIM,
        internal_dim=3,
    )
Exemple #3
0
# Script-level switches: HIGHER_DIM_OBS is handed to the environment as
# `higher_dim_obs`, HIGH_INT_DIM to the learning algorithm as `high_int_dim`.
HIGHER_DIM_OBS = True
HIGH_INT_DIM = False

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # --- Parse parameters ---
    parameters = process_args(sys.argv[1:], Defaults)

    # Fixed seed (123456) for reproducible runs; None keeps NumPy's default
    # unseeded behaviour, identical to RandomState() with no argument.
    rng = np.random.RandomState(123456 if parameters.deterministic else None)

    # --- Instantiate environment ---
    env = catcher_env(
        rng,
        higher_dim_obs=HIGHER_DIM_OBS,
        reverse=False,
    )

    # --- Instantiate learning algorithm ---
    # Optimiser settings are positional; keep them in constructor order.
    learning_algo = CRAR(
        env,
        parameters.rms_decay,
        parameters.rms_epsilon,
        parameters.momentum,
        parameters.clip_norm,
        parameters.freeze_interval,
        parameters.batch_size,
        parameters.update_rule,
        rng,
        double_Q=True,
        high_int_dim=HIGH_INT_DIM,
        internal_dim=3,
    )