Beispiel #1
0
    # Parse the command-line arguments; Defaults is handed to process_args
    # alongside them.
    parameters = process_args(sys.argv[1:], Defaults)

    # Deterministic runs use the fixed seed 123456; otherwise RandomState is
    # created without an explicit seed.
    rng = (np.random.RandomState(123456) if parameters.deterministic
           else np.random.RandomState())

    # --- Build the maze environment ---
    env = maze_env(rng, higher_dim_obs=HIGHER_DIM_OBS)

    # --- Build the CRAR learning algorithm ---
    # Keyword options are collected first so the positional hyperparameters
    # taken from `parameters` stay easy to scan in the call itself.
    crar_options = dict(double_Q=True,
                        high_int_dim=HIGH_INT_DIM,
                        internal_dim=3)
    learning_algo = CRAR(
        env,
        parameters.rms_decay, parameters.rms_epsilon, parameters.momentum,
        parameters.clip_norm, parameters.freeze_interval,
        parameters.batch_size, parameters.update_rule,
        rng,
        **crar_options)

    # --- Build the policies ---
    # Both policies share every argument except the last one: 1. for the
    # training policy and 0.1 for the test policy (presumably the exploration
    # rate -- confirm against EpsilonGreedyPolicy).
    train_policy, test_policy = (
        EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, epsilon)
        for epsilon in (1., 0.1))

    # --- Instantiate agent ---
    # NOTE: this call is cut off in this excerpt; its remaining arguments are
    # not visible here.
    agent = NeuralAgent(env,
                        learning_algo,
                        parameters.replay_memory_size,
                        # Largest first element among the entries returned by
                        # env.inputDimensions().
                        max(env.inputDimensions()[i][0]
                            for i in range(len(env.inputDimensions()))),
Beispiel #2
0
        # Deterministic run: seed the generator with parameters.seed.
        rng = np.random.RandomState(parameters.seed)
        print(" deterministic, seed: ",parameters.seed)
        # Default name built from the seed, learning rate and learning-rate decay.
        input_nnet = "normal_seed" + str(parameters.seed) + "_lr" + str(parameters.learning_rate) + "_lrd" + str(parameters.learning_rate_decay) 
    else:
        # No explicit seed is passed to the generator in this branch.
        rng = np.random.RandomState()
    # A non-empty dump name replaces whatever name was built above.
    # NOTE(review): input_nnet is not assigned in the else branch, so the
    # print below would raise a NameError when dumpname is empty -- unless
    # input_nnet is already set before this excerpt. Confirm.
    if parameters.dumpname != "":
        input_nnet = parameters.dumpname
    print("input nnet= ", input_nnet)
    # --- Build the maze environment (show_game is switched off) ---
    env = maze_env(rng,
                   higher_dim_obs=parameters.high_dim_obs,
                   show_game=False)

    # --- Build the CRAR learning algorithm ---
    # Only keyword options are passed besides env and rng, so they are
    # collected in one place.
    crar_options = dict(double_Q=True,
                        high_int_dim=HIGH_INT_DIM,
                        internal_dim=3,
                        div_entrop_loss=1.)
    learning_algo = CRAR(env, rng, **crar_options)

    # --- Build the policies ---
    # Both policies share every argument except the last one: 1. for the
    # training policy and 0.1 for the test policy (presumably the exploration
    # rate -- confirm against EpsilonGreedyPolicy).
    train_policy, test_policy = (
        EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, epsilon)
        for epsilon in (1., 0.1))

    # --- Instantiate agent ---
    # NOTE: this call is cut off after `rng` in this excerpt; its remaining
    # arguments are not visible here.
    agent = NeuralAgent(
        env,
        learning_algo,
        parameters.replay_memory_size,
        # Largest first element among the entries returned by env.inputDimensions().
        max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
        parameters.batch_size,
        rng,
Beispiel #3
0
    # Parse the command-line arguments; Defaults is handed to process_args
    # alongside them.
    parameters = process_args(sys.argv[1:], Defaults)

    # Deterministic runs use the fixed seed 123456; otherwise RandomState is
    # created without an explicit seed.
    rng = (np.random.RandomState(123456) if parameters.deterministic
           else np.random.RandomState())
    
    # --- Build the simple maze environment ---
    env = simple_maze_env(rng, higher_dim_obs=HIGHER_DIM_OBS)

    # --- Build the CRAR learning algorithm ---
    # Keyword options are collected first so the positional hyperparameters
    # taken from `parameters` stay easy to scan in the call itself.
    crar_options = dict(high_int_dim=False, internal_dim=2)
    learning_algo = CRAR(
        env,
        parameters.rms_decay, parameters.rms_epsilon, parameters.momentum,
        parameters.clip_norm, parameters.freeze_interval,
        parameters.batch_size, parameters.update_rule,
        rng,
        **crar_options)

    # --- Build the test policy ---
    # The final argument is 1. here (presumably the exploration rate --
    # confirm against EpsilonGreedyPolicy).
    test_policy = EpsilonGreedyPolicy(
        learning_algo,
        env.nActions(),
        rng,
        1.)

    # --- Instantiate agent ---
    # NOTE: this call is cut off after `parameters.batch_size` in this
    # excerpt; its remaining arguments and closing parenthesis are not visible.
    agent = NeuralAgent(
        env,
        learning_algo,
        parameters.replay_memory_size,
        # Largest first element among the entries returned by env.inputDimensions().
        max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
        parameters.batch_size,
    # Parse the command-line arguments; Defaults is handed to process_args
    # alongside them.
    parameters = process_args(sys.argv[1:], Defaults)

    # Deterministic runs seed the generator with parameters.seed; otherwise
    # RandomState is created without an explicit seed.
    rng = (np.random.RandomState(parameters.seed) if parameters.deterministic
           else np.random.RandomState())

    # --- Build the catcher environment ---
    # reverse is off and show_game is on for this run.
    env_options = dict(higher_dim_obs=HIGHER_DIM_OBS,
                       reverse=False,
                       show_game=True)
    env = catcher_env(rng, **env_options)

    # --- Build the CRAR learning algorithm ---
    learning_algo = CRAR(
        env,
        rng,
        double_Q=True,
        high_int_dim=HIGH_INT_DIM,
        internal_dim=3)

    # --- Build the test policy ---
    # The final argument is 0.1 (1. appears to have been an earlier setting).
    test_policy = EpsilonGreedyPolicy(
        learning_algo, env.nActions(), rng, 0.1)

    # --- Instantiate agent ---
    # The agent receives the environment, the learning algorithm, the replay
    # memory size, the batch size, the shared random generator and the test
    # policy built above.
    agent = NeuralAgent(
        env,
        learning_algo,
        parameters.replay_memory_size,
        # Largest first element among the entries returned by
        # env.inputDimensions(). The entries are iterated directly from a
        # single call rather than indexed through range(len(...)), which
        # called env.inputDimensions() again for every index.
        # (Presumably this is the longest observation history -- confirm
        # against NeuralAgent's signature.)
        max(dims[0] for dims in env.inputDimensions()),
        parameters.batch_size,
        rng,
        test_policy=test_policy)