# NOTE(review): collapsed one-line fragment of a CRAR maze launch script.
# Visible steps: parse CLI args via process_args; seed the RNG with the fixed
# value 123456 when parameters.deterministic, else unseeded; build maze_env;
# build a CRAR learner (rms/momentum/clip/freeze/batch/update-rule settings
# taken from parameters, double_Q=True, internal_dim=3); build train/test
# EpsilonGreedyPolicy instances (eps 1. and 0.1); then start a
# NeuralAgent(...) construction that is CUT OFF mid-argument-list at the end
# of this line -- do not execute this fragment as-is.
parameters = process_args(sys.argv[1:], Defaults) if parameters.deterministic: rng = np.random.RandomState(123456) else: rng = np.random.RandomState() # --- Instantiate environment --- env = maze_env(rng, higher_dim_obs=HIGHER_DIM_OBS) # --- Instantiate learning_algo --- learning_algo = CRAR(env, parameters.rms_decay, parameters.rms_epsilon, parameters.momentum, parameters.clip_norm, parameters.freeze_interval, parameters.batch_size, parameters.update_rule, rng, double_Q=True, high_int_dim=HIGH_INT_DIM, internal_dim=3) train_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 1.) test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 0.1) # --- Instantiate agent --- agent = NeuralAgent(env, learning_algo, parameters.replay_memory_size, max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
# NOTE(review): collapsed one-line fragment of a seeded CRAR maze launch
# script. It STARTS mid-conditional (the `if ...deterministic:` header is
# missing) and ENDS mid NeuralAgent(...) argument list -- do not execute
# as-is. Visible steps: seed the RNG from parameters.seed and derive an
# input_nnet name from seed / learning_rate / learning_rate_decay; fall back
# to an unseeded RNG otherwise; override input_nnet from parameters.dumpname
# when given; build maze_env (show_game=False); build a CRAR learner
# (double_Q=True, internal_dim=3, div_entrop_loss=1.); build train/test
# EpsilonGreedyPolicy instances (eps 1. and 0.1); begin NeuralAgent(...).
rng = np.random.RandomState(parameters.seed) print(" deterministic, seed: ",parameters.seed) input_nnet = "normal_seed" + str(parameters.seed) + "_lr" + str(parameters.learning_rate) + "_lrd" + str(parameters.learning_rate_decay) else: rng = np.random.RandomState() if parameters.dumpname != "": input_nnet = parameters.dumpname print("input nnet= ", input_nnet) # --- Instantiate environment --- env = maze_env(rng, higher_dim_obs=parameters.high_dim_obs, show_game=False) # --- Instantiate learning_algo --- learning_algo = CRAR( env, rng, double_Q=True, high_int_dim=HIGH_INT_DIM, internal_dim=3, div_entrop_loss=1.) train_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 1.) test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 0.1) # --- Instantiate agent --- agent = NeuralAgent( env, learning_algo, parameters.replay_memory_size, max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))), parameters.batch_size, rng,
# NOTE(review): collapsed one-line fragment of a CRAR simple_maze launch
# script, truncated mid NeuralAgent(...) argument list -- do not execute
# as-is. Visible steps: parse CLI args; fixed seed 123456 when
# parameters.deterministic, else unseeded RNG; build simple_maze_env; build a
# CRAR learner (rms/momentum/clip/freeze/batch/update-rule settings from
# parameters, high_int_dim=False, internal_dim=2); build a fully-random
# test EpsilonGreedyPolicy (eps 1.); begin NeuralAgent(...).
parameters = process_args(sys.argv[1:], Defaults) if parameters.deterministic: rng = np.random.RandomState(123456) else: rng = np.random.RandomState() # --- Instantiate environment --- env = simple_maze_env(rng, higher_dim_obs=HIGHER_DIM_OBS) # --- Instantiate learning_algo --- learning_algo = CRAR( env, parameters.rms_decay, parameters.rms_epsilon, parameters.momentum, parameters.clip_norm, parameters.freeze_interval, parameters.batch_size, parameters.update_rule, rng, high_int_dim=False, internal_dim=2) test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 1.) # --- Instantiate agent --- agent = NeuralAgent( env, learning_algo, parameters.replay_memory_size, max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))), parameters.batch_size,
# CRAR catcher-experiment setup (reformatted from a collapsed one-liner).
# NOTE(review): relies on names brought into scope elsewhere in the original
# file: process_args, Defaults, catcher_env, CRAR, EpsilonGreedyPolicy,
# NeuralAgent, HIGHER_DIM_OBS, HIGH_INT_DIM, sys, np.

# Parse command-line options against the experiment defaults.
parameters = process_args(sys.argv[1:], Defaults)

# Reproducible RNG when a deterministic run was requested, unseeded otherwise.
rng = (np.random.RandomState(parameters.seed)
       if parameters.deterministic
       else np.random.RandomState())

# --- Instantiate environment ---
env = catcher_env(rng, higher_dim_obs=HIGHER_DIM_OBS, reverse=False,
                  show_game=True)

# --- Instantiate learning algorithm ---
learning_algo = CRAR(env, rng, double_Q=True, high_int_dim=HIGH_INT_DIM,
                     internal_dim=3)

# Mostly-greedy evaluation policy (epsilon = 0.1).
test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 0.1)

# --- Instantiate agent ---
# History length passed to the agent is the largest first-axis size across
# all of the environment's input dimensions.
agent = NeuralAgent(env, learning_algo, parameters.replay_memory_size,
                    max(dims[0] for dims in env.inputDimensions()),
                    parameters.batch_size, rng,
                    test_policy=test_policy)