# --- Instantiate qnetwork ---
qnetwork = MyQNetwork(
    environment=env,
    random_state=rng)

# --- Instantiate agent ---
agent = NeuralAgent(
    env,
    qnetwork,
    random_state=rng)

# --- Bind controllers to the agent ---
# Before every training epoch, we want to print a summary of the agent's epsilon, discount and
# learning rate, as well as the training epoch number.
agent.attach(bc.VerboseController())

# During training epochs, we want to train the agent after every action it takes.
# We also want to display, after each training episode (as opposed to after every training step), the average
# Bellman residual and the average of the V values obtained during the last episode.
agent.attach(bc.TrainerController())

# All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
# "test epoch" between each training epoch. We do not want these test epochs to interfere with the training of the
# agent. Therefore, we disable these controllers for the whole duration of the interleaved test epochs, using the
# controllers_to_disable argument of the InterleavedTestEpochController. The value of this argument is a list of
# the indexes of all controllers to disable, each index reflecting the order in which the controllers were attached.
agent.attach(bc.InterleavedTestEpochController(
    epoch_length=500,
    controllers_to_disable=[0, 1]))
    double_Q=True)

# --- Instantiate agent ---
agent = NeuralAgent(
    env,
    qnetwork,
    parameters.replay_memory_size,
    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
    parameters.batch_size,
    rng,
    exp_priority=1.)

# --- Bind controllers to the agent ---
# For comments, please refer to run_toy_env.py
agent.attach(bc.VerboseController(
    evaluate_on='epoch',
    periodicity=1))

agent.attach(bc.TrainerController(
    evaluate_on='action',
    periodicity=parameters.update_frequency,
    show_episode_avg_V_value=True,
    show_avg_Bellman_residual=True))

agent.attach(bc.LearningRateController(
    initial_learning_rate=parameters.learning_rate,
    learning_rate_decay=parameters.learning_rate_decay,
    periodicity=1))

agent.attach(bc.DiscountFactorController(
    initial_discount_factor=parameters.discount,
    rng,
    DoubleQ=True)

# --- Instantiate agent ---
agent = NeuralAgent(
    env,
    qnetwork,
    parameters.replay_memory_size,
    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
    parameters.batch_size,
    rng)

# --- Bind controllers to the agent ---
# For comments, please refer to run_toy_env.py
agent.attach(bc.VerboseController(
    evaluateOn='epoch',
    periodicity=1))

agent.attach(bc.TrainerController(
    evaluateOn='action',
    periodicity=parameters.update_frequency,
    showEpisodeAvgVValue=False,
    showAvgBellmanResidual=False))

agent.attach(bc.LearningRateController(
    initialLearningRate=parameters.learning_rate,
    learningRateDecay=parameters.learning_rate_decay,
    periodicity=1))

agent.attach(bc.DiscountFactorController(
    initialDiscountFactor=parameters.discount,
    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
    parameters.batch_size,
    rng,
    test_policy=test_policy,
)

# --- Create unique filename for FindBestController ---
h = hash(vars(parameters), hash_name="sha1")
fname = "ALE_" + h
print("The parameters hash is: {}".format(h))
print("The parameters are: {}".format(parameters))

# --- Bind controllers to the agent ---
# Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and
# learning rate, as well as the training epoch number.
agent.attach(bc.VerboseController(evaluate_on="epoch", periodicity=1))

# During training epochs, we want to train the agent after every [parameters.update_frequency] actions it takes.
# We also want to display, after each training episode (as opposed to after every training step), the average
# Bellman residual and the average of the V values obtained during the last episode, hence the two last arguments.
agent.attach(
    bc.TrainerController(
        evaluate_on="action",
        periodicity=parameters.update_frequency,
        show_episode_avg_V_value=True,
        show_avg_Bellman_residual=True,
    )
)

# At the end of every epoch, the learning rate can be modified using a LearningRateController. Here we
# wish to update the learning rate after every training epoch (periodicity=1), according to the parameters given.
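# The unique filename built above (fname) is only useful once a FindBestController is attached, which is not
# shown in this excerpt. A rough sketch of what that attachment can look like follows; the keyword names
# (validationID, unique_fname) and the validation-mode identifier are assumptions and may differ between
# versions of the library.
agent.attach(bc.FindBestController(
    validationID=0,          # identifier of the validation mode whose average score is tracked (assumed)
    unique_fname=fname))     # the best network found so far is saved under this unique filename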
    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
    parameters.batch_size,
    rng,
    test_policy=test_policy)

# --- Create unique filename for FindBestController ---
h = hash(vars(parameters), hash_name="sha1")
fname = "test_" + h
print("The parameters hash is: {}".format(h))
print("The parameters are: {}".format(parameters))

# --- Bind controllers to the agent ---
# Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and
# learning rate, as well as the training epoch number.
agent.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1))

# As with the discount factor and the learning rate, one can periodically update the parameter of the epsilon-greedy
# policy implemented by the agent. This controller has a few more capabilities, as it allows one to choose more
# precisely when to update epsilon: after every X actions, episodes or epochs. The parameter can also be reset every
# episode or epoch (or never, hence reset_every='none').
agent.attach(
    bc.EpsilonController(initial_e=parameters.epsilon_start,
                         e_decays=parameters.epsilon_decay,
                         e_min=parameters.epsilon_min,
                         evaluate_on='action',
                         periodicity=1,
                         reset_every='none'))

# --- Run the experiment ---
try:
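    # The excerpt stops at the try: above. What follows is a rough sketch of how such a run section is typically
    # completed; the os.mkdir / dump lines and the parameters.epochs / parameters.steps_per_epoch names are
    # assumptions drawn from similar examples, not part of this excerpt (os and joblib's dump are assumed to be
    # imported at the top of the script).
    os.mkdir("params")
except Exception:
    pass

dump(vars(parameters), "params/" + fname + ".jldump")     # store the exact parameters next to the saved results
agent.run(parameters.epochs, parameters.steps_per_epoch)  # launch the interleaved training and test epochs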
# --- Instantiate agent ---
agent = NeuralAgent(env,
                    qnetwork,
                    parameters.replay_memory_size,
                    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
                    parameters.batch_size,
                    rng,
                    exp_priority=1.,
                    train_policy=train_policy,
                    test_policy=test_policy)

# --- Bind controllers to the agent ---
# For comments, please refer to run_toy_env.py
agent.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1))

agent.attach(
    bc.TrainerController(evaluate_on='action',
                         periodicity=parameters.update_frequency,
                         show_episode_avg_V_value=True,
                         show_avg_Bellman_residual=True))

agent.attach(
    bc.LearningRateController(
        initial_learning_rate=parameters.learning_rate,
        learning_rate_decay=parameters.learning_rate_decay,
        periodicity=1))

agent.attach(
    bc.DiscountFactorController(
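# The excerpt above breaks off inside the DiscountFactorController call. Note that it assumes train_policy and
# test_policy objects were created beforehand. A minimal sketch of what that usually looks like with an
# epsilon-greedy policy follows; the EpsilonGreedyPolicy import path and its positional arguments are assumptions
# and may differ between library versions.
from deer.policies import EpsilonGreedyPolicy

train_policy = EpsilonGreedyPolicy(qnetwork, env.nActions(), rng, 0.1)  # explore 10% of the time while training
test_policy = EpsilonGreedyPolicy(qnetwork, env.nActions(), rng, 0.)    # act greedily during test epochs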
rng = np.random.RandomState(123456)

# --- Instantiate environment ---
env = Toy_env(rng)

# --- Instantiate qnetwork ---
qnetwork = MyQNetwork(environment=env, random_state=rng)

# --- Instantiate agent ---
agent = NeuralAgent(env, qnetwork, random_state=rng)

# --- Bind controllers to the agent ---
# Before every training epoch, we want to print a summary of the agent's epsilon, discount and
# learning rate, as well as the training epoch number.
agent.attach(bc.VerboseController())

# During training epochs, we want to train the agent after every action it takes.
# We also want to display, after each training episode (as opposed to after every training step), the average
# Bellman residual and the average of the V values obtained during the last episode.
agent.attach(bc.TrainerController())

# All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
# "test epoch" between each training epoch. We do not want these test epochs to interfere with the training of the
# agent. Therefore, we disable these controllers for the whole duration of the interleaved test epochs, using the
# controllers_to_disable argument of the InterleavedTestEpochController. The value of this argument is a list of
# the indexes of all controllers to disable, each index reflecting the order in which the controllers were attached.
agent.attach(
    bc.InterleavedTestEpochController(epoch_length=500,
                                      controllers_to_disable=[0, 1]))
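# In this snippet, index 0 refers to the VerboseController and index 1 to the TrainerController, since they were
# attached in that order. Disabling both means the interleaved test epochs neither print training summaries nor
# perform any learning updates, so they evaluate the current policy without changing it.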
rng = np.random.RandomState(123456)

# --- Instantiate environment ---
env = Toy_env(rng)

# --- Instantiate qnetwork ---
qnetwork = MyQNetwork(environment=env, random_state=rng)

# --- Instantiate agent ---
agent = NeuralAgent(env, qnetwork, random_state=rng)

# --- Bind controllers to the agent ---
# Before every training epoch, we want to print a summary of the agent's epsilon, discount and
# learning rate, as well as the training epoch number.
agent.attach(bc.VerboseController())

# During training epochs, we want to train the agent after every action it takes.
# We also want to display, after each training episode (as opposed to after every training step), the average
# Bellman residual and the average of the V values obtained during the last episode.
agent.attach(bc.TrainerController())

# All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
# "test epoch" between each training epoch. We do not want these test epochs to interfere with the training of the
# agent. Therefore, we disable these controllers for the whole duration of the interleaved test epochs, using the
# controllers_to_disable argument of the InterleavedTestEpochController. The value of this argument is a list of
# the indexes of all controllers to disable, each index reflecting the order in which the controllers were attached.
agent.attach(bc.InterleavedTestEpochController(epoch_length=500,
                                               controllers_to_disable=[0, 1]))

# --- Run the experiment ---
agent.run(n_epochs=100, epoch_length=1000)
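# For completeness, the toy example above relies on a handful of imports that the excerpt does not show. A minimal
# sketch is given below; the exact module paths (in particular where MyQNetwork lives and the name of the toy
# environment module) are assumptions and vary between versions of the library.
import numpy as np

from deer.agent import NeuralAgent
import deer.experiment.base_controllers as bc
from Toy_env import MyEnv as Toy_env                  # the toy environment defined alongside this example (assumed)
from deer.q_networks.q_net_theano import MyQNetwork   # assumed location; newer versions expose it elsewhere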
    rng,
    DoubleQ=True)

# --- Instantiate agent ---
agent = NeuralAgent(
    env,
    qnetwork,
    parameters.replay_memory_size,
    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
    parameters.batch_size,
    rng)

# --- Bind controllers to the agent ---
# For comments, please refer to run_toy_env.py
agent.attach(bc.VerboseController(
    evaluateOn='epoch',
    periodicity=1))

agent.attach(bc.TrainerController(
    evaluateOn='action',
    periodicity=parameters.update_frequency,
    showEpisodeAvgVValue=True,
    showAvgBellmanResidual=True))

agent.attach(bc.LearningRateController(
    initialLearningRate=parameters.learning_rate,
    learningRateDecay=parameters.learning_rate_decay,
    periodicity=1))

agent.attach(bc.DiscountFactorController(
    initialDiscountFactor=parameters.discount,