agent.attach(bc.TrainerController(
    evaluate_on='action',
    periodicity=parameters.update_frequency,
    show_episode_avg_V_value=True,
    show_avg_Bellman_residual=True))

# We wish to discover, among all versions of our neural network (i.e., after every training epoch), which one
# has the highest validation score.
# To achieve this goal, one can use the FindBestController along with an InterleavedTestEpochController. It is
# important that validationID is the same as the id argument of the InterleavedTestEpochController.
# The FindBestController will dump on disk the validation scores for each and every network, as well as the
# structure of the neural network having the best validation score. These dumps can then be used to plot the
# evolution of the validation scores (see below) or simply to recover the resulting neural network for your
# application.
agent.attach(bc.FindBestController(
    validationID=simple_maze_env.VALIDATION_MODE,
    testID=None,
    unique_fname=fname))

# All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
# "validation epoch" between each training epoch. For each validation epoch, we also want to display the sum of
# all rewards obtained, hence the show_score=True. Finally, we want to call the summarizePerformance method of
# simple_maze_env every validation epoch, hence the summarize_every=1.
agent.attach(bc.InterleavedTestEpochController(
    id=simple_maze_env.VALIDATION_MODE,
    epoch_length=parameters.steps_per_test,
    periodicity=1,
    show_score=True,
    summarize_every=1))

# --- Run the experiment ---
try:
    periodicity=1,
    reset_every='none'))

# We wish to discover, among all versions of our neural network (i.e., after every training epoch), which one
# seems to generalize best, thus which one has the highest validation score. However, we also want to keep track
# of a "true generalization score", the "test score". Indeed, what if we overfit the validation score?
# To achieve these goals, one can use the FindBestController along with two InterleavedTestEpochControllers, one
# for each mode (validation and test). It is important that validationID and testID are the same as the id
# arguments of the two InterleavedTestEpochControllers (implementing the validation mode and test mode
# respectively). The FindBestController will dump on disk the validation and test scores for each and every
# network, as well as the structure of the neural network having the best validation score. These dumps can then
# be used to plot the evolution of the validation and test scores (see below) or simply to recover the resulting
# neural network for your application.
agent.attach(bc.FindBestController(
    validationID=MG_two_storages_env.VALIDATION_MODE,
    testID=MG_two_storages_env.TEST_MODE,
    unique_fname=fname))

# All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
# "validation epoch" between each training epoch ("one out of two epochs", hence the periodicity=2). We do not
# want these validation epochs to interfere with the training of the agent, which is handled by the
# TrainerController, EpsilonController and the like, nor with its testing (see next controller). Therefore, we
# disable those controllers for the whole duration of the validation epochs interleaved this way, using the
# controllers_to_disable argument of the InterleavedTestEpochController. For each validation epoch, we also want
# to display the sum of all rewards obtained, hence the show_score=True. Finally, we never want this controller
# to call the summarizePerformance method of MG_two_storages_env, hence the summarize_every=-1.
agent.attach(bc.InterleavedTestEpochController(
    id=MG_two_storages_env.VALIDATION_MODE,
    epoch_length=parameters.steps_per_test,
    controllers_to_disable=[0, 1, 2, 3, 4, 7],
    periodicity=2,
    show_score=True,
    summarize_every=-1))
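# Note: the indices in controllers_to_disable refer to the order in which the controllers
# were attached to the agent (0 is the first agent.attach(...) call, 1 the second, and so
# on). The hypothetical helper below makes that mapping explicit for long scripts; it
# assumes the agent keeps its attached controllers in a _controllers list, which may differ
# across versions of the library.
def print_controller_indices(agent):
    """Print each attached controller with the index used by controllers_to_disable."""
    for i, controller in enumerate(agent._controllers):
        print(i, type(controller).__name__)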
agent.attach(bc.TrainerController(
    evaluate_on='action',
    periodicity=parameters.update_frequency,
    show_episode_avg_V_value=True,
    show_avg_Bellman_residual=True))

# We wish to discover, among all versions of our neural network (i.e., after every training epoch), which one
# has the highest validation score.
# To achieve this goal, one can use the FindBestController along with an InterleavedTestEpochController. It is
# important that validationID is the same as the id argument of the InterleavedTestEpochController.
# The FindBestController will dump on disk the validation scores for each and every network, as well as the
# structure of the neural network having the best validation score. These dumps can then be used to plot the
# evolution of the validation scores (see below) or simply to recover the resulting neural network for your
# application.
agent.attach(bc.FindBestController(
    validationID=simple_maze_env.VALIDATION_MODE,
    testID=None,
    unique_fname=fname,
    hasReward=False))

# All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
# "validation epoch" between each training epoch. For each validation epoch, we also want to display the sum of
# all rewards obtained, hence the show_score=True. Finally, we want to call the summarizePerformance method of
# simple_maze_env every validation epoch, hence the summarize_every=1.
agent.attach(bc.InterleavedTestEpochController(
    id=simple_maze_env.VALIDATION_MODE,
    epoch_length=parameters.steps_per_test,
    periodicity=1,
    show_score=True,
    summarize_every=1))

# --- Run the experiment ---
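# A minimal sketch of launching the run; it assumes the agent exposes run(n_epochs,
# epoch_length) and that parameters.epochs and parameters.steps_per_epoch are defined by the
# argument parser, as in the other examples.
agent.run(parameters.epochs, parameters.steps_per_epoch)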
    evaluate_on='action',
    periodicity=1,
    reset_every='none'))

# We wish to discover, among all versions of our neural network (i.e., after every training epoch), which one
# seems to generalize best, thus which one has the highest validation score. Here, we do not care about the
# "true generalization score", or "test score".
# To achieve this goal, one can use the FindBestController along with an InterleavedTestEpochController. It is
# important that validationID is the same as the id argument of the InterleavedTestEpochController.
# The FindBestController will dump on disk the validation scores for each and every network, as well as the
# structure of the neural network having the best validation score. These dumps can then be used to plot the
# evolution of the validation scores (see below) or simply to recover the resulting neural network for your
# application.
agent.attach(bc.FindBestController(
    validationID=ALE_env.VALIDATION_MODE,
    testID=None,
    unique_fname=fname))

# All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
# "validation epoch" between each training epoch ("one out of two epochs", hence the periodicity=2). We do not
# want these validation epochs to interfere with the training of the agent, which is handled by the
# TrainerController, EpsilonController and the like. Therefore, we disable those controllers for the whole
# duration of the validation epochs interleaved this way, using the controllers_to_disable argument of the
# InterleavedTestEpochController. For each validation epoch, we also want to display the sum of all rewards
# obtained, hence the show_score=True. Finally, we want to call the summarizePerformance method of ALE_env every
# [parameters.period_btw_summary_perfs] *validation* epochs.
agent.attach(bc.InterleavedTestEpochController(
    id=ALE_env.VALIDATION_MODE,
    epoch_length=parameters.steps_per_test,
    controllers_to_disable=[0, 1, 2, 3, 4],
    periodicity=2,
    show_score=True,
    summarize_every=parameters.period_btw_summary_perfs))
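# Once a run has completed, the dumps mentioned above can be plotted. A minimal sketch,
# assuming FindBestController writes a joblib archive under scores/<unique_fname>_scores.jbl
# with a 'vs' (validation scores) entry; adjust the path and key to what is actually written
# on your setup.
import joblib
import matplotlib.pyplot as plt

scores = joblib.load("scores/" + fname + "_scores.jbl")  # assumed dump location
plt.plot(range(1, len(scores['vs']) + 1), scores['vs'], label="VS", color='b')
plt.legend()
plt.xlabel("Number of epochs")
plt.ylabel("Score")
plt.savefig("scores/" + fname + "_scores.pdf")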
# TODO: best algorithm, hyperparameter tuning
if args.network == 'DQN':
    network = MyQNetwork(
        environment=env,
        batch_size=32,
        freeze_interval=args.epochs[1],
        double_Q=True,
        random_state=rng)
    agent = NeuralAgent(
        env,
        network,
        replay_memory_size=min(int(args.epochs[0] * args.epochs[1] * 1.1), 100000),
        batch_size=32,
        random_state=rng)
    agent.setDiscountFactor(0.95)
    agent.attach(bc.FindBestController(validationID=0, unique_fname=args.fname))
    agent.attach(bc.VerboseController())
    agent.attach(bc.TrainerController())
    agent.attach(bc.EpsilonController(
        initial_e=0.8,
        e_decays=args.epochs[0] * args.epochs[1],
        e_min=0.2))
    agent.attach(bc.LearningRateController(
        args.learning_rate[0],
        args.learning_rate[1],
        args.learning_rate[2]))
    agent.attach(bc.InterleavedTestEpochController(
        epoch_length=1000,
        controllers_to_disable=[1, 2, 3, 4]))
elif args.network == 'DDPG':
    network = MyACNetwork(
        environment=env,
        batch_size=32,
    periodicity=1,
    reset_every='none'))

# We wish to discover, among all versions of our neural network (i.e., after every training epoch), which one
# seems to generalize best, thus which one has the highest validation score. However, we also want to keep track
# of a "true generalization score", the "test score". Indeed, what if we overfit the validation score?
# To achieve these goals, one can use the FindBestController along with two InterleavedTestEpochControllers, one
# for each mode (validation and test). It is important that validationID and testID are the same as the id
# arguments of the two InterleavedTestEpochControllers (implementing the validation mode and test mode
# respectively). The FindBestController will dump on disk the validation and test scores for each and every
# network, as well as the structure of the neural network having the best validation score. These dumps can then
# be used to plot the evolution of the validation and test scores (see below) or simply to recover the resulting
# neural network for your application.
agent.attach(bc.FindBestController(
    validationID=env.VALIDATION_MODE,
    testID=env.TEST_MODE,
    unique_fname=fname))

# All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
# "validation epoch" between each training epoch (hence the periodicity=1). For each validation epoch, we also
# want to display the sum of all rewards obtained, hence the show_score=True. Finally, we never want this
# controller to call the summarizePerformance method of the environment, hence the summarize_every=-1.
agent.attach(bc.InterleavedTestEpochController(
    id=env.VALIDATION_MODE,
    epoch_length=parameters.steps_per_epoch,
    periodicity=1,
    show_score=True,
    summarize_every=-1))

# Besides inserting a validation epoch (required if one wants to find the best neural network over all training
# epochs), we also want to interleave a "test epoch" between each training epoch, as sketched below.
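# The FindBestController above was given testID=env.TEST_MODE, so a second
# InterleavedTestEpochController implementing the test mode is needed. The sketch below
# mirrors the validation controller; the epoch length and periodicity are assumptions to be
# set to match the experiment.
agent.attach(bc.InterleavedTestEpochController(
    id=env.TEST_MODE,
    epoch_length=parameters.steps_per_epoch,
    periodicity=1,
    show_score=True,
    summarize_every=-1))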
    e_min=parameters.epsilon_min,
    evaluate_on='action',
    periodicity=1,
    reset_every='none'))

# We wish to discover, among all versions of our neural network (i.e., after every training epoch), which one
# seems to generalize best, thus which one has the highest validation score. Here, we do not care about the
# "true generalization score", or "test score".
# To achieve this goal, one can use the FindBestController along with an InterleavedTestEpochController. It is
# important that validationID is the same as the id argument of the InterleavedTestEpochController.
# The FindBestController will dump on disk the validation scores for each and every network, as well as the
# structure of the neural network having the best validation score. These dumps can then be used to plot the
# evolution of the validation scores (see below) or simply to recover the resulting neural network for your
# application.
agent.attach(bc.FindBestController(
    validationID=2,
    testID=None,
    unique_fname=fname))

# All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
# "validation epoch" between each training epoch (hence the periodicity=1). We do not want these validation
# epochs to interfere with the training of the agent, which is handled by the TrainerController,
# EpsilonController and the like. Therefore, we disable those controllers for the whole duration of the
# validation epochs interleaved this way, using the controllers_to_disable argument of the
# InterleavedTestEpochController. For each validation epoch, we also want to display the sum of all rewards
# obtained, hence the show_score=True. Finally, we want to call the summarizePerformance method of ALE_env every
# [parameters.period_btw_summary_perfs] *validation* epochs.
agent.attach(bc.InterleavedTestEpochController(
    id=2,
    epoch_length=parameters.steps_per_test,
    controllers_to_disable=[0, 1, 2, 3, 4],
    periodicity=1,
    show_score=True,
    summarize_every=parameters.period_btw_summary_perfs))
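# The id given to an InterleavedTestEpochController is the mode in which the environment is
# placed during those epochs, which is why it must match validationID. The sketch below shows
# how an environment can branch on that mode; it assumes Environment is importable from
# deer.base_classes and that reset(mode) is the hook called at the start of each episode.
# MyEnv is purely illustrative.
from deer.base_classes import Environment

class MyEnv(Environment):
    VALIDATION_MODE = 2  # must match the id / validationID used above

    def reset(self, mode):
        self._mode = mode
        if self._mode == MyEnv.VALIDATION_MODE:
            pass  # start an episode on held-out validation data
        else:
            pass  # start a regular training episode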