Example 1
 agent.attach(bc.TrainerController(
     evaluate_on='action', 
     periodicity=parameters.update_frequency, 
     show_episode_avg_V_value=True, 
     show_avg_Bellman_residual=True))
 
 # We wish to discover, among all versions of our neural network (i.e., after every training epoch), which one
 # has the highest validation score.
 # To achieve this goal, one can use the FindBestController along with an InterleavedTestEpochController. It is
 # important that the validationID is the same as the id argument of the InterleavedTestEpochController.
 # The FindBestController will dump on disk the validation scores for each and every network, as well as the
 # structure of the neural network having the best validation score. These dumps can then be used to plot the
 # evolution of the validation and test scores (see below) or simply to recover the resulting neural network for
 # your application.
 agent.attach(bc.FindBestController(
     validationID=simple_maze_env.VALIDATION_MODE,
     testID=None,
     unique_fname=fname))
 
 # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
 # "validation epoch" between each training epoch. For each validation epoch, we also want to display the sum of all
 # rewards obtained, hence show_score=True. Finally, we want to call the summarizePerformance method of
 # simple_maze_env at every validation epoch, hence summarize_every=1.
 agent.attach(bc.InterleavedTestEpochController(
     id=simple_maze_env.VALIDATION_MODE, 
     epoch_length=parameters.steps_per_test,
     periodicity=1,
     show_score=True,
     summarize_every=1))
 
 # --- Run the experiment ---
 try:
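
For reference, a minimal sketch of how the scores dumped by the FindBestController above might be read back and plotted once such an experiment has finished. The dump path ("scores/" + fname + "_scores.jldump") and the 'vs' key are assumptions based on the deer example scripts, not something shown in the snippet:

import joblib
import matplotlib.pyplot as plt

# Assumed location and layout of the FindBestController dump.
scores = joblib.load("scores/" + fname + "_scores.jldump")
plt.plot(range(1, len(scores['vs']) + 1), scores['vs'], label="validation score")
plt.xlabel("Epoch")
plt.ylabel("Score")
plt.legend()
plt.show()
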
Example 2
                             periodicity=1,
                             reset_every='none'))

    # We wish to discover, among all versions of our neural network (i.e., after every training epoch), which one
    # seems to generalize best, thus which one has the highest validation score. However, we also want to keep
    # track of a "true generalization score", the "test score". Indeed, what if we overfit the validation score?
    # To achieve these goals, one can use the FindBestController along with two InterleavedTestEpochControllers, one
    # for each mode (validation and test). It is important that the validationID and testID are the same as the id
    # arguments of the two InterleavedTestEpochControllers (implementing the validation mode and test mode,
    # respectively). The FindBestController will dump on disk the validation and test scores for each and every
    # network, as well as the structure of the neural network having the best validation score. These dumps can then
    # be used to plot the evolution of the validation and test scores (see below) or simply to recover the resulting
    # neural network for your application.
    agent.attach(
        bc.FindBestController(validationID=MG_two_storages_env.VALIDATION_MODE,
                              testID=MG_two_storages_env.TEST_MODE,
                              unique_fname=fname))

    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
    # "validation epoch" between each training epoch ("one of two epochs", hence the periodicity=2). We do not want
    # these validation epochs to interfere with the training of the agent, which is handled by the
    # TrainerController, EpsilonController, and the like, nor with its testing (see the next controller). Therefore,
    # we disable those controllers for the whole duration of the validation epochs interleaved this way, using the
    # controllers_to_disable argument of the InterleavedTestEpochController. For each validation epoch, we also want
    # to display the sum of all rewards obtained, hence show_score=True. Finally, we never want this controller to
    # call the summarizePerformance method of MG_two_storages_env.
    agent.attach(
        bc.InterleavedTestEpochController(
            id=MG_two_storages_env.VALIDATION_MODE,
            epoch_length=parameters.steps_per_test,
            controllers_to_disable=[0, 1, 2, 3, 4, 7],
Example 3
        bc.TrainerController(evaluate_on='action',
                             periodicity=parameters.update_frequency,
                             show_episode_avg_V_value=True,
                             show_avg_Bellman_residual=True))

    # We wish to discover, among all versions of our neural network (i.e., after every training epoch), which one
    # has the highest validation score.
    # To achieve this goal, one can use the FindBestController along with an InterleavedTestEpochController. It is
    # important that the validationID is the same as the id argument of the InterleavedTestEpochController.
    # The FindBestController will dump on disk the validation scores for each and every network, as well as the
    # structure of the neural network having the best validation score. These dumps can then be used to plot the
    # evolution of the validation and test scores (see below) or simply to recover the resulting neural network for
    # your application.
    agent.attach(
        bc.FindBestController(validationID=simple_maze_env.VALIDATION_MODE,
                              testID=None,
                              unique_fname=fname,
                              hasReward=False))

    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
    # "validation epoch" between each training epoch. For each validation epoch, we also want to display the sum of
    # all rewards obtained, hence show_score=True. Finally, we want to call the summarizePerformance method of
    # simple_maze_env at every validation epoch, hence summarize_every=1.
    agent.attach(
        bc.InterleavedTestEpochController(
            id=simple_maze_env.VALIDATION_MODE,
            epoch_length=parameters.steps_per_test,
            periodicity=1,
            show_score=True,
            summarize_every=1))

    # --- Run the experiment ---
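    # A sketch of the launch call: assuming parameters.epochs and parameters.steps_per_epoch are
    # defined as in the other deer example scripts, NeuralAgent.run(n_epochs, epoch_length)
    # drives the whole training/validation loop.
    agent.run(parameters.epochs, parameters.steps_per_epoch)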
Example 4
                             evaluate_on='action',
                             periodicity=1,
                             reset_every='none'))

    # We wish to discover, among all versions of our neural network (i.e., after every training epoch), which one
    # seems to generalize best, thus which one has the highest validation score. Here, we do not care about the
    # "true generalization score", or "test score".
    # To achieve this goal, one can use the FindBestController along with an InterleavedTestEpochController. It is
    # important that the validationID is the same as the id argument of the InterleavedTestEpochController.
    # The FindBestController will dump on disk the validation scores for each and every network, as well as the
    # structure of the neural network having the best validation score. These dumps can then be used to plot the
    # evolution of the validation and test scores (see below) or simply to recover the resulting neural network for
    # your application.
    agent.attach(
        bc.FindBestController(validationID=ALE_env.VALIDATION_MODE,
                              testID=None,
                              unique_fname=fname))

    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
    # "validation epoch" between each training epoch ("one of two epochs", hence the periodicity=2). We do not want
    # these validation epochs to interfere with the training of the agent, which is handled by the
    # TrainerController, EpsilonController, and the like. Therefore, we disable those controllers for the whole
    # duration of the validation epochs interleaved this way, using the controllers_to_disable argument of the
    # InterleavedTestEpochController. For each validation epoch, we also want to display the sum of all rewards
    # obtained, hence show_score=True. Finally, we want to call the summarizePerformance method of ALE_env every
    # [parameters.period_btw_summary_perfs] *validation* epochs.
    agent.attach(
        bc.InterleavedTestEpochController(
            id=ALE_env.VALIDATION_MODE,
            epoch_length=parameters.steps_per_test,
            controllers_to_disable=[0, 1, 2, 3, 4],
Example 5
# TODO: best algorithm, hyperparameter tuning
if args.network == 'DQN':
    # Q-network with double Q-learning (double_Q=True); the target network is
    # refreshed every freeze_interval steps.
    network = MyQNetwork(environment=env,
                         batch_size=32,
                         freeze_interval=args.epochs[1],
                         double_Q=True,
                         random_state=rng)
    # Agent with a replay memory capped at 100000 transitions
    # (roughly 1.1 * args.epochs[0] * args.epochs[1]).
    agent = NeuralAgent(env,
                        network,
                        replay_memory_size=min(
                            int(args.epochs[0] * args.epochs[1] * 1.1),
                            100000),
                        batch_size=32,
                        random_state=rng)
    agent.setDiscountFactor(0.95)
    # Track the version of the network with the best score in mode 0 (validation).
    agent.attach(bc.FindBestController(validationID=0,
                                       unique_fname=args.fname))
    # Print progress and perform the training updates.
    agent.attach(bc.VerboseController())
    agent.attach(bc.TrainerController())
    # Anneal the exploration rate from 0.8 down to 0.2 over
    # args.epochs[0] * args.epochs[1] decay steps.
    agent.attach(
        bc.EpsilonController(initial_e=0.8,
                             e_decays=args.epochs[0] * args.epochs[1],
                             e_min=0.2))
    # Schedule the learning rate from the command-line arguments.
    agent.attach(
        bc.LearningRateController(args.learning_rate[0], args.learning_rate[1],
                                  args.learning_rate[2]))
    # Interleave 1000-step test epochs, disabling the controllers attached above at
    # indices 1-4 (verbose, trainer, epsilon, learning rate) while they run.
    agent.attach(
        bc.InterleavedTestEpochController(epoch_length=1000,
                                          controllers_to_disable=[1, 2, 3, 4]))
elif args.network == 'DDPG':
    network = MyACNetwork(environment=env,
                          batch_size=32,
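
Since the replay memory size and the epsilon decay in the DQN branch are both computed from args.epochs[0] * args.epochs[1], args.epochs is presumably [number of epochs, steps per epoch]. Under that assumption, a minimal sketch of how the training would then be launched with either agent:

    # Assumes args.epochs = [n_epochs, steps_per_epoch], both taken from the command line.
    agent.run(int(args.epochs[0]), int(args.epochs[1]))
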
Example 6
                             periodicity=1,
                             reset_every='none'))

    # We wish to discover, among all versions of our neural network (i.e., after every training epoch), which one
    # seems to generalize best, thus which one has the highest validation score. However, we also want to keep
    # track of a "true generalization score", the "test score". Indeed, what if we overfit the validation score?
    # To achieve these goals, one can use the FindBestController along with two InterleavedTestEpochControllers, one
    # for each mode (validation and test). It is important that the validationID and testID are the same as the id
    # arguments of the two InterleavedTestEpochControllers (implementing the validation mode and test mode,
    # respectively). The FindBestController will dump on disk the validation and test scores for each and every
    # network, as well as the structure of the neural network having the best validation score. These dumps can then
    # be used to plot the evolution of the validation and test scores (see below) or simply to recover the resulting
    # neural network for your application.
    agent.attach(
        bc.FindBestController(validationID=env.VALIDATION_MODE,
                              testID=env.TEST_MODE,
                              unique_fname=fname))

    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
    # "validation epoch" between each training epoch (hence the periodicity=1). For each validation epoch, we also
    # want to display the sum of all rewards obtained, hence show_score=True. Finally, we never want this controller
    # to call the summarizePerformance method of the environment, hence summarize_every=-1.
    agent.attach(
        bc.InterleavedTestEpochController(
            id=env.VALIDATION_MODE,
            epoch_length=parameters.steps_per_epoch,
            periodicity=1,
            show_score=True,
            summarize_every=-1))

    # Besides inserting a validation epoch (required if one wants to find the best neural network over all training
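    # A sketch of the matching *test*-mode controller that the FindBestController above expects,
    # mirroring the validation controller's arguments (the exact settings here are an assumption):
    agent.attach(
        bc.InterleavedTestEpochController(
            id=env.TEST_MODE,
            epoch_length=parameters.steps_per_epoch,
            periodicity=1,
            show_score=True,
            summarize_every=-1))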
Example 7
                             e_min=parameters.epsilon_min,
                             evaluate_on='action',
                             periodicity=1,
                             reset_every='none'))

    # We wish to discover, among all versions of our neural network (i.e., after every training epoch), which one
    # seems to generalize best, thus which one has the highest validation score. Here, we do not care about the
    # "true generalization score", or "test score".
    # To achieve this goal, one can use the FindBestController along with an InterleavedTestEpochController. It is
    # important that the validationID is the same as the id argument of the InterleavedTestEpochController.
    # The FindBestController will dump on disk the validation scores for each and every network, as well as the
    # structure of the neural network having the best validation score. These dumps can then be used to plot the
    # evolution of the validation and test scores (see below) or simply to recover the resulting neural network for
    # your application.
    agent.attach(
        bc.FindBestController(validationID=2, testID=None, unique_fname=fname))

    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
    # "validation epoch" between each training epoch (hence the periodicity=1). We do not want these validation
    # epochs to interfere with the training of the agent, which is handled by the TrainerController,
    # EpsilonController, and the like. Therefore, we disable those controllers for the whole duration of the
    # validation epochs interleaved this way, using the controllers_to_disable argument of the
    # InterleavedTestEpochController. For each validation epoch, we also want to display the sum of all rewards
    # obtained, hence show_score=True. Finally, we want to call the summarizePerformance method of ALE_env every
    # [parameters.period_btw_summary_perfs] *validation* epochs.
    agent.attach(
        bc.InterleavedTestEpochController(
            id=2,
            epoch_length=parameters.steps_per_test,
            periodicity=1,
            show_score=True,