h = hash(vars(parameters), hash_name="sha1")
fname = "MG2S_" + h
print("The parameters hash is: {}".format(h))
print("The parameters are: {}".format(parameters))

# --- Bind controllers to the agent ---
# Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and
# learning rate, as well as the training epoch number.
agent.attach(bc.VerboseController(
    evaluate_on='epoch',
    periodicity=1))

# During training epochs, we want to train the agent after every [parameters.update_frequency] actions it takes.
# We also want to display, after each training episode (not after every training step), the average Bellman
# residual and the average of the V values obtained during the last episode, hence the two last arguments.
agent.attach(
    bc.TrainerController(evaluate_on='action',
                         periodicity=parameters.update_frequency,
                         show_episode_avg_V_value=True,
                         show_avg_Bellman_residual=True))

# At the end of every epoch, the learning rate can be modified using a LearningRateController. Here we
# update the learning rate after every training epoch (periodicity=1), according to the parameters given.
agent.attach(
    bc.LearningRateController(
        initial_learning_rate=parameters.learning_rate,
        learning_rate_decay=parameters.learning_rate_decay,
        periodicity=1))

# Same for the discount factor.
agent.attach(
    bc.DiscountFactorController(
        initial_discount_factor=parameters.discount,
        discount_factor_growth=parameters.discount_inc,
        discount_factor_max=parameters.discount_max,
        periodicity=1))
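# To make the two schedules above concrete, here is a small standalone sketch of the update rules these
# controllers are assumed to apply: the learning rate decays geometrically, and the gap (1 - discount) is
# assumed to shrink geometrically until discount_max is reached. The authoritative rules live in deer's
# base_controllers module; the numeric values below are made up for illustration.
lr, lr_decay = 0.005, 0.99
discount, discount_inc, discount_max = 0.95, 0.99, 0.99

for epoch in range(1, 4):
    lr = lr * lr_decay                                  # one LearningRateController step per epoch
    discount = min(1 - (1 - discount) * discount_inc,   # one DiscountFactorController step per epoch
                   discount_max)
    print("epoch {}: lr={:.6f} discount={:.6f}".format(epoch, lr, discount))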
# --- Instantiate environment ---
env = Toy_env(rng)

# --- Instantiate qnetwork ---
qnetwork = MyQNetwork(environment=env, random_state=rng)

# --- Instantiate agent ---
agent = NeuralAgent(env, qnetwork, random_state=rng)

# --- Bind controllers to the agent ---
# Before every training epoch, we want to print a summary of the agent's epsilon, discount and
# learning rate, as well as the training epoch number.
agent.attach(bc.VerboseController())

# During training epochs, we want to train the agent after every action it takes.
# We also want to display, after each training episode (not after every training step), the average Bellman
# residual and the average of the V values obtained during the last episode.
agent.attach(bc.TrainerController())

# All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
# "test epoch" between each training epoch, and we do not want these test epochs to interfere with the training of
# the agent. Therefore, we disable those controllers for the whole duration of the interleaved test epochs, using
# the controllers_to_disable argument of the InterleavedTestEpochController. The value of this argument is a list
# of the indexes of the controllers to disable; an index reflects the order in which the controllers were attached.
agent.attach(
    bc.InterleavedTestEpochController(epoch_length=500,
                                      controllers_to_disable=[0, 1]))

# --- Run the experiment ---
agent.run(n_epochs=100, epoch_length=1000)
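# Every controller above is a subclass of deer's Controller base class; attaching one appends it to the
# agent's controller list, which is why controllers_to_disable refers to plain list indexes. As a sketch of
# how a hypothetical custom controller could fit into this mechanism, the class below logs the reward of each
# finished episode; it assumes the onEpisodeEnd hook and the _active flag of
# deer.experiment.base_controllers.Controller, so check those names against your deer version.
import deer.experiment.base_controllers as bc

class EpisodeRewardLogger(bc.Controller):
    def __init__(self):
        super(EpisodeRewardLogger, self).__init__()
        self.rewards = []

    def onEpisodeEnd(self, agent, terminal_reason, reward):
        if self._active:  # stays silent while disabled, e.g. during interleaved test epochs
            self.rewards.append(reward)
            print("episode finished with reward {}".format(reward))

# Attached after the three controllers above, this logger would get index 3, so a test-epoch controller
# wanting to silence it would list that index in controllers_to_disable.
agent.attach(EpisodeRewardLogger())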
fname = "PLE_" + h print("The parameters hash is: {}".format(h)) print("The parameters are: {}".format(parameters)) # --- Bind controllers to the agent --- # Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and # learning rate as well as the training epoch number. agent.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1)) # During training epochs, we want to train the agent after every [parameters.update_frequency] action it takes. # Plus, we also want to display after each training episode (!= than after every training) the average bellman # residual and the average of the V values obtained during the last episode, hence the two last arguments. agent.attach( bc.TrainerController(evaluate_on='action', periodicity=parameters.update_frequency, show_episode_avg_V_value=False, show_avg_Bellman_residual=False, nb_train=parameters.nb_train_per_epoch)) # Every epoch end, one has the possibility to modify the learning rate using a LearningRateController. Here we # wish to update the learning rate after every training epoch (periodicity=1), according to the parameters given. agent.attach( bc.LearningRateController( initial_learning_rate=parameters.learning_rate, learning_rate_decay=parameters.learning_rate_decay, periodicity=1)) # Same for the discount factor. agent.attach( bc.DiscountFactorController( initial_discount_factor=parameters.discount,
print("The parameters hash is: {}".format(h)) print("The parameters are: {}".format(parameters)) # --- Bind controllers to the agent --- # Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and # learning rate as well as the training epoch number. agent.attach(bc.VerboseController( evaluateOn='epoch', periodicity=1)) # During training epochs, we want to train the agent after every [parameters.update_frequency] action it takes. # Plus, we also want to display after each training episode (!= than after every training) the average bellman # residual and the average of the V values obtained during the last episode, hence the two last arguments. agent.attach(bc.TrainerController( evaluateOn='action', periodicity=parameters.update_frequency, showEpisodeAvgVValue=True, showAvgBellmanResidual=True)) # Every epoch end, one has the possibility to modify the learning rate using a LearningRateController. Here we # wish to update the learning rate after every training epoch (periodicity=1), according to the parameters given. agent.attach(bc.LearningRateController( initialLearningRate=parameters.learning_rate, learningRateDecay=parameters.learning_rate_decay, periodicity=1)) # Same for the discount factor. agent.attach(bc.DiscountFactorController( initialDiscountFactor=parameters.discount, discountFactorGrowth=parameters.discount_inc, discountFactorMax=parameters.discount_max,
def __init__(self, inputDims, q_network, actions, file='',
             replay_memory_size=Parameters.REPLAY_MEMORY_SIZE,
             replay_start_size=Parameters.BATCH_SIZE,
             batch_size=Parameters.BATCH_SIZE,
             random_state=None,
             exp_priority=0,
             batch_type='sequential',
             train_policy=None,
             test_policy=None,
             only_full_history=True,
             reward_as_input=False):
    CompleteLearner.__init__(self, actions, file)
    self.polfile = open(self.file + 'policy.txt', "w")

    # Avoid the mutable-default pitfall: a RandomState created in the signature would be shared by
    # every instance of the class.
    if random_state is None:
        random_state = np.random.RandomState()

    # if replay_start_size == None:
    #     replay_start_size = max(inputDims[i][0] for i in range(len(inputDims)))
    # elif replay_start_size < max(inputDims[i][0] for i in range(len(inputDims))):
    #     raise AgentError("Replay_start_size should be greater than the biggest history of a state.")

    self._controllers = []

    # --- Bind controllers to the agent ---
    # For comments, please refer to run_toy_env.py
    self.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1))
    self.attach(
        bc.TrainerController(evaluate_on='action',
                             periodicity=Parameters.UPDATE_FREQUENCY,
                             show_episode_avg_V_value=True,
                             show_avg_Bellman_residual=True))
    self.attach(
        bc.LearningRateController(
            initial_learning_rate=Parameters.LEARNING_RATE,
            learning_rate_decay=Parameters.LEARNING_RATE_DECAY,
            periodicity=10000))
    self.attach(
        bc.DiscountFactorController(
            initial_discount_factor=Parameters.DISCOUNT,
            discount_factor_growth=Parameters.DISCOUNT_INC,
            discount_factor_max=Parameters.DISCOUNT_MAX,
            periodicity=10000))
    self.attach(
        bc.EpsilonController(initial_e=Parameters.EPSILON_START,
                             e_decays=Parameters.EPSILON_DECAY,
                             e_min=Parameters.EPSILON_MIN,
                             evaluate_on='action',
                             periodicity=1000,
                             reset_every='none'))
    # self.attach(bc.InterleavedTestEpochController(
    #     id=0,
    #     epoch_length=Parameters.STEPS_PER_TEST,
    #     controllers_to_disable=[0, 1, 2, 3, 4],
    #     periodicity=2,
    #     show_score=True,
    #     summarize_every=-1))

    self.obs = []
    self.reward_as_input = reward_as_input
    self._network = q_network
    self._replay_memory_size = replay_memory_size
    self._replay_start_size = replay_start_size  # gather this many observations before learning starts
    self._batch_size = batch_size
    self._batch_type = batch_type
    self._random_state = random_state
    self._exp_priority = exp_priority
    self._only_full_history = only_full_history

    # DataSet(inputDimensions, n_actions, observation_type=np.float32, random_state=None,
    #         max_size=1000, batch_type='random', only_full_history=True)
    self._dataset = DataSet(inputDimensions=inputDims,
                            n_actions=len(actions),
                            max_size=replay_memory_size,
                            random_state=random_state,
                            batch_type=self._batch_type,
                            only_full_history=self._only_full_history)
    self._tmp_dataset = None  # Will be created by startTesting() when necessary
    self._mode = -1
    self._mode_epochs_length = 0
    self._total_mode_reward = 0
    self._training_loss_averages = []
    self._Vs_on_last_episode = []
    # self._in_episode = False
    self._selected_action = -1

    self._state = []
    for i in range(len(inputDims)):
        self._state.append(np.zeros(inputDims[i], dtype=config.floatX))

    if train_policy is None:
        self._train_policy = EpsilonGreedyPolicy(q_network, len(actions), random_state, 0.05)
    else:
        self._train_policy = train_policy
    if test_policy is None:
        self._test_policy = EpsilonGreedyPolicy(q_network, len(actions), random_state, 0.)
    else:
        self._test_policy = test_policy

    self.initEpisode()
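# EpsilonGreedyPolicy, used above with epsilon=0.05 for training and epsilon=0. for testing, implements the
# standard epsilon-greedy rule. A minimal self-contained sketch of that rule (the function name and the toy
# Q-values are ours, not the library's):
import numpy as np

def epsilon_greedy(q_values, epsilon, rng):
    """With probability epsilon pick a uniformly random action, otherwise the greedy one."""
    if rng.rand() < epsilon:
        return rng.randint(len(q_values))   # explore
    return int(np.argmax(q_values))         # exploit

rng = np.random.RandomState(0)
print(epsilon_greedy(np.array([0.1, 0.7, 0.2]), epsilon=0.05, rng=rng))  # almost always prints 1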