Code Example #1
    h = hash(vars(parameters), hash_name="sha1")
    fname = "MG2S_" + h
    print("The parameters hash is: {}".format(h))
    print("The parameters are: {}".format(parameters))

    # --- Bind controllers to the agent ---
    # Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and
    # learning rate as well as the training epoch number.
    agent.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1))

    # During training epochs, we want to train the agent after every [parameters.update_frequency] actions it
    # takes. Plus, we also want to display after each training episode (not after every training step) the
    # average Bellman residual and the average V value obtained during the last episode, hence the last two
    # arguments.
    agent.attach(
        bc.TrainerController(evaluate_on='action',
                             periodicity=parameters.update_frequency,
                             show_episode_avg_V_value=True,
                             show_avg_Bellman_residual=True))

    # At the end of every epoch, the learning rate can be modified using a LearningRateController. Here we wish
    # to update the learning rate after every training epoch (periodicity=1), according to the given parameters.
    agent.attach(
        bc.LearningRateController(
            initial_learning_rate=parameters.learning_rate,
            learning_rate_decay=parameters.learning_rate_decay,
            periodicity=1))

    # Same for the discount factor.
    agent.attach(
        bc.DiscountFactorController(
            initial_discount_factor=parameters.discount,
            discount_factor_growth=parameters.discount_inc,
            discount_factor_max=parameters.discount_max,
            periodicity=1))
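
The LearningRateController applies learning_rate_decay multiplicatively once per epoch (periodicity=1), so after k epochs the rate is roughly initial * decay**k, as the parameter names suggest. A standalone sketch of that implied schedule (the values below are hypothetical):

    initial_learning_rate, learning_rate_decay = 0.005, 0.99  # hypothetical values
    for epoch in (1, 10, 100):
        # multiplicative decay applied once per epoch
        print(epoch, initial_learning_rate * learning_rate_decay ** epoch)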
Code Example #2
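    # Imports assumed by this snippet (taken from deer's run_toy_env example;
    # exact module paths may vary between deer versions):
    import numpy as np
    import deer.experiment.base_controllers as bc
    from deer.agent import NeuralAgent
    from deer.q_networks.q_net_theano import MyQNetwork
    from Toy_env import MyEnv as Toy_env

    rng = np.random.RandomState(123456)  # seed value is an assumption
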
    # --- Instantiate environment ---
    env = Toy_env(rng)

    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(environment=env, random_state=rng)

    # --- Instantiate agent ---
    agent = NeuralAgent(env, qnetwork, random_state=rng)

    # --- Bind controllers to the agent ---
    # Before every training epoch, we want to print a summary of the agent's epsilon, discount and
    # learning rate as well as the training epoch number.
    agent.attach(bc.VerboseController())

    # During training epochs, we want to train the agent after every action it takes.
    # Plus, we also want to display after each training episode (not after every training step) the average
    # Bellman residual and the average V value obtained during the last episode.
    agent.attach(bc.TrainerController())

    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
    # "test epoch" between each training epoch. We do not want these test epochs to interfere with the training of
    # the agent, so we disable the previous controllers for the whole duration of the interleaved test epochs, using
    # the controllers_to_disable argument of the InterleavedTestEpochController. Its value is a list of the indices
    # of the controllers to disable, where an index reflects the order in which the controllers were attached.
    agent.attach(
        bc.InterleavedTestEpochController(epoch_length=500,
                                          controllers_to_disable=[0, 1]))

    # --- Run the experiment ---
    agent.run(n_epochs=100, epoch_length=1000)
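
If one 500-step test epoch is interleaved after each of the 100 training epochs, as the comments above describe, the run performs the following numbers of steps (a quick accounting of the schedule above):

    n_epochs, epoch_length, test_epoch_length = 100, 1000, 500
    total_training_steps = n_epochs * epoch_length    # 100,000 actions in training mode
    total_test_steps = n_epochs * test_epoch_length   # 50,000 actions in test mode
    print(total_training_steps, total_test_steps)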
Code Example #3
    fname = "PLE_" + h
    print("The parameters hash is: {}".format(h))
    print("The parameters are: {}".format(parameters))

    # --- Bind controllers to the agent ---
    # Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and
    # learning rate as well as the training epoch number.
    agent.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1))

    # During training epochs, we want to train the agent after every [parameters.update_frequency] actions it takes.
    # The two show_* arguments can additionally display, after each training episode (not after every training step),
    # the average Bellman residual and the average V value obtained during the last episode; they are disabled here.
    agent.attach(
        bc.TrainerController(evaluate_on='action',
                             periodicity=parameters.update_frequency,
                             show_episode_avg_V_value=False,
                             show_avg_Bellman_residual=False,
                             nb_train=parameters.nb_train_per_epoch))

    # At the end of every epoch, the learning rate can be modified using a LearningRateController. Here we wish
    # to update the learning rate after every training epoch (periodicity=1), according to the given parameters.
    agent.attach(
        bc.LearningRateController(
            initial_learning_rate=parameters.learning_rate,
            learning_rate_decay=parameters.learning_rate_decay,
            periodicity=1))

    # Same for the discount factor.
    agent.attach(
        bc.DiscountFactorController(
            initial_discount_factor=parameters.discount,
            discount_factor_growth=parameters.discount_inc,
            discount_factor_max=parameters.discount_max,
            periodicity=1))
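
As the parameter names suggest, the DiscountFactorController grows the discount factor multiplicatively each epoch and caps it at discount_factor_max. A standalone sketch of that schedule (the values below are hypothetical):

    discount, discount_inc, discount_max = 0.9, 1.0015, 0.99  # hypothetical values
    for epoch in range(100):
        # multiplicative growth, capped at the maximum
        discount = min(discount_max, discount * discount_inc)
    print(discount)  # reaches and then sticks at discount_max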
Code Example #4
    print("The parameters hash is: {}".format(h))
    print("The parameters are: {}".format(parameters))

    # --- Bind controllers to the agent ---
    # Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and 
    # learning rate as well as the training epoch number.
    agent.attach(bc.VerboseController(
        evaluateOn='epoch', 
        periodicity=1))
    
    # During training epochs, we want to train the agent after every [parameters.update_frequency] actions it
    # takes. Plus, we also want to display after each training episode (not after every training step) the
    # average Bellman residual and the average V value obtained during the last episode, hence the last two
    # arguments.
    agent.attach(bc.TrainerController(
        evaluateOn='action', 
        periodicity=parameters.update_frequency, 
        showEpisodeAvgVValue=True, 
        showAvgBellmanResidual=True))
    
    # At the end of every epoch, the learning rate can be modified using a LearningRateController. Here we wish
    # to update the learning rate after every training epoch (periodicity=1), according to the given parameters.
    agent.attach(bc.LearningRateController(
        initialLearningRate=parameters.learning_rate, 
        learningRateDecay=parameters.learning_rate_decay,
        periodicity=1))
    
    # Same for the discount factor.
    agent.attach(bc.DiscountFactorController(
        initialDiscountFactor=parameters.discount,
        discountFactorGrowth=parameters.discount_inc,
        discountFactorMax=parameters.discount_max,
        periodicity=1))
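
    # NOTE: this snippet uses camelCase keyword names (evaluateOn, initialLearningRate,
    # discountFactorGrowth, ...), whereas Code Examples #1-#3 use snake_case equivalents;
    # the two styles correspond to different revisions of the library API.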
Code Example #5
    def __init__(self,
                 inputDims,
                 q_network,
                 actions,
                 file='',
                 replay_memory_size=Parameters.REPLAY_MEMORY_SIZE,
                 replay_start_size=Parameters.BATCH_SIZE,
                 batch_size=Parameters.BATCH_SIZE,
                 random_state=np.random.RandomState(),
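                 # NOTE: the default above is evaluated once, when __init__ is defined,
                 # so all instances created without an explicit random_state share the
                 # same RandomState object.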
                 exp_priority=0,
                 batch_type='sequential',
                 train_policy=None,
                 test_policy=None,
                 only_full_history=True,
                 reward_as_input=False):

        CompleteLearner.__init__(self, actions, file)
        self.polfile = open(self.file + 'policy.txt', "w")
        # if replay_start_size == None:
        #     replay_start_size = max(inputDims[i][0] for i in range(len(inputDims)))
        # elif replay_start_size < max(inputDims[i][0] for i in range(len(inputDims))):
        #     raise AgentError("Replay_start_size should be greater than the biggest history of a state.")

        self._controllers = []

        # --- Bind controllers to the agent ---
        # For comments, please refer to run_toy_env.py
        self.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1))

        self.attach(
            bc.TrainerController(evaluate_on='action',
                                 periodicity=Parameters.UPDATE_FREQUENCY,
                                 show_episode_avg_V_value=True,
                                 show_avg_Bellman_residual=True))

        self.attach(
            bc.LearningRateController(
                initial_learning_rate=Parameters.LEARNING_RATE,
                learning_rate_decay=Parameters.LEARNING_RATE_DECAY,
                periodicity=10000))

        self.attach(
            bc.DiscountFactorController(
                initial_discount_factor=Parameters.DISCOUNT,
                discount_factor_growth=Parameters.DISCOUNT_INC,
                discount_factor_max=Parameters.DISCOUNT_MAX,
                periodicity=10000))

        self.attach(
            bc.EpsilonController(initial_e=Parameters.EPSILON_START,
                                 e_decays=Parameters.EPSILON_DECAY,
                                 e_min=Parameters.EPSILON_MIN,
                                 evaluate_on='action',
                                 periodicity=1000,
                                 reset_every='none'))

        # self.attach(bc.InterleavedTestEpochController(
        #     id=0,
        #     epoch_length=Parameters.STEPS_PER_TEST,
        #     controllers_to_disable=[0, 1, 2, 3, 4],
        #     periodicity=2,
        #     show_score=True,
        #     summarize_every=-1))

        self.obs = []
        self.reward_as_input = reward_as_input
        self._network = q_network
        self._replay_memory_size = replay_memory_size
        self._replay_start_size = replay_start_size  # make sure you gather this many observations before learning
        self._batch_size = batch_size
        self._batch_type = batch_type
        self._random_state = random_state
        self._exp_priority = exp_priority
        self._only_full_history = only_full_history
        # DataSet signature, for reference:
        #   DataSet(inputDimensions, n_actions, observation_type=np.float32,
        #           random_state=None, max_size=1000, batch_type='random', only_full_history=True)
        self._dataset = DataSet(inputDimensions=inputDims,
                                n_actions=len(actions),
                                max_size=replay_memory_size,
                                random_state=random_state,
                                batch_type=self._batch_type,
                                only_full_history=self._only_full_history)
        self._tmp_dataset = None  # Will be created by startTesting() when necessary
        self._mode = -1
        self._mode_epochs_length = 0
        self._total_mode_reward = 0
        self._training_loss_averages = []
        self._Vs_on_last_episode = []
        #self._in_episode = False

        self._selected_action = -1
        self._state = []
        for i in range(len(inputDims)):
            self._state.append(np.zeros(inputDims[i], dtype=config.floatX))
        if train_policy is None:
            self._train_policy = EpsilonGreedyPolicy(q_network, len(actions),
                                                     random_state, 0.05)
        else:
            self._train_policy = train_policy
        if test_policy is None:
            self._test_policy = EpsilonGreedyPolicy(q_network, len(actions),
                                                    random_state, 0.)
        else:
            self._test_policy = test_policy

        self.initEpisode()
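
The EpsilonController attached in this constructor anneals the exploration rate from EPSILON_START toward EPSILON_MIN over EPSILON_DECAY updates, with one update every 1000 actions. A standalone sketch of a linear annealing of this shape (the exact deer update rule may differ slightly; the constants below are hypothetical):

    EPSILON_START, EPSILON_MIN, EPSILON_DECAY = 1.0, 0.1, 100  # hypothetical values
    # One epsilon update is applied every `periodicity` actions (1000 above).
    epsilon = EPSILON_START
    step = (EPSILON_START - EPSILON_MIN) / EPSILON_DECAY  # linear decrement per update
    for update in range(EPSILON_DECAY):
        epsilon = max(EPSILON_MIN, epsilon - step)
    print(epsilon)  # ~0.1 after EPSILON_DECAY updates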