Python FixedDecay Examples

Programming Language: Python

Namespace/Package Name: rlagents.functions.decay

Class/Type: FixedDecay

Examples at hotexamples.com: 8

Python FixedDecay - 8 examples found. These are the top rated real world Python examples of rlagents.functions.decay.FixedDecay extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

FixedDecay(8)

Frequently Used Methods

FixedDecay (8)

Example #1

Show file

def main():
    """Compare exploration strategies on a ten-armed bandit and plot the outcome.

    Six agents are derived from one shared base configuration (tabular model,
    discrete-max action selection, temporal-difference optimiser) and differ
    only in their exploration strategy or initial model values. Each agent is
    run for ``episodes`` episodes on
    ``BanditTenArmedUniformDistributedReward-v0``. For every agent, the running
    fraction of episodes whose reward equals the largest entry of the
    environment's ``r_dist`` is plotted against the number of episodes so far.
    """
    # All agents use a tabular model with initial values of 0
    # Updates are done via TD learning with a fixed learning rate
    # Action_FA of discrete max means the agent chooses the action with the highest utility from a discrete array
    base_agent = Agent(model=TabularModel(mean=0, std=0),
                       action_fa=DiscreteMaxFA(),
                       optimiser=TemporalDifference(learning_rate=FixedDecay(0.2)))

    # Randomly select the next action
    random_agent = copy.deepcopy(base_agent)
    random_agent.exploration = RandomExploration()

    # Always select the best action seen so far (is default behaviour for agents)
    greedy_agent = copy.deepcopy(base_agent)

    # Always select the best action seen so far with optimistic starting values
    optimistic_greedy_agent = copy.deepcopy(base_agent)
    optimistic_greedy_agent.model = TabularModel(mean=1, std=0)

    # Select a random action with decaying likelihood
    egreedy_agent = copy.deepcopy(base_agent)
    egreedy_agent.exploration = EpsilonGreedy(FixedDecay(1, 0.995, 0.01))

    # Select a random action with fixed likelihood
    fixed_egreedy_agent = copy.deepcopy(base_agent)
    fixed_egreedy_agent.exploration = EpsilonGreedy(FixedDecay(0.2))

    # Explores using softmax
    boltzmann_agent = copy.deepcopy(base_agent)
    boltzmann_agent.exploration = Softmax(FixedDecay(2, 0.995, 0.1))

    agents = [random_agent, greedy_agent, optimistic_greedy_agent, egreedy_agent, fixed_egreedy_agent, boltzmann_agent]
    labels = ['Random', 'Greedy', 'Optimistic Greedy', 'E-Greedy Decay', 'E-Greedy Fixed', 'Boltzmann']

    agent_reward = []
    max_reward = []
    episodes = 100

    # The same directory is handed to every run and read back right after it,
    # so it is defined once outside the loop.
    path = "/tmp/rlagents/"

    for agent in agents:
        am = AgentManager(agent=agent)
        em = EnvManager('BanditTenArmedUniformDistributedReward-v0', am)
        em.run(n_episodes=episodes, print_stats=False, path=path, video_callable=False)

        # Largest entry of the environment's reward distribution; an episode
        # counts as "optimal" below when its reward equals this value.
        max_reward.append(max(em.env.r_dist))
        results = load_results(path)
        agent_reward.append(results['episode_rewards'])

    # range() excludes its stop value, so use episodes + 1 to include the
    # final episode in the plot (the upper bound of ``episodes`` dropped it).
    steps = range(1, episodes + 1)

    for label, rewards, best in zip(labels, agent_reward, max_reward):
        # Fraction of the first j episodes whose reward equals the best arm's reward.
        percent_correct = [rewards[:j].count(best) / float(j) for j in steps]
        plt.plot(steps, percent_correct, label=label)

    plt.xlabel('Steps')
    plt.ylabel('% Optimal Arm Pulls')
    plt.ylim(-0.2, 1.5)
    plt.legend(loc=2)

    plt.show()

Example #2

Show file

    def shift(self, s):
        """Store ``s`` as this object's shift schedule.

        Passing ``None`` selects the default
        ``FixedDecay(1, decay=0.995, minimum=0.01)``.

        Raises:
            TypeError: if the resulting value is not a ``DecayBase`` instance.
        """
        schedule = s if s is not None else FixedDecay(1, decay=0.995, minimum=0.01)

        if isinstance(schedule, DecayBase):
            self._shift = schedule
            return

        raise TypeError("Shift must be of type DecayBase")

Example #3

Show file

    def temperature(self, t):
        """Store ``t`` as this object's temperature schedule.

        Passing ``None`` selects the default
        ``FixedDecay(10, decay=0.997, minimum=0.1)``.

        Raises:
            TypeError: if the resulting value is not a ``DecayBase`` instance.
        """
        schedule = FixedDecay(10, decay=0.997, minimum=0.1) if t is None else t

        if isinstance(schedule, DecayBase):
            self._temperature = schedule
            return

        raise TypeError("Temperature must be of type DecayBase")

Example #4

Show file

    def spread(self, s):
        """Store ``s`` as this object's spread schedule.

        Passing ``None`` selects the default ``FixedDecay(0.05, 0, 0)``.

        Raises:
            TypeError: if the resulting value is not a ``DecayBase`` instance.
        """
        schedule = FixedDecay(0.05, 0, 0) if s is None else s

        if isinstance(schedule, DecayBase):
            self._spread = schedule
            return

        raise TypeError("Spread not a valid DecayBase")

Example #5

Show file

    def learning_rate(self, lr):
        """Store ``lr`` as the learning-rate schedule, falling back to a default.

        Any value that is not a ``DecayBase`` instance (including ``None``) is
        replaced by ``FixedDecay(1, decay=0.995, minimum=0.05)`` and a warning
        is issued. The warning text embeds the fallback schedule, not the
        rejected value.
        """
        if isinstance(lr, DecayBase):
            self._learning_rate = lr
            return

        fallback = FixedDecay(1, decay=0.995, minimum=0.05)
        warnings.warn('Learning Rate type invalid, using default. ({0})'.format(fallback))
        self._learning_rate = fallback

Example #6

Show file

    def decay(self, d):
        """Store ``d`` as the decay schedule, falling back to a default.

        Any value that is not a ``DecayBase`` instance (including ``None``) is
        replaced by ``FixedDecay(0.1, 1, 0.1)`` and a warning is issued. The
        warning text embeds the fallback schedule, not the rejected value.
        """
        if isinstance(d, DecayBase):
            self._decay = d
            return

        fallback = FixedDecay(0.1, 1, 0.1)
        warnings.warn("Decay type invalid, using default. {0}".format(fallback))
        self._decay = fallback

Example #7

Show file

    def test_update(self):
        # After a single update() the exploration value is expected to be 0.95.
        # NOTE(review): presumably start value 1 times decay 0.95, assuming
        # FixedDecay's positional order is (value, decay, minimum) - confirm.
        schedule = FixedDecay(1, 0.95, 0.1)
        epsilon_greedy = EpsilonGreedy(schedule)

        epsilon_greedy.update()

        self.assertEqual(epsilon_greedy.value, 0.95)

Example #8

Show file

 def test_epsilon_property(self):
     # Before any update, the exploration value should equal the first
     # argument given to FixedDecay (0.2 here).
     schedule = FixedDecay(0.2, 0.95, 0.1)
     epsilon_greedy = EpsilonGreedy(schedule)

     self.assertEqual(0.2, epsilon_greedy.value)