Ejemplo n.º 1
0
  def test_learningRate(self):
    """Checks that one update blends the old value with the reward."""
    # discount_factor=0.0 disables learning from Q*, which simplifies the
    # expected value computed at the bottom of this test.
    q_function = qfunc_impl.MemoizationQFunction(
      action_space_size=2, discount_factor=0.0, learning_rate=0.9)

    # Seed two states with known per-action values.
    seeded_states = numpy.array([[1, 2, 3], [4, 5, 6]])
    seeded_values = numpy.array([[0.5, 0.6], [0.3, 0.7]])
    q_function._protected_SetValues(seeded_states, seeded_values)

    # A single transition out of state (1,2,3) using action (0,1) with a
    # reward of 1.0.
    transition = base.Transition(
      s=numpy.array([[1, 2, 3]]),
      a=numpy.array([[0, 1]]),
      r=1.0,
      sp=numpy.array([[2, 2, 2]]))
    q_function.UpdateFromTransitions([transition])

    # Expected values for state (1,2,3) after the update:
    # - action (1,0): 0.5, because it was not part of the transition.
    # - action (0,1): (1-0.9) * 0.6 + 0.9 * 1.0 = 0.96.
    expected = numpy.array([[0.5, 0.96]])
    actual = q_function.GetValues(numpy.array([[1, 2, 3]]))
    numpy_util.TestUtil.AssertArrayEqual(expected, actual)
Ejemplo n.º 2
0
 def setUp(self) -> None:
   """Creates the environment, Q-function and runner shared by the tests."""
   # NOTE(review): the environment is built with action_space_size=1 while
   # the Q-function uses action_space_size=3 -- confirm this is intentional.
   env_kwargs = dict(action_space_size=1, step_limit=10)
   qfunc_kwargs = dict(
     action_space_size=3, discount_factor=0.9, learning_rate=0.9)

   self.env = environment_impl.SingleStateEnvironment(**env_kwargs)
   self.qfunc = qfunc_impl.MemoizationQFunction(**qfunc_kwargs)
   self.runner = runner_impl.SimpleRunner()
Ejemplo n.º 3
0
 def setUp(self) -> None:
   """Creates the environment, Q-function and policy shared by the tests."""
   # NOTE(review): the environment is built with action_space_size=1 while
   # the Q-function uses action_space_size=3 -- confirm this is intentional.
   env_kwargs = dict(action_space_size=1, step_limit=10)
   qfunc_kwargs = dict(
     action_space_size=3, discount_factor=0.9, learning_rate=0.9)

   self.env = environment_impl.SingleStateEnvironment(**env_kwargs)
   self.qfunc = qfunc_impl.MemoizationQFunction(**qfunc_kwargs)
   self.policy = policy_impl.GreedyPolicy()
Ejemplo n.º 4
0
    def _RunEnv(gym_env):
        """Wraps `gym_env` and runs one episode on it with a SimpleRunner.

        Args:
            gym_env: the object handed to `environment_impl.GymEnvironment`.
        """
        wrapped_env = environment_impl.GymEnvironment(gym_env)
        # Limit the Gym environment's max episode steps to 10.
        wrapped_env.SetGymEnvMaxEpisodeSteps(10)

        # The Q-function is sized from the wrapped environment's action space.
        num_actions = wrapped_env.GetActionSpaceSize()
        q_function = qfunc_impl.MemoizationQFunction(
            action_space_size=num_actions)
        wrapped_env.Reset()

        exploring_policy = policy_impl.GreedyPolicyWithRandomness(epsilon=1.0)
        runner = runner_impl.SimpleRunner()
        runner.Run(
            env=wrapped_env,
            qfunc=q_function,
            policy=exploring_policy,
            num_of_episodes=1)
Ejemplo n.º 5
0
    def test_saveLoad(self):
        """Checks that a saved Q-function is restored by a fresh instance.

        Values are set on `self.qfunc`, saved to a file, and loaded into a
        new `MemoizationQFunction`; both `_storage` mappings must then hold
        the same keys and equal arrays.
        """
        import os
        import tempfile

        self.qfunc._SetValues(self.states, self.values)

        # A private temporary directory replaces the former fixed
        # '/tmp/...' path: it exists on every platform, cannot collide with
        # a concurrent test run, and is removed (with whatever Save wrote
        # into it) when the block exits.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_file = os.path.join(
                tmp_dir, 'MemoizationQFunctionTest_savedata.tmp')
            self.qfunc.Save(tmp_file)
            qfunc = qfunc_impl.MemoizationQFunction(action_space_size=2)
            qfunc.Load(tmp_file)

            # Compare while the file still exists, in case Load keeps a
            # reference to the on-disk data.
            self.assertCountEqual(qfunc._storage.keys(),
                                  self.qfunc._storage.keys())
            for k in qfunc._storage.keys():
                numpy_util.TestUtil.AssertArrayEqual(qfunc._storage[k],
                                                     self.qfunc._storage[k])
Ejemplo n.º 6
0
    def setUp(self) -> None:
        """Creates the Q-function plus the state/value fixtures for the tests."""
        # Two states of size 3, with one value per action (2 actions) each.
        state_rows = [[1, 2, 3], [4, 5, 6]]
        value_rows = [[0.5, 0.5], [0.3, 0.7]]

        self.qfunc = qfunc_impl.MemoizationQFunction(action_space_size=2)
        self.states = numpy.array(state_rows)
        self.values = numpy.array(value_rows)
Ejemplo n.º 7
0
  def setUp(self) -> None:
    """Creates the Q-function and the state/action/value fixtures."""
    # State space size is 3; action space size is 2.
    # learning_rate=1.0: learning from old values is disabled in the
    # majority of tests.
    qfunc_options = {
      'action_space_size': 2,
      'discount_factor': 0.5,
      'learning_rate': 1.0,
    }
    self.qfunc = qfunc_impl.MemoizationQFunction(**qfunc_options)

    # Each row of states pairs with the same row of actions and values.
    state_rows = [[1, 2, 3], [4, 5, 6]]
    action_rows = [[1, 0], [0, 1]]
    value_rows = [[0.5, 0.5], [0.3, 0.7]]

    self.states = numpy.array(state_rows)
    self.actions = numpy.array(action_rows)
    self.values = numpy.array(value_rows)