Ejemplo n.º 1
0
  def testMultiIOTaskManager(self):
    """Checks final episode rewards on the 'print' and 'reverse' paper tasks.

    Each task is built twice, with code simplification off and then on. For
    every build, a few fixed programs are scored with the first reward
    function returned by `rl_batch(1)`, and only the last entry of
    `episode_rewards` is compared against the expected value.
    """
    limit = 100
    filler = '['

    # Each scenario is (task name, do_code_simplification, cases); each case is
    # (program, whether to run it through `pad` first, expected final reward).
    scenarios = (
        ('print', False, (
            ('++++++++.---.+++++++...', True, 0.2444),
            ('++++++++.---.+++++++..+++.', True, 1.0),
        )),
        ('print', True, (
            ('++++++++.---.+++++++...', False, 0.2444),
            ('++++++++.---.+++++++..+++.', False, 0.935),
            ('++++++++.---.+++++++..+++.', True, 0.75),
        )),
        ('reverse', False, (
            ('>,>,>,.<.<.<.', True, 0.1345),
            (',[>,]+[,<.]', True, 1.0),
        )),
        ('reverse', True, (
            ('>,>,>,.<.<.<.', False, 0.1324),
            (',[>,]+[,<.]', False, 0.9725),
            (',[>,]+[,<.]', True, 0.75),
        )),
    )

    for task_name, simplify, cases in scenarios:
      # One task and one reward function per scenario, reused for all its cases.
      paper_task = code_tasks.make_paper_task(
          task_name, timestep_limit=limit, do_code_simplification=simplify)
      score = paper_task.rl_batch(1)[0]
      for program, padded, expected in cases:
        code = pad(program, limit, filler) if padded else program
        self.assertClose(score(code).episode_rewards[-1], expected)
Ejemplo n.º 2
0
 def testKnownCodeBaseTask(self):
     """Scores one fixed program on the 'shift-left' paper task.

     The task is built without code simplification, and the program is run
     through `pad` with the timestep limit and '[' before being scored. The
     last entry of `episode_rewards` is expected to be close to 1.0.
     """
     timestep_limit = 100
     pad_char = '['
     shift_left_task = code_tasks.make_paper_task(
         'shift-left',
         timestep_limit=timestep_limit,
         do_code_simplification=False)
     # Only the first reward function of the size-1 batch is exercised.
     reward_fn = shift_left_task.rl_batch(1)[0]
     padded_program = pad(',>,[.,]<.,.', timestep_limit, pad_char)
     final_reward = reward_fn(padded_program).episode_rewards[-1]
     self.assertClose(final_reward, 1.0)
Ejemplo n.º 3
0
 def testKnownCodeBaseTask(self):
   """Asserts a final reward close to 1.0 for a fixed 'shift-left' program.

   The task is created with code simplification disabled. The program is
   passed through `pad` (timestep limit 100, pad character '[') and scored
   by the first reward function from `rl_batch(1)`.
   """
   limit, filler = 100, '['
   task_kwargs = dict(timestep_limit=limit, do_code_simplification=False)
   paper_task = code_tasks.make_paper_task('shift-left', **task_kwargs)
   score = paper_task.rl_batch(1)[0]
   # Only the last entry of episode_rewards is checked.
   outcome = score(pad(',>,[.,]<.,.', limit, filler))
   self.assertClose(outcome.episode_rewards[-1], 1.0)