def testMultiIOTaskManager(self):
    """Check final episode rewards on the 'print' and 'reverse' paper tasks.

    Each task is built twice, once with do_code_simplification=False and
    once with True, and the last entry of `episode_rewards` returned by the
    first reward function of `rl_batch(1)` is compared against a fixed
    expected value for a handful of known code strings.
    """
    limit = 100
    filler = '['

    def first_reward_fn(task_name, simplify):
        # Build the named paper task and hand back its first reward function.
        paper_task = code_tasks.make_paper_task(
            task_name,
            timestep_limit=limit,
            do_code_simplification=simplify)
        return paper_task.rl_batch(1)[0]

    def padded(code):
        # Run the code string through pad() with the timestep limit and
        # the filler character.
        return pad(code, limit, filler)

    def check(score, code, expected):
        # Only the final per-episode reward is compared.
        self.assertClose(score(code).episode_rewards[-1], expected)

    # 'print' task, no simplification: codes are always passed padded.
    score = first_reward_fn('print', False)
    check(score, padded('++++++++.---.+++++++...'), 0.2444)
    check(score, padded('++++++++.---.+++++++..+++.'), 1.0)

    # 'print' task, with simplification: raw codes plus one padded code.
    score = first_reward_fn('print', True)
    check(score, '++++++++.---.+++++++...', 0.2444)
    check(score, '++++++++.---.+++++++..+++.', 0.935)
    check(score, padded('++++++++.---.+++++++..+++.'), 0.75)

    # 'reverse' task, no simplification.
    score = first_reward_fn('reverse', False)
    check(score, padded('>,>,>,.<.<.<.'), 0.1345)
    check(score, padded(',[>,]+[,<.]'), 1.0)

    # 'reverse' task, with simplification.
    score = first_reward_fn('reverse', True)
    check(score, '>,>,>,.<.<.<.', 0.1324)
    check(score, ',[>,]+[,<.]', 0.9725)
    check(score, padded(',[>,]+[,<.]'), 0.75)
def testKnownCodeBaseTask(self):
    """Check that a known code string earns reward 1.0 on 'shift-left'.

    Builds the 'shift-left' paper task with code simplification disabled,
    takes the first reward function from `rl_batch(1)`, and asserts that
    the final episode reward for the padded code string is 1.0.

    This method was previously defined twice with identical bodies; the
    later definition shadowed the earlier one, so a single copy is kept.
    """
    maxlen = 100
    padchr = '['
    task = code_tasks.make_paper_task(
        'shift-left',
        timestep_limit=maxlen,
        do_code_simplification=False)
    reward_fn = task.rl_batch(1)[0]
    # Only the last entry of episode_rewards is checked.
    self.assertClose(
        reward_fn(pad(',>,[.,]<.,.', maxlen, padchr)).episode_rewards[-1],
        1.0)