def testTimeout(self):
  """Check that the wall-clock timeout is enforced and reported.

  '+.[].' is expected to emit 1 and then be stopped with Status.TIMEOUT
  after roughly the requested 0.1 seconds. '+.[-].' is expected to finish
  successfully with output [1, 0] well inside the same budget.
  """
  er = bf.evaluate('+.[].', base=5, input_buffer=[], timeout=0.1)
  self.assertEqual(
      ([1], False, bf.Status.TIMEOUT),
      (er.output, er.success, er.failure_reason))
  # Dedicated comparison asserts report the measured time when they fail,
  # unlike assertTrue on a pre-evaluated boolean.
  self.assertGreater(er.time, 0.07)
  self.assertLess(er.time, 0.21)

  er = bf.evaluate('+.[-].', base=5, input_buffer=[], timeout=0.1)
  self.assertEqual(
      ([1, 0], True, bf.Status.SUCCESS),
      (er.output, er.success, er.failure_reason))
  self.assertLess(er.time, 0.15)
def testBasicOps(self):
  """Run several small programs with default settings and check outputs.

  Each case pairs the expected output list with the program text; the last
  case is expected to print 'Hello World!' followed by a newline.
  """
  hello_world_program = (
      '>++++++++[-<+++++++++>]<.>>+>-[+]++>++>+++[>[->+++<<+++>]<<]>-----'
      '.>->+++..+++.>-.<<+[>[+>+]>>]<--------------.>>.+++.------.-------'
      '-.>+.>+.')
  cases = [
      ([3, 1, 2], '+++.--.+.'),
      ([1, 1, 2], '+.<.>++.'),
      ([0], '+,.'),
      ([ord(char) for char in 'Hello World!\n'], hello_world_program),
  ]
  for expected_output, program in cases:
    self.assertCorrectOutput(expected_output, bf.evaluate(program))
def testUnmatchedBraces(self):
  """Check both syntax modes on a program with unbalanced brackets.

  With require_correct_syntax=False the program is expected to output
  [3, 6, 1]. With require_correct_syntax=True it is expected to fail with
  Status.SYNTAX_ERROR and produce no output.
  """
  program = '+++.]]]]>----.[[[[[>+.'

  lenient = bf.evaluate(
      program, input_buffer=[], base=10, require_correct_syntax=False)
  self.assertCorrectOutput([3, 6, 1], lenient)

  strict = bf.evaluate(
      program, input_buffer=[], base=10, require_correct_syntax=True)
  self.assertEqual([], strict.output)
  self.assertFalse(strict.success)
  self.assertEqual(bf.Status.SYNTAX_ERROR, strict.failure_reason)
def testOutputMemory(self):
  """Check that output_memory=True exposes the memory on the result.

  The program is expected to succeed with output [4] and to report memory
  contents [1, 2, 3, 4].
  """
  er = bf.evaluate(
      '+>++>+++>++++.', base=256, input_buffer=[], output_memory=True)

  expected_summary = ([4], True, bf.Status.SUCCESS)
  actual_summary = (er.output, er.success, er.failure_reason)
  self.assertEqual(expected_summary, actual_summary)

  self.assertEqual([1, 2, 3, 4], er.memory)
def testMaxSteps(self):
  """Check that the max_steps budget is enforced and reported.

  With the timeout disabled, '+.[].' is expected to stop with
  Status.STEP_LIMIT after exactly 100 steps, having emitted 1. '+.[-].' is
  expected to succeed with output [1, 0] in fewer than 100 steps.
  """
  er = bf.evaluate(
      '+.[].', base=5, input_buffer=[], timeout=None, max_steps=100)
  self.assertEqual(
      ([1], False, bf.Status.STEP_LIMIT, 100),
      (er.output, er.success, er.failure_reason, er.steps))

  er = bf.evaluate(
      '+.[-].', base=5, input_buffer=[], timeout=None, max_steps=100)
  self.assertEqual(
      ([1, 0], True, bf.Status.SUCCESS),
      (er.output, er.success, er.failure_reason))
  # assertLess reports the actual step count when the check fails.
  self.assertLess(er.steps, 100)
def _test_case_generator(self, code_solution):
  """Yield (input, output) pairs produced by running a reference program.

  A random generator seeded with `self.seed` is passed to
  `self.make_input_fn` to create `self.n` inputs. Each input is fed to
  `code_solution` and the program's output is recorded.

  Args:
    code_solution: BF code string used to compute the output for each input.

  Yields:
    2-tuples of (input_case, output) where output is the `output` attribute
    of the evaluation result.

  Raises:
    RuntimeError: If evaluating `code_solution` on any generated input does
      not succeed.
  """
  rand = random.Random(self.seed)
  for _ in xrange(self.n):
    input_case = self.make_input_fn(rand)
    result = bf.evaluate(
        code_solution,
        input_buffer=input_case,
        max_steps=self.max_steps,
        base=self.base,
        require_correct_syntax=False)
    if not result.success:
      # Wrap in a 1-tuple so a tuple-valued input_case is formatted as one
      # value instead of being unpacked as the format arguments.
      raise RuntimeError(
          'Program must succeed. Failed on input: %s' % (input_case,))
    yield input_case, result.output
def _score_code(self, code):
  """Evaluate `code` on the task's test cases and build its reward record.

  Args:
    code: A single BF code string.

  Returns:
    misc.RewardInfo namedtuple instance containing reward and code execution
    information, including inputs, expected outputs, code outputs, input
    and output types, and reason for the reward obtained.
  """
  # Each element pairs an input sequence with its expected output sequence.
  io_pairs = self.task.make_io_set()
  total_reward = 0.0
  code_outputs = []
  reason = 'correct'

  for given_input, expected_output in io_pairs:
    run = bf.evaluate(
        code,
        input_buffer=given_input,
        timeout=0.1,
        max_steps=self.max_execution_steps,
        base=self.task.base,
        require_correct_syntax=self.require_correct_syntax)
    produced = run.output

    if not run.success:
      # Any unsuccessful run replaces the accumulated reward with the
      # failure reward, discards the collected outputs and stops scoring.
      total_reward = self.failure_reward
      code_outputs = []
      reason = run.failure_reason
      break

    total_reward += self.reward_fn(produced, expected_output, self.task.base)
    if produced == expected_output:
      # Bonus for an exactly correct answer.
      total_reward += self.correct_bonus

      # Length bonus is only ever added, never subtracted, so it cannot
      # interfere with the main objective; it matters only once a test
      # case is solved.
      if self.min_code_length == self.max_code_length:
        length_bonus = self.code_length_bonus
      else:
        length_bonus = self.code_length_bonus * clipped_linear(
            x=len(code),
            x0=self.min_code_length,
            y0=1.0,
            slope=-self.time_penalty,
            y_range=(0.0, 1.0))
      total_reward += length_bonus
    elif reason == 'correct':
      # First mismatch downgrades the verdict; later ones leave it as is.
      reason = 'wrong'

    code_outputs.append(produced)

  # One reward per code character: zeros everywhere except the final
  # position, which carries the normalized terminal reward.
  total_reward /= self.best_reward
  episode_rewards = [0.0] * (len(code) - 1) + [total_reward]
  return misc.RewardInfo(
      episode_rewards=episode_rewards,
      input_case=misc.IOTuple(given for given, _ in io_pairs),
      correct_output=misc.IOTuple(expected for _, expected in io_pairs),
      code_output=misc.IOTuple(code_outputs),
      input_type=self.input_type,
      output_type=self.output_type,
      reason=reason)
def testProgramTrace(self):
  """Check the per-step trace recorded when evaluating with debug=True.

  The program ',[.>,].' is run on input [2, 1] and the resulting
  `program_trace` is compared against the expected list of
  bf.ExecutionSnapshot values, one row per snapshot.
  """
  snapshot = bf.ExecutionSnapshot
  er = bf.evaluate(',[.>,].', base=256, input_buffer=[2, 1], debug=True)

  field_names = ('codeptr', 'codechar', 'memptr', 'memval', 'memory',
                 'next_input', 'output_buffer')
  expected_rows = [
      (0, ',', 0, 0, [0], 2, []),
      (1, '[', 0, 2, [2], 1, []),
      (2, '.', 0, 2, [2], 1, []),
      (3, '>', 0, 2, [2], 1, [2]),
      (4, ',', 1, 0, [2, 0], 1, [2]),
      (5, ']', 1, 1, [2, 1], 0, [2]),
      (2, '.', 1, 1, [2, 1], 0, [2]),
      (3, '>', 1, 1, [2, 1], 0, [2, 1]),
      (4, ',', 2, 0, [2, 1, 0], 0, [2, 1]),
      (5, ']', 2, 0, [2, 1, 0], 0, [2, 1]),
      (6, '.', 2, 0, [2, 1, 0], 0, [2, 1]),
      (7, '', 2, 0, [2, 1, 0], 0, [2, 1, 0]),
  ]
  expected_trace = [
      snapshot(**dict(zip(field_names, row))) for row in expected_rows]

  self.assertEqual(expected_trace, er.program_trace)
def testBadChars(self):
  """Check output when the program text contains extra letters.

  The program with 'hello', 'world' and 'comments' embedded is expected to
  output [2, 3, 4] for input [4, 3, 2].
  """
  program_with_text = '>,[>,]hello<world[.<]comments'
  evaluation = bf.evaluate(program_with_text, input_buffer=[4, 3, 2])
  self.assertCorrectOutput([2, 3, 4], evaluation)
def testInputBuffer(self):
  """Check that values from input_buffer reach the program.

  For input [4, 3, 2] the program is expected to output [2, 3, 4].
  """
  program = '>,[>,]<[.<]'
  evaluation = bf.evaluate(program, input_buffer=[4, 3, 2])
  self.assertCorrectOutput([2, 3, 4], evaluation)
def testBase(self):
  """Check evaluation with base=5.

  '+.--.' is expected to output [1, 4] when run with base=5 and an empty
  input buffer.
  """
  evaluation = bf.evaluate('+.--.', base=5, input_buffer=[])
  self.assertCorrectOutput([1, 4], evaluation)