def npi_program_interface(self, env, program: Program, arguments: IntegerArguments, depth=0):
    """Run ``program`` against ``env``, recursing into model-selected sub-programs.

    The model is stepped repeatedly until the ``r`` field of its latest
    output reaches ``self.alpha``.  After each step, a program flagged
    ``output_to_env`` acts on the environment directly; otherwise, if the
    step produced a follow-up program, that program is run one level deeper.

    Args:
        env: environment providing ``get_observation()``.
        program: the program to execute at this level.
        arguments: arguments for ``program``; a copy is handed to every consumer.
        depth: current recursion depth (0 for the outermost call).

    Raises:
        StopIteration: if ``depth`` exceeds ``self.max_depth`` or the step
            counter exceeds ``self.max_step``.  In that case
            ``self.model.exit_function()`` is not called for this level.
    """
    # Budget checks before entering the function on the model side.
    if self.max_depth < depth:
        raise StopIteration()
    if self.max_step < self.steps:
        raise StopIteration()

    self.model.enter_function()

    output = StepOutput(0, None, None)
    while output.r < self.alpha:
        self.steps += 1
        if self.max_step < self.steps:
            raise StopIteration()

        observation = env.get_observation()
        output = self.model.step(observation, program, arguments.copy())

        if self.recording:
            step_input = StepInput(observation, program, arguments.copy())
            self.step_list.append(StepInOut(step_input, output))

        self.display_information(program, arguments, output, depth)

        if program.output_to_env:
            program.do(env, arguments.copy())
            self.display_env(env)
        elif output.program:  # modify original algorithm
            self.npi_program_interface(env, output.program, output.arguments,
                                       depth=depth + 1)

    self.model.exit_function()
def convert_for_step_return(step_values: tuple) -> StepOutput:
    """Build a ``StepOutput`` from a raw tuple of step values.

    A two-item tuple is read as ``(program, arguments)`` and is given
    ``PG_CONTINUE`` as its first field.  Any other length is read by index
    as ``(r, program, arguments)``.  The raw argument values are wrapped in
    ``IntegerArguments`` in both cases.
    """
    if len(step_values) != 2:
        flag = step_values[0]
        program = step_values[1]
        raw_arguments = step_values[2]
    else:
        flag = PG_CONTINUE
        program = step_values[0]
        raw_arguments = step_values[1]
    return StepOutput(flag, program, IntegerArguments(raw_arguments))
def npi_program_interface(self, env, program: Program, arguments: IntegerArguments, depth=0):
    """Run ``program`` against ``env``, logging progress to ``self.terminal``.

    The model is stepped repeatedly until the ``r`` field of its latest
    output reaches ``self.alpha``.  After each step, a program flagged
    ``output_to_env`` acts on the environment directly; otherwise the
    program is logged and, if the step produced a follow-up program, that
    program is run one level deeper.

    Args:
        env: environment providing ``get_observation()``.
        program: the program to execute at this level.
        arguments: arguments for ``program``; a copy is handed to every consumer.
        depth: current recursion depth (0 for the outermost call).

    Raises:
        StopIteration: if ``depth`` exceeds ``self.max_depth`` or the step
            counter exceeds ``self.max_step``.  In that case
            ``self.model.exit_function()`` is not called for this level.
    """
    too_many_steps = self.max_step < self.steps
    too_deep = self.max_depth < depth
    if too_many_steps or too_deep:
        # Report each exceeded limit with the message that matches it; the
        # stdout line and the terminal log now say the same thing.
        if too_many_steps:
            print("stop iteration becasue there are too many steps")
            self.terminal.add_log(
                "stop iteration becasue there are too many steps")
        if too_deep:
            print("stop iteration becasue it's too deep")
            self.terminal.add_log("stop iteration becasue it's too deep")
        raise StopIteration()

    self.model.enter_function()

    result = StepOutput(0, None, None)
    while result.r < self.alpha:
        self.terminal.add_log("in a function")
        self.steps += 1
        if self.max_step < self.steps:
            print("stop iteration becasue there are too many steps")
            self.terminal.add_log(
                "stop iteration becasue there are too many steps")
            raise StopIteration()

        env_observation = env.get_observation()

        # Run the model for one step; the result is a StepOutput.
        result = self.model.step(env_observation, program, arguments.copy())
        self.terminal.add_log("after excuting funciton")

        # Keep this step's input/output pair when recording is enabled.
        if self.recording:
            self.step_list.append(
                StepInOut(
                    StepInput(env_observation, program, arguments.copy()),
                    result))

        self.display_information(program, arguments, result, depth)

        if program.output_to_env:
            program.do(env, arguments.copy())
            self.display_env(env)
        else:
            self.terminal.add_log(str(program))
            if result.program:  # modify original algorithm
                self.npi_program_interface(env, result.program,
                                           result.arguments, depth=depth + 1)

    self.terminal.add_log("exit funtion")
    self.model.exit_function()
def convert_for_step_return(step_values: tuple) -> StepOutput:
    """Build a ``StepOutput`` from a raw tuple of step values.

    A three-item tuple is read as ``(r, program, arguments)``; per the
    original author's note this is the output at the end of a primitive
    program, whose first item specifies PG_RETURN.  Any other length is
    read as ``(program, arguments)`` and is given ``PG_CONTINUE`` as its
    first field.  The raw argument values are wrapped in
    ``IntegerArguments`` in both cases.
    """
    if len(step_values) != 3:
        return StepOutput(PG_CONTINUE,
                          step_values[0],
                          IntegerArguments(step_values[1]))

    flag, program, raw_arguments = step_values
    return StepOutput(flag, program, IntegerArguments(raw_arguments))
def step(self, env_observation: np.ndarray, pg: Program, arguments: IntegerArguments) -> StepOutput:
    """Run the underlying model for one step and decode its prediction.

    The inputs are packed into a ``StepInput`` and converted with
    ``self.convert_input``.  The prediction is split by position: item 0
    becomes the ``r`` field, the argmax of item 1 selects a program from
    ``self.program_set``, and the remaining items are stacked into the
    values of a new ``IntegerArguments``.

    Returns:
        StepOutput: ``(r, program, arguments)`` decoded from the prediction.
    """
    model_input = self.convert_input(StepInput(env_observation, pg, arguments))
    # if batch_size==1, returns single row
    prediction = self.model.predict(model_input, batch_size=1)

    r_value = prediction[0]
    program_scores = prediction[1]
    argument_rows = prediction[2:]

    next_program = self.program_set.get(program_scores.argmax())
    next_arguments = IntegerArguments(values=np.stack(argument_rows))
    return StepOutput(r_value, next_program, next_arguments)
def run_npi(addition_env, npi_runner, program, data):
    """Solve one addition problem with the runner and record the outcome in ``data``.

    Reads ``data['in1']`` and ``data['in2']``; writes ``data['expect']``
    (their sum), ``data['result']`` (the environment's output after the run)
    and ``data['correct']`` (whether the two match).  Exceptions raised by
    the runner are not caught here.
    """
    first, second = data['in1'], data['in2']
    data['expect'] = first + second

    addition_env.setup_problem(first, second)
    npi_runner.reset()
    initial_arguments = IntegerArguments(ARG_NUM, ARG_DEPTH)
    npi_runner.npi_program_interface(addition_env, program, initial_arguments)

    output = addition_env.get_output()
    data['result'] = output
    data['correct'] = output == data['expect']
def run_npi(addition_env, npi_runner, program, data):
    """Solve one addition problem with the runner and record the outcome in ``data``.

    Reads ``data['in1']`` and ``data['in2']``; writes ``data['expect']``
    (their sum), ``data['result']`` (the environment's output after the run)
    and ``data['correct']`` (whether the two match).  The environment is
    displayed once, forced, before the program starts.  Exceptions raised
    by the runner are not caught here.
    """
    first, second = data['in1'], data['in2']
    data['expect'] = first + second

    addition_env.setup_problem(first, second)
    npi_runner.reset()
    npi_runner.display_env(addition_env, force=True)
    initial_arguments = IntegerArguments()
    npi_runner.npi_program_interface(addition_env, program, initial_arguments)

    output = addition_env.get_output()
    data['result'] = output
    data['correct'] = output == data['expect']
def run_npi(bubblesort_env, npi_runner, program, data):
    """Sort one problem instance with the runner and record the outcome in ``data``.

    Reads ``data['raw']``; writes ``data['expect']`` (``sorted`` of the raw
    input), ``data['result']`` (the environment's output after the run) and
    ``data['correct']`` (whether the two match).  The environment is
    displayed once, forced, before the program starts.  Exceptions raised
    by the runner are not caught here.
    """
    unsorted_values = data['raw']
    data['expect'] = sorted(unsorted_values)

    bubblesort_env.setup_problem(unsorted_values)
    npi_runner.reset()
    npi_runner.display_env(bubblesort_env, force=True)
    initial_arguments = IntegerArguments()
    npi_runner.npi_program_interface(bubblesort_env, program, initial_arguments)

    output = bubblesort_env.get_output()
    data['result'] = output
    data['correct'] = output == data['expect']
def run_npi(multiplication_env, npi_runner, program, data):
    """Solve one multiplication problem with the runner and record the outcome in ``data``.

    Reads ``data['mul1']`` and ``data['mul2']``; writes ``data['expect']``
    (their product), ``data['result']`` (the environment's output after the
    run) and ``data['correct']`` (whether the two match).  Exceptions raised
    by the runner are not caught here.
    """
    left, right = data['mul1'], data['mul2']
    data['expect'] = left * right

    multiplication_env.setup_problem(left, right)
    npi_runner.reset()
    # TODO bug here: display_env
    npi_runner.display_env(multiplication_env, force=True)
    initial_arguments = IntegerArguments()
    npi_runner.npi_program_interface(multiplication_env, program,
                                     initial_arguments)

    output = multiplication_env.get_output()
    data['result'] = output
    data['correct'] = output == data['expect']
def convert_output(self, p_out: StepOutput):
    """Turn a ``StepOutput`` into a list of output arrays and matching weights.

    The outputs are, in order: the ``r`` value, a program vector of size
    ``PROGRAM_VEC_SIZE``, then one entry per row of the argument values.
    Each output has a one-element weight array: ``1.`` for ``r``, for a
    present program and for the first ``len(program.args)`` argument rows;
    ``1e-10`` for a missing program and for the remaining argument rows.
    Presumably these feed model training as targets and per-output sample
    weights -- confirm against the caller.

    Returns:
        tuple: ``(outputs, weights)`` where every output is reshaped to
        ``(self.batch_size, -1)``.
    """
    outputs = [np.array((p_out.r, ))]
    weight_rows = [[1.]]

    chosen = p_out.program
    if not chosen:
        # No program: zero vector, default argument values, weights near zero.
        argument_rows = IntegerArguments().values
        used_args = 0
        outputs.append(np.zeros((PROGRAM_VEC_SIZE, )))
        weight_rows.append([1e-10])
    else:
        argument_rows = p_out.arguments.values
        used_args = len(chosen.args or [])
        outputs.append(chosen.to_one_hot(PROGRAM_VEC_SIZE))
        weight_rows.append([1.])

    # split by each args
    outputs.extend(argument_rows)
    unused_args = len(argument_rows) - used_args
    weight_rows.extend([[1.]] * used_args)
    weight_rows.extend([[1e-10]] * unused_args)

    weight_arrays = [np.array(row) for row in weight_rows]
    batch_shape = (self.batch_size, -1)
    return [out.reshape(batch_shape) for out in outputs], weight_arrays
def decode_params(env_observation: np.ndarray, arguments: IntegerArguments):
    """Decode an observation and its arguments into index form.

    Returns:
        tuple: the argmax of ``env_observation`` along axis 1, and the
        result of ``arguments.decode_all()``.
    """
    observed_indices = env_observation.argmax(axis=1)
    decoded_arguments = arguments.decode_all()
    return observed_indices, decoded_arguments
def do(self, env: BubblesortEnv, args: IntegerArguments):
    """Move a pointer in the bubblesort environment.

    Argument 0 is decoded as the row and argument 1 as the left/right
    selector; both are forwarded to ``env.move_pointer``.
    """
    pointer_row, direction = (args.decode_at(index) for index in (0, 1))
    env.move_pointer(pointer_row, direction)
def do(self, env: AdditionEnv, args: IntegerArguments):
    """Write a digit into the addition environment.

    Argument 0 selects the target row: row 2 when it equals
    ``self.WRITE_TO_CARRY``, row 3 otherwise.  Argument 1 is the decoded
    digit; the value written is that digit plus one (presumably an offset
    required by the environment's encoding -- confirm against ``env.write``).
    """
    if args.decode_at(0) == self.WRITE_TO_CARRY:
        target_row = 2
    else:
        target_row = 3
    decoded_digit = args.decode_at(1)
    env.write(target_row, decoded_digit + 1)
def do(self, env: MultiplicationEnv, args: IntegerArguments):
    """Call ``env.get_row`` with the row decoded from argument 0.

    The return value of ``env.get_row`` is discarded here.
    NOTE(review): any effect on the environment would have to come from
    ``get_row`` itself -- confirm against its implementation.
    """
    selected_row = args.decode_at(0)
    env.get_row(selected_row)
def do(self, env: AdditionEnv, args: IntegerArguments):
    """Move a pointer in the addition environment.

    Argument 0 is decoded as the pointer kind and argument 1 as the
    left/right selector; both are forwarded to ``env.move_pointer``.
    """
    pointer_kind, direction = (args.decode_at(index) for index in (0, 1))
    env.move_pointer(pointer_kind, direction)
def convert_for_step_return(step_values):
    """Build a ``StepOutput`` from a raw sequence of step values.

    A two-item sequence is read as ``(program, arguments)`` and is given
    ``PG_CONTINUE`` as its first field.  Any other length is read by index
    as ``(r, program, arguments)``.  The raw argument values are wrapped in
    ``IntegerArguments(ARG_NUM, ARG_DEPTH, ...)`` in both cases.
    """
    if len(step_values) != 2:
        flag = step_values[0]
        program = step_values[1]
        raw_arguments = step_values[2]
    else:
        flag = PG_CONTINUE
        program = step_values[0]
        raw_arguments = step_values[1]

    wrapped = IntegerArguments(ARG_NUM, ARG_DEPTH, raw_arguments)
    return StepOutput(flag, program, wrapped)