Exemple #1
0
    def npi_program_interface(self, env, program: Program, arguments: IntegerArguments, depth=0):
        """Run ``program`` against ``env``, recursing into sub-programs the model returns.

        Every loop iteration increments ``self.steps``, feeds the current
        observation to ``self.model.step`` and keeps going while the returned
        ``r`` stays below ``self.alpha``.

        Args:
            env: environment providing ``get_observation()``.
            program: program being executed at this call level.
            arguments: arguments for ``program``; only copies are handed on.
            depth: current recursion depth (0 for the outermost call).

        Raises:
            StopIteration: if ``depth`` exceeds ``self.max_depth`` or the step
                counter exceeds ``self.max_step``.
        """
        too_deep = depth > self.max_depth
        if too_deep or self.steps > self.max_step:
            raise StopIteration()

        self.model.enter_function()

        # Seed with r == 0 so the loop condition is evaluated against alpha
        # before the model has produced anything.
        output = StepOutput(0, None, None)
        while output.r < self.alpha:
            self.steps += 1
            if self.steps > self.max_step:
                raise StopIteration()

            observation = env.get_observation()
            output = self.model.step(observation, program, arguments.copy())
            if self.recording:
                step_input = StepInput(observation, program, arguments.copy())
                self.step_list.append(StepInOut(step_input, output))
            self.display_information(program, arguments, output, depth)

            if program.output_to_env:
                # This program acts on the environment directly.
                program.do(env, arguments.copy())
                self.display_env(env)
            elif output.program:  # modify original algorithm
                self.npi_program_interface(env, output.program, output.arguments, depth=depth + 1)

        self.model.exit_function()
Exemple #2
0
 def convert_for_step_return(step_values: tuple) -> StepOutput:
     """Build a ``StepOutput`` from a tuple of step values.

     A two-item tuple is read as ``(program, argument values)`` and gets
     ``PG_CONTINUE`` as its first field. Any other length is read as
     ``(first field, program, argument values)``.
     """
     if len(step_values) != 2:
         wrapped_args = IntegerArguments(step_values[2])
         return StepOutput(step_values[0], step_values[1], wrapped_args)

     wrapped_args = IntegerArguments(step_values[1])
     return StepOutput(PG_CONTINUE, step_values[0], wrapped_args)
Exemple #3
0
    def npi_program_interface(self, env, program: Program, arguments: IntegerArguments, depth=0):
        """Execute ``program`` step by step, descending into returned sub-programs.

        The loop asks ``self.model.step`` for a ``StepOutput`` on each pass and
        ends once that output's ``r`` is no longer below ``self.alpha``.

        Args:
            env: environment queried through ``get_observation()``.
            program: the program to run at this level.
            arguments: its arguments (copied before being passed along).
            depth: recursion depth of this call.

        Raises:
            StopIteration: when the depth limit (``self.max_depth``) or the
                step limit (``self.max_step``) is exceeded.
        """
        depth_exceeded = depth > self.max_depth
        if depth_exceeded or self.steps > self.max_step:
            raise StopIteration()

        model = self.model
        model.enter_function()

        # Initial placeholder whose r is 0; replaced by the first model step.
        step_result = StepOutput(0, None, None)
        while step_result.r < self.alpha:
            self.steps += 1
            if self.steps > self.max_step:
                raise StopIteration()

            observed = env.get_observation()
            step_result = self.model.step(observed, program, arguments.copy())
            if self.recording:
                recorded_input = StepInput(observed, program, arguments.copy())
                self.step_list.append(StepInOut(recorded_input, step_result))
            self.display_information(program, arguments, step_result, depth)

            if program.output_to_env:
                program.do(env, arguments.copy())
                self.display_env(env)
                continue

            sub_program = step_result.program
            if sub_program:  # modify original algorithm
                self.npi_program_interface(env, sub_program, step_result.arguments, depth=depth + 1)

        self.model.exit_function()
    def npi_program_interface(self,
                              env,
                              program: Program,
                              arguments: IntegerArguments,
                              depth=0):
        """Run ``program`` on ``env``, logging progress to ``self.terminal``.

        Each loop iteration increments ``self.steps``, obtains a ``StepOutput``
        from ``self.model.step`` and continues while its ``r`` is below
        ``self.alpha``. When the program does not output to the environment
        and the model returned a sub-program, that sub-program is run
        recursively with ``depth + 1``.

        Args:
            env: environment providing ``get_observation()``.
            program: program executed at this call level.
            arguments: arguments for ``program``; copies are passed on.
            depth: current recursion depth (0 for the outermost call).

        Raises:
            StopIteration: if ``depth`` exceeds ``self.max_depth`` or the step
                counter exceeds ``self.max_step``.
        """
        too_many_steps = self.max_step < self.steps
        too_deep = self.max_depth < depth
        if too_many_steps or too_deep:
            # Report each limit with its own message on both channels; the
            # previous version mixed the "steps" and "deep" texts up.
            if too_many_steps:
                print("stop iteration becasue there are too many steps")
                self.terminal.add_log(
                    "stop iteration becasue there are too many steps")
            if too_deep:
                print("stop iteration becasue it's too deep")
                self.terminal.add_log("stop iteration becasue it's too deep")
            raise StopIteration()

        self.model.enter_function()
        # Seed with r == 0 so the loop condition is checked against alpha
        # before the model has produced any output.
        result = StepOutput(0, None, None)
        while result.r < self.alpha:
            self.terminal.add_log("in a function")
            self.steps += 1
            if self.max_step < self.steps:
                print("stop iteration becasue there are too many steps")
                self.terminal.add_log(
                    "stop iteration becasue there are too many steps")
                raise StopIteration()
            env_observation = env.get_observation()
            # Run the model for one step; result is a StepOutput.
            result = self.model.step(env_observation, program,
                                     arguments.copy())
            self.terminal.add_log("after excuting funciton")
            # When recording is on, keep the (input, output) pair of this step.
            if self.recording:
                self.step_list.append(
                    StepInOut(
                        StepInput(env_observation, program, arguments.copy()),
                        result))
            self.display_information(program, arguments, result, depth)

            if program.output_to_env:
                # This program acts on the environment directly.
                program.do(env, arguments.copy())
                self.display_env(env)
            else:
                self.terminal.add_log(str(program))
                if result.program:  # modify original algorithm
                    self.npi_program_interface(env,
                                               result.program,
                                               result.arguments,
                                               depth=depth + 1)
        self.terminal.add_log("exit funtion")
        self.model.exit_function()
Exemple #5
0
    def convert_for_step_return(step_values: tuple) -> StepOutput:
        """Turn a tuple of step values into a ``StepOutput``.

        A three-item tuple supplies all three fields itself; per the original
        note this is the output at the end of a primitive program, whose first
        item specifies PG_RETURN. Any other length is read as
        ``(program, argument values)`` and gets ``PG_CONTINUE`` as first field.
        """
        carries_own_flag = len(step_values) == 3
        if not carries_own_flag:
            arg_wrapper = IntegerArguments(step_values[1])
            return StepOutput(PG_CONTINUE, step_values[0], arg_wrapper)

        arg_wrapper = IntegerArguments(step_values[2])
        return StepOutput(step_values[0], step_values[1], arg_wrapper)
Exemple #6
0
    def step(self, env_observation: np.ndarray, pg: Program, arguments: IntegerArguments) -> StepOutput:
        """Run the underlying model once and wrap its prediction as a ``StepOutput``.

        The prediction is split into its first entry (``r``), its second entry
        (used via ``argmax`` to look the next program up in
        ``self.program_set``) and the remaining entries, which are stacked
        into the argument values.
        """
        model_input = self.convert_input(StepInput(env_observation, pg, arguments))
        prediction = self.model.predict(model_input, batch_size=1)  # if batch_size==1, returns single row

        termination = prediction[0]
        program_scores = prediction[1]
        argument_outputs = prediction[2:]

        next_program = self.program_set.get(program_scores.argmax())
        next_arguments = IntegerArguments(values=np.stack(argument_outputs))
        return StepOutput(termination, next_program, next_arguments)
Exemple #7
0
def run_npi(addition_env, npi_runner, program, data):
    """Run one addition problem through the NPI runner and store the outcome in ``data``.

    Reads ``data['in1']`` and ``data['in2']``; writes ``data['expect']`` (their
    sum), ``data['result']`` (the environment's output after the run) and
    ``data['correct']`` (whether the two are equal).
    """
    first, second = data['in1'], data['in2']
    data['expect'] = first + second

    addition_env.setup_problem(first, second)

    npi_runner.reset()
    initial_arguments = IntegerArguments(ARG_NUM, ARG_DEPTH)
    npi_runner.npi_program_interface(addition_env, program, initial_arguments)

    produced = addition_env.get_output()
    data['result'] = produced
    data['correct'] = produced == data['expect']
def run_npi(addition_env, npi_runner, program, data):
    """Solve one addition problem with the NPI runner and record the outcome in ``data``.

    Reads ``data['in1']`` and ``data['in2']``; writes ``data['expect']`` (their
    sum), ``data['result']`` (the environment's output after the run) and
    ``data['correct']`` (whether the two are equal). The environment is
    displayed once, with ``force=True``, before the program starts.
    """
    first, second = data['in1'], data['in2']
    data['expect'] = first + second

    addition_env.setup_problem(first, second)

    npi_runner.reset()
    npi_runner.display_env(addition_env, force=True)
    initial_arguments = IntegerArguments()
    npi_runner.npi_program_interface(addition_env, program, initial_arguments)

    produced = addition_env.get_output()
    data['result'] = produced
    data['correct'] = produced == data['expect']
Exemple #9
0
def run_npi(bubblesort_env, npi_runner, program, data):
    """Run one sorting problem through the NPI runner and record the outcome in ``data``.

    Reads ``data['raw']``; writes ``data['expect']`` (``sorted`` of the raw
    input), ``data['result']`` (the environment's output after the run) and
    ``data['correct']`` (whether the two are equal). The environment is
    displayed once, with ``force=True``, before the program starts.
    """
    unsorted_values = data['raw']
    data['expect'] = sorted(unsorted_values)

    bubblesort_env.setup_problem(unsorted_values)

    npi_runner.reset()
    npi_runner.display_env(bubblesort_env, force=True)
    initial_arguments = IntegerArguments()
    npi_runner.npi_program_interface(bubblesort_env, program, initial_arguments)

    produced = bubblesort_env.get_output()
    data['result'] = produced
    data['correct'] = produced == data['expect']
Exemple #10
0
def run_npi(multiplication_env, npi_runner, program, data):
    """Run one multiplication problem through the NPI runner and record the outcome in ``data``.

    Reads ``data['mul1']`` and ``data['mul2']``; writes ``data['expect']``
    (their product), ``data['result']`` (the environment's output after the
    run) and ``data['correct']`` (whether the two are equal).
    """
    factor_a, factor_b = data['mul1'], data['mul2']
    data['expect'] = factor_a * factor_b

    multiplication_env.setup_problem(factor_a, factor_b)

    npi_runner.reset()
    # TODO bug here: display_env
    npi_runner.display_env(multiplication_env, force=True)
    initial_arguments = IntegerArguments()
    npi_runner.npi_program_interface(multiplication_env, program, initial_arguments)

    produced = multiplication_env.get_output()
    data['result'] = produced
    data['correct'] = produced == data['expect']
Exemple #11
0
    def convert_output(self, p_out: StepOutput):
        """Convert a ``StepOutput`` into a list of target arrays and a list of weights.

        Targets are, in order: ``p_out.r``, the program vector, then one entry
        per argument value. Each target is reshaped to
        ``(self.batch_size, -1)``. Weights are one-element arrays: ``1.`` for
        ``r``, for a present program and for each argument that program
        declares; ``1e-10`` for a missing program and for every other argument.

        Returns:
            tuple: ``(targets, weights)``.
        """
        program = p_out.program
        if program:
            arg_values = p_out.arguments.values
            declared_args = len(program.args or [])
            program_target = program.to_one_hot(PROGRAM_VEC_SIZE)
            program_weight = 1.
        else:
            # No program: use default argument values and an all-zero program
            # vector, both weighted with 1e-10.
            arg_values = IntegerArguments().values
            declared_args = 0
            program_target = np.zeros((PROGRAM_VEC_SIZE, ))
            program_weight = 1e-10

        targets = [np.array((p_out.r, )), program_target]
        targets.extend(arg_values)  # split by each args

        unused_args = len(arg_values) - declared_args
        scalar_weights = [1., program_weight]
        scalar_weights += [1.] * declared_args + [1e-10] * unused_args
        weights = [np.array([w]) for w in scalar_weights]

        reshaped = [t.reshape((self.batch_size, -1)) for t in targets]
        return reshaped, weights
Exemple #12
0
 def decode_params(env_observation: np.ndarray, arguments: IntegerArguments):
     """Return ``(argmax of the observation along axis 1, arguments.decode_all())``."""
     observed_indices = env_observation.argmax(axis=1)
     decoded_arguments = arguments.decode_all()
     return observed_indices, decoded_arguments
Exemple #13
0
 def do(self, env: BubblesortEnv, args: IntegerArguments):
     """Move a pointer in ``env`` using the first two decoded arguments.

     Argument 0 is passed as the row and argument 1 as the left/right
     selector of ``env.move_pointer``.
     """
     pointer_row, direction = args.decode_at(0), args.decode_at(1)
     env.move_pointer(pointer_row, direction)
Exemple #14
0
 def decode_params(env_observation: np.ndarray,
                   arguments: IntegerArguments):
     """Decode both inputs: observation via ``argmax(axis=1)``, arguments via ``decode_all()``."""
     observation_argmax = env_observation.argmax(axis=1)
     argument_values = arguments.decode_all()
     return observation_argmax, argument_values
 def do(self, env: AdditionEnv, args: IntegerArguments):
     """Write a digit into ``env``.

     The target row is 2 when argument 0 equals ``self.WRITE_TO_CARRY`` and 3
     otherwise. The value written is decoded argument 1 plus one.
     """
     if args.decode_at(0) == self.WRITE_TO_CARRY:
         target_row = 2
     else:
         target_row = 3
     decoded_digit = args.decode_at(1)
     env.write(target_row, decoded_digit + 1)
Exemple #16
0
 def do(self, env: MultiplicationEnv, args: IntegerArguments):
     """Call ``env.get_row`` with decoded argument 0; the return value is discarded."""
     selected_row = args.decode_at(0)
     env.get_row(selected_row)
Exemple #17
0
 def do(self, env: AdditionEnv, args: IntegerArguments):
     """Move a pointer in ``env`` using the first two decoded arguments.

     Argument 0 is passed as the pointer kind and argument 1 as the
     left/right selector of ``env.move_pointer``.
     """
     pointer, direction = args.decode_at(0), args.decode_at(1)
     env.move_pointer(pointer, direction)
Exemple #18
0
 def convert_for_step_return(step_values):
     """Build a ``StepOutput`` from a sequence of step values.

     A two-item sequence is read as ``(program, argument values)`` and gets
     ``PG_CONTINUE`` as its first field. Any other length is read as
     ``(first field, program, argument values)``. The argument values are
     wrapped in ``IntegerArguments(ARG_NUM, ARG_DEPTH, ...)``.
     """
     if len(step_values) != 2:
         wrapped_args = IntegerArguments(ARG_NUM, ARG_DEPTH, step_values[2])
         return StepOutput(step_values[0], step_values[1], wrapped_args)

     wrapped_args = IntegerArguments(ARG_NUM, ARG_DEPTH, step_values[1])
     return StepOutput(PG_CONTINUE, step_values[0], wrapped_args)
Exemple #19
0
 def do(self, env: AdditionEnv, args: IntegerArguments):
     """Write a digit into ``env``.

     Row 2 is used when argument 0 equals ``self.WRITE_TO_CARRY``, row 3
     otherwise. The value written is decoded argument 1 plus one.
     """
     writes_carry = args.decode_at(0) == self.WRITE_TO_CARRY
     if writes_carry:
         destination_row = 2
     else:
         destination_row = 3
     value = args.decode_at(1) + 1
     env.write(destination_row, value)
 def do(self, env: AdditionEnv, args: IntegerArguments):
     """Forward decoded arguments 0 and 1 to ``env.move_pointer``.

     Argument 0 is passed as the pointer kind, argument 1 as the left/right
     selector.
     """
     which_pointer = args.decode_at(0)
     which_way = args.decode_at(1)
     env.move_pointer(which_pointer, which_way)