Exemple #1
0
def main(stdscr, filename: str, num: int, result_logger: ResultLogger):
    terminal = Terminal(stdscr, create_char_map())
    terminal.init_window(FIELD_WIDTH, FIELD_ROW)
    program_set = AdditionProgramSet()
    addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)

    questions = create_questions(num)
    teacher = AdditionTeacher(program_set)
    npi_runner = TerminalNPIRunner(terminal, teacher)
    npi_runner.verbose = DEBUG_MODE
    steps_list = []
    for data in questions:
        addition_env.reset()
        q = copy(data)
        run_npi(addition_env, npi_runner, program_set.ADD, data)
        steps_list.append({"q": q, "steps": npi_runner.step_list})
        result_logger.write(data)
        terminal.add_log(data)

    if filename:
        with open(filename, 'wb') as f:
            pickle.dump(steps_list, f, protocol=pickle.HIGHEST_PROTOCOL)
Exemple #2
0
def main(stdscr, model_path: str, num: int, result_logger: ResultLogger):
    terminal = Terminal(stdscr, create_char_map())
    terminal.init_window(FIELD_WIDTH, FIELD_ROW)
    program_set = AdditionProgramSet()
    addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)

    questions = create_questions(num)
    if DEBUG_MODE:
        questions = questions[-num:]
    system = RuntimeSystem(terminal=terminal)
    npi_model = AdditionNPIModel(system, model_path, program_set)
    npi_runner = TerminalNPIRunner(terminal, npi_model, recording=False)
    npi_runner.verbose = DEBUG_MODE
    correct_count = wrong_count = 0
    for data in questions:
        addition_env.reset()
        run_npi(addition_env, npi_runner, program_set.ADD, data)
        result_logger.write(data)
        terminal.add_log(data)
        if data["correct"]:
            correct_count += 1
        else:
            wrong_count += 1
    return correct_count, wrong_count
Exemple #3
0
def main(stdscr, model_path: str, num: int, result_logger: ResultLogger):
    terminal = Terminal(stdscr, create_char_map())
    terminal.init_window(FIELD_WIDTH, FIELD_ROW)
    program_set = AdditionProgramSet()
    addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)

    questions = create_questions(num)
    if DEBUG_MODE:
        questions = questions[-num:]
    system = RuntimeSystem(terminal=terminal)
    npi_model = AdditionNPIModel(system, model_path, program_set)
    npi_runner = TerminalNPIRunner(terminal, npi_model, recording=False)
    npi_runner.verbose = DEBUG_MODE
    correct_count = wrong_count = 0
    for data in questions:
        addition_env.reset()
        run_npi(addition_env, npi_runner, program_set.ADD, data)
        result_logger.write(data)
        terminal.add_log(data)
        if data['correct']:
            correct_count += 1
        else:
            wrong_count += 1
    return correct_count, wrong_count
Exemple #4
0
 def test_to_subset(self, questions):
     addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)
     teacher = AdditionTeacher(self.program_set)
     npi_runner = TerminalNPIRunner(None, self)
     teacher_runner = TerminalNPIRunner(None, teacher)
     correct_count = wrong_count = 0
     wrong_steps_list = []
     for idx, question in enumerate(questions):
         question = copy(question)
         if self.question_test(addition_env, npi_runner, question):
             correct_count += 1
         else:
             self.question_test(addition_env, teacher_runner, question)
             wrong_steps_list.append({
                 "q": question,
                 "steps": teacher_runner.step_list
             })
             wrong_count += 1
     return correct_count, wrong_count, wrong_steps_list
Exemple #5
0
    def do_learn(self, steps_list, epoch, pass_rate=1.0, skip_correct=False):
        addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)
        npi_runner = TerminalNPIRunner(None, self)
        last_weights = None
        correct_count = Counter()
        no_change_count = 0
        last_loss = 1000
        for ep in range(1, epoch + 1):
            correct_new = wrong_new = 0
            losses = []
            ok_rate = []
            np.random.shuffle(steps_list)
            for idx, steps_dict in enumerate(steps_list):
                question = copy(steps_dict['q'])
                question_key = self.dict_to_str(question)
                if self.question_test(addition_env, npi_runner, question):
                    if correct_count[question_key] == 0:
                        correct_new += 1
                    correct_count[question_key] += 1
                    print("GOOD!: ep=%2d idx=%3d :%s CorrectCount=%s" %
                          (ep, idx, self.dict_to_str(question),
                           correct_count[question_key]))
                    ok_rate.append(1)
                    cc = correct_count[question_key]
                    if skip_correct or int(math.sqrt(cc))**2 != cc:
                        continue
                else:
                    ok_rate.append(0)
                    if correct_count[question_key] > 0:
                        print(
                            "Degraded: ep=%2d idx=%3d :%s CorrectCount=%s -> 0"
                            % (ep, idx, self.dict_to_str(question),
                               correct_count[question_key]))
                        correct_count[question_key] = 0
                        wrong_new += 1

                steps = steps_dict['steps']
                xs = []
                ys = []
                ws = []
                for step in steps:
                    xs.append(self.convert_input(step.input))
                    y, w = self.convert_output(step.output)
                    ys.append(y)
                    ws.append(w)

                self.reset()

                for i, (x, y, w) in enumerate(zip(xs, ys, ws)):
                    loss = self.model.train_on_batch(x, y, sample_weight=w)
                    if not np.isfinite(loss):
                        print("Loss is not finite!, Last Input=%s" %
                              ([i, (x, y, w)]))
                        self.print_weights(last_weights, detail=True)
                        raise RuntimeError("Loss is not finite!")
                    losses.append(loss)
                    last_weights = self.model.get_weights()
            if losses:
                cur_loss = np.average(losses)
                print(
                    "ep=%2d: ok_rate=%.2f%% (+%s -%s): ave loss %s (%s samples)"
                    % (ep, np.average(ok_rate) * 100, correct_new, wrong_new,
                       cur_loss, len(steps_list)))
                # self.print_weights()
                if correct_new + wrong_new == 0:
                    no_change_count += 1
                else:
                    no_change_count = 0

                if math.fabs(1 - cur_loss /
                             last_loss) < 0.001 and no_change_count > 5:
                    print(
                        "math.fabs(1 - cur_loss/last_loss) < 0.001 and no_change_count > 5:"
                    )
                    return False
                last_loss = cur_loss
                print("=" * 80)
            self.save()
            if np.average(ok_rate) >= pass_rate:
                return True
        return False