def main(stdscr, filename: str, num: int, result_logger: ResultLogger):
    """Run the addition teacher over generated questions and record its steps.

    For every question produced by ``create_questions(num)`` the environment
    is reset, the ADD program is executed through a ``TerminalNPIRunner``
    driven by ``AdditionTeacher``, and the question is written to
    ``result_logger`` and to the terminal log.

    Args:
        stdscr: Screen object handed to ``Terminal``.
        filename: Destination for the pickled list of recorded runs; nothing
            is written when it is empty/falsy.
        num: Passed to ``create_questions``.
        result_logger: Receives each question via ``write`` after its run.
    """
    screen = Terminal(stdscr, create_char_map())
    screen.init_window(FIELD_WIDTH, FIELD_ROW)
    programs = AdditionProgramSet()
    env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)

    problems = create_questions(num)
    teacher = AdditionTeacher(programs)
    runner = TerminalNPIRunner(screen, teacher)
    runner.verbose = DEBUG_MODE

    recorded_runs = []
    for problem in problems:
        env.reset()
        # Snapshot taken before the run; presumably run_npi adds result
        # fields to the dict it is given -- confirm against run_npi.
        pristine = copy(problem)
        run_npi(env, runner, programs.ADD, problem)
        recorded_runs.append({"q": pristine, "steps": runner.step_list})
        result_logger.write(problem)
        screen.add_log(problem)

    if not filename:
        return
    with open(filename, 'wb') as f:
        pickle.dump(recorded_runs, f, protocol=pickle.HIGHEST_PROTOCOL)
def main(stdscr, model_path: str, num: int, result_logger: ResultLogger):
    """Evaluate an ``AdditionNPIModel`` on generated questions.

    Each question is run through the ADD program with a non-recording
    ``TerminalNPIRunner``; afterwards the question is logged and its
    ``"correct"`` entry decides which counter is incremented.

    Args:
        stdscr: Screen object handed to ``Terminal``.
        model_path: Passed to ``AdditionNPIModel``.
        num: Passed to ``create_questions``; in debug mode only the last
            ``num`` questions are kept.
        result_logger: Receives each question via ``write`` after its run.

    Returns:
        A ``(correct_count, wrong_count)`` tuple.
    """
    screen = Terminal(stdscr, create_char_map())
    screen.init_window(FIELD_WIDTH, FIELD_ROW)
    programs = AdditionProgramSet()
    env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)

    problems = create_questions(num)
    if DEBUG_MODE:
        problems = problems[-num:]

    runtime = RuntimeSystem(terminal=screen)
    model = AdditionNPIModel(runtime, model_path, programs)
    runner = TerminalNPIRunner(screen, model, recording=False)
    runner.verbose = DEBUG_MODE

    # Tally keyed by outcome: True -> correct answers, False -> wrong ones.
    tally = {True: 0, False: 0}
    for problem in problems:
        env.reset()
        run_npi(env, runner, programs.ADD, problem)
        result_logger.write(problem)
        screen.add_log(problem)
        tally[bool(problem["correct"])] += 1

    return tally[True], tally[False]
def main(stdscr, model_path: str, num: int, result_logger: ResultLogger):
    """Run a saved addition NPI model over generated questions and score it.

    The environment is reset before every question, the ADD program is
    executed by a ``TerminalNPIRunner`` created with ``recording=False``, and
    the question is then written to ``result_logger`` and the terminal log.

    Args:
        stdscr: Screen object handed to ``Terminal``.
        model_path: Passed to ``AdditionNPIModel``.
        num: Passed to ``create_questions``; when ``DEBUG_MODE`` is set the
            question list is trimmed to its last ``num`` entries.
        result_logger: Receives each question via ``write`` after its run.

    Returns:
        ``(correct_count, wrong_count)`` based on each question's
        ``'correct'`` entry after the run.
    """
    term = Terminal(stdscr, create_char_map())
    term.init_window(FIELD_WIDTH, FIELD_ROW)
    program_set = AdditionProgramSet()
    env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)

    question_list = create_questions(num)
    if DEBUG_MODE:
        question_list = question_list[-num:]

    system = RuntimeSystem(terminal=term)
    model = AdditionNPIModel(system, model_path, program_set)
    runner = TerminalNPIRunner(term, model, recording=False)
    runner.verbose = DEBUG_MODE

    outcomes = []
    for item in question_list:
        env.reset()
        run_npi(env, runner, program_set.ADD, item)
        result_logger.write(item)
        term.add_log(item)
        outcomes.append(True if item['correct'] else False)

    n_correct = sum(outcomes)
    return n_correct, len(outcomes) - n_correct
def test_to_subset(self, questions):
    """Test this model on ``questions`` and collect teacher steps for failures.

    Every question is copied and checked with ``self.question_test`` using a
    runner driven by this model. When the check fails, the same copy is run
    again through a runner driven by ``AdditionTeacher`` and that runner's
    ``step_list`` is stored alongside the question.

    Args:
        questions: Iterable of question objects accepted by ``copy`` and
            ``self.question_test``.

    Returns:
        ``(correct_count, wrong_count, wrong_steps_list)`` where each entry
        of ``wrong_steps_list`` is ``{"q": question, "steps": step_list}``.
    """
    env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)
    teacher = AdditionTeacher(self.program_set)
    model_runner = TerminalNPIRunner(None, self)
    teacher_runner = TerminalNPIRunner(None, teacher)

    passed = 0
    failures = []
    for source_question in questions:
        question = copy(source_question)
        if self.question_test(env, model_runner, question):
            passed += 1
            continue
        # Replay the failed question with the teacher to obtain its steps.
        self.question_test(env, teacher_runner, question)
        failures.append({"q": question, "steps": teacher_runner.step_list})

    # One failure entry is appended per wrong answer, so its length is the
    # wrong count.
    return passed, len(failures), failures
def do_learn(self, steps_list, epoch, pass_rate=1.0, skip_correct=False):
    """Train the model on recorded steps for up to ``epoch`` epochs.

    Each epoch shuffles ``steps_list`` in place, tests every question with
    the current model, and trains on the recorded steps of questions that
    were answered wrongly. Correctly answered questions are also retrained,
    but only when ``skip_correct`` is false and their running correct count
    is a perfect square (1, 4, 9, ...). The model is saved after every epoch.

    Args:
        steps_list: List of ``{'q': question, 'steps': steps}`` dicts; it is
            reordered in place by ``np.random.shuffle``.
        epoch: Maximum number of epochs.
        pass_rate: Mean per-question success rate at which training stops.
        skip_correct: When true, never train on a question that was just
            answered correctly.

    Returns:
        True as soon as an epoch's success rate reaches ``pass_rate``.
        False when the epochs are exhausted, or when the relative loss change
        is below 0.001 while no question has changed status for more than
        five consecutive epochs that performed training.

    Raises:
        RuntimeError: If a training step yields a non-finite loss.
    """
    env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)
    runner = TerminalNPIRunner(None, self)
    weights_snapshot = None
    streaks = Counter()  # question key -> running count of correct answers
    # These two keep their names because the stall message below quotes them.
    no_change_count = 0
    last_loss = 1000

    for ep in range(1, epoch + 1):
        newly_right = 0
        newly_wrong = 0
        epoch_losses = []
        outcomes = []
        np.random.shuffle(steps_list)

        for idx, record in enumerate(steps_list):
            question = copy(record['q'])
            key = self.dict_to_str(question)

            if self.question_test(env, runner, question):
                if streaks[key] == 0:
                    newly_right += 1
                streaks[key] += 1
                streak = streaks[key]
                print("GOOD!: ep=%2d idx=%3d :%s CorrectCount=%s" %
                      (ep, idx, self.dict_to_str(question), streak))
                outcomes.append(1)
                # Retrain solved questions only at perfect-square counts.
                is_perfect_square = int(math.sqrt(streak)) ** 2 == streak
                if skip_correct or not is_perfect_square:
                    continue
            else:
                outcomes.append(0)
                previous_streak = streaks[key]
                if previous_streak > 0:
                    print(
                        "Degraded: ep=%2d idx=%3d :%s CorrectCount=%s -> 0" %
                        (ep, idx, self.dict_to_str(question), previous_streak))
                    streaks[key] = 0
                    newly_wrong += 1

            # Convert every recorded step before resetting and training.
            samples = []
            for step in record['steps']:
                x = self.convert_input(step.input)
                y, w = self.convert_output(step.output)
                samples.append((x, y, w))

            self.reset()

            for i, (x, y, w) in enumerate(samples):
                loss = self.model.train_on_batch(x, y, sample_weight=w)
                if np.isfinite(loss):
                    epoch_losses.append(loss)
                    weights_snapshot = self.model.get_weights()
                    continue
                print("Loss is not finite!, Last Input=%s" % ([i, (x, y, w)]))
                # Show the weights captured after the last finite step.
                self.print_weights(weights_snapshot, detail=True)
                raise RuntimeError("Loss is not finite!")

        if epoch_losses:
            cur_loss = np.average(epoch_losses)
            print(
                "ep=%2d: ok_rate=%.2f%% (+%s -%s): ave loss %s (%s samples)" %
                (ep, np.average(outcomes) * 100, newly_right, newly_wrong,
                 cur_loss, len(steps_list)))

            if newly_right + newly_wrong == 0:
                no_change_count += 1
            else:
                no_change_count = 0

            if math.fabs(1 - cur_loss / last_loss) < 0.001 and no_change_count > 5:
                print(
                    "math.fabs(1 - cur_loss/last_loss) < 0.001 and no_change_count > 5:"
                )
                return False
            last_loss = cur_loss
            print("=" * 80)

        self.save()
        if np.average(outcomes) >= pass_rate:
            return True

    return False