def main(stdscr, model_path: str, num: int, result_logger: ResultLogger):
    """Evaluate a bubble-sort NPI model on generated questions.

    Every question is run through ``run_npi`` on a freshly reset
    environment, then written to ``result_logger`` and to the terminal log.
    A ``StopIteration`` escaping the run is counted as a wrong answer.

    Args:
        stdscr: Handed to ``Terminal`` (presumably a curses window --
            confirm against the caller).
        model_path: Forwarded to ``BubblesortNPIModel``.
        num: Question count passed to ``create_questions``.
        result_logger: Receives every question record via ``write``.

    Returns:
        A ``(correct_count, wrong_count)`` tuple.
    """
    screen = Terminal(stdscr, create_char_map())
    screen.init_window(FIELD_WIDTH, FIELD_ROW)
    programs = BubblesortProgramSet()
    env = BubblesortEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)

    question_pool = create_questions(num)
    if DEBUG_MODE:
        # Debug runs keep only the trailing ``num`` entries.
        question_pool = question_pool[-num:]

    runtime = RuntimeSystem(terminal=screen)
    model = BubblesortNPIModel(runtime, model_path, programs)
    runner = TerminalNPIRunner(screen, model, recording=False)
    runner.verbose = DEBUG_MODE

    # Keyed by "was the question solved?".
    tally = {True: 0, False: 0}
    for question in question_pool:
        env.reset()
        try:
            run_npi(env, runner, programs.BUBBLESORT, question)
            solved = bool(question['correct'])
        except StopIteration:
            # An aborted run counts as a failure.
            solved = False
        tally[solved] += 1
        result_logger.write(question)
        screen.add_log(question)
    return tally[True], tally[False]
def main(stdscr, model_path: str, num: int, result_logger: ResultLogger):
    """Evaluate a multiplication NPI model on generated questions.

    Every question is run through ``run_npi`` on a freshly reset
    environment, logged, and then tallied by its ``'correct'`` entry.

    Args:
        stdscr: Handed to ``Terminal`` (presumably a curses window --
            confirm against the caller).
        model_path: Forwarded to ``MultiplicationNPIModel``.
        num: Question count passed to ``create_questions``.
        result_logger: Receives every question record via ``write``.

    Returns:
        A ``(correct_count, wrong_count)`` tuple.
    """
    screen = Terminal(stdscr, create_char_map())
    screen.init_window(FIELD_WIDTH, FIELD_ROW)
    programs = MultiplicationProgramSet()
    env = MultiplicationEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)

    question_pool = create_questions(num)
    if DEBUG_MODE:
        # Debug runs keep only the trailing ``num`` entries.
        question_pool = question_pool[-num:]

    runtime = RuntimeSystem(terminal=screen)
    model = MultiplicationNPIModel(runtime, screen, model_path, programs)
    runner = TerminalNPIRunner(screen, model, recording=False)
    runner.verbose = DEBUG_MODE

    # Keyed by "was the question solved?".
    tally = {True: 0, False: 0}
    for question in question_pool:
        env.reset()
        # NOTE(review): the entry program is ``ADD`` although the program
        # set is the multiplication one -- confirm this is intended.
        run_npi(env, runner, programs.ADD, question)
        result_logger.write(question)
        # NOTE(review): ``multiply_log`` may be a search/replace artefact of
        # ``add_log`` -- verify that ``Terminal`` defines it.
        screen.multiply_log(question)
        tally[bool(question['correct'])] += 1
    return tally[True], tally[False]
def main(stdscr, filename: str, num_of_questions: int, result_logger: ResultLogger):
    """Record the multiplication teacher's step traces for generated questions.

    For each question a shallow copy is taken before the run, the teacher
    is executed through ``run_npi``, and the runner's ``step_list`` is
    collected next to that copy. Each question is also written to
    ``result_logger`` and to the terminal log.

    Args:
        stdscr: Handed to ``Terminal`` (presumably a curses window --
            confirm against the caller).
        filename: When truthy, path the collected traces are pickled to.
        num_of_questions: Question count passed to ``create_questions``.
        result_logger: Passed to the runner and also receives every
            question record via ``write``.
    """
    screen = Terminal(stdscr, create_char_map())
    screen.init_window(FIELD_WIDTH, FIELD_ROW)
    programs = MultiplicationProgramSet()
    env = MultiplicationEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH, screen)

    question_pool = create_questions(num_of_questions)
    teacher = MultiplicationTeacher(programs, screen)
    runner = TerminalNPIRunner(screen, teacher, result_logger)
    runner.verbose = DEBUG_MODE

    traces = []
    for question in question_pool:
        env.reset()
        # Copy taken before the run so the stored record is separate from
        # the object handed to ``run_npi``.
        snapshot = copy(question)
        run_npi(env, runner, programs.MUL, question)
        traces.append({"q": snapshot, "steps": runner.step_list})
        result_logger.write(question)
        screen.add_log(question)

    if not filename:
        return
    with open(filename, 'wb') as sink:
        pickle.dump(traces, sink, protocol=pickle.HIGHEST_PROTOCOL)
def main(stdscr, model_path: str, num: int, result_logger: ResultLogger):
    """Evaluate a bubble-sort NPI model on generated questions.

    Every generated question is run through ``run_npi`` on a freshly reset
    environment, then written to ``result_logger`` and to the terminal log.
    A ``StopIteration`` escaping the run is counted as a wrong answer.

    NOTE(review): an earlier revision interleaved bucket-sort fragments
    that referenced undefined names (``arr``, ``x``, ``data``) and returned
    from inside the first loop iteration; they could not execute and are
    not part of this function. Confirm no bucket-based evaluation was
    intended.

    Args:
        stdscr: Handed to ``Terminal`` (presumably a curses window --
            confirm against the caller).
        model_path: Forwarded to ``BubblesortNPIModel``.
        num: Question count passed to ``create_questions``.
        result_logger: Receives every question record via ``write``.

    Returns:
        A ``(correct_count, wrong_count)`` tuple.
    """
    terminal = Terminal(stdscr, create_char_map())
    terminal.init_window(FIELD_WIDTH, FIELD_ROW)
    program_set = BubblesortProgramSet()
    bubblesort_env = BubblesortEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)

    questions = create_questions(num)
    if DEBUG_MODE:
        # Debug runs keep only the trailing ``num`` entries.
        questions = questions[-num:]

    system = RuntimeSystem(terminal=terminal)
    npi_model = BubblesortNPIModel(system, model_path, program_set)
    npi_runner = TerminalNPIRunner(terminal, npi_model, recording=False)
    npi_runner.verbose = DEBUG_MODE

    correct_count = wrong_count = 0
    for data in questions:
        bubblesort_env.reset()
        try:
            run_npi(bubblesort_env, npi_runner, program_set.BUBBLESORT, data)
            if data['correct']:
                correct_count += 1
            else:
                wrong_count += 1
        except StopIteration:
            # An aborted run counts as a failure.
            wrong_count += 1
        result_logger.write(data)
        terminal.add_log(data)
    return correct_count, wrong_count
def test_to_subset(self, questions):
    """Test this model on ``questions`` and collect teacher traces for misses.

    Each question is shallow-copied and checked with ``question_test``
    using a runner bound to ``self``. On failure the same copy is run again
    with a runner bound to a ``SelectionsortTeacher``, and that runner's
    ``step_list`` is stored together with the copy.

    Args:
        questions: Iterable of question records.

    Returns:
        ``(correct_count, wrong_count, wrong_steps_list)`` where each entry
        of ``wrong_steps_list`` is ``{"q": question, "steps": step_list}``.
    """
    env = SelectionsortEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)
    teacher = SelectionsortTeacher(self.program_set)
    model_runner = TerminalNPIRunner(None, self)
    teacher_runner = TerminalNPIRunner(None, teacher)

    passed = 0
    failures = []
    for original in questions:
        attempt = copy(original)
        if self.question_test(env, model_runner, attempt):
            passed += 1
            continue
        # Model missed: replay with the teacher to get a reference trace.
        self.question_test(env, teacher_runner, attempt)
        failures.append({"q": attempt, "steps": teacher_runner.step_list})
    # One failure record is appended per miss, so its length is the count.
    return passed, len(failures), failures
def test_to_subset(self, questions):
    """Test this model on ``questions`` and collect teacher traces for misses.

    Each question is shallow-copied and checked with ``question_test``
    using a runner bound to ``self``. On failure the same copy is run again
    with a runner bound to a ``MultiplicationTeacher``, and that runner's
    ``step_list`` is stored together with the copy.

    Args:
        questions: Iterable of question records.

    Returns:
        ``(correct_count, wrong_count, wrong_steps_list)`` where each entry
        of ``wrong_steps_list`` is ``{"q": question, "steps": step_list}``.
    """
    print("####test_to_subset###")
    env = MultiplicationEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH, self.terminal)
    teacher = MultiplicationTeacher(self.program_set)
    model_runner = TerminalNPIRunner(self.terminal, self)
    teacher_runner = TerminalNPIRunner(self.terminal, teacher)

    passed = 0
    failures = []
    for original in questions:
        attempt = copy(original)
        if self.question_test(env, model_runner, attempt):
            passed += 1
            continue
        # Model missed: replay with the teacher to get a reference trace.
        self.question_test(env, teacher_runner, attempt)
        failures.append({"q": attempt, "steps": teacher_runner.step_list})
    # One failure record is appended per miss, so its length is the count.
    return passed, len(failures), failures
def main(stdscr, filename: str, num: int, result_logger: ResultLogger):
    """Record the addition teacher's step traces for generated questions.

    For each question a shallow copy is taken before the run, the teacher
    is executed through ``run_npi``, and the runner's ``step_list`` is
    collected next to that copy. Each question is also written to
    ``result_logger`` and to the terminal log.

    Args:
        stdscr: Handed to ``Terminal`` (presumably a curses window --
            confirm against the caller).
        filename: When truthy, path the collected traces are pickled to.
        num: Question count passed to ``create_questions``.
        result_logger: Receives every question record via ``write``.
    """
    screen = Terminal(stdscr, create_char_map())
    screen.init_window(FIELD_WIDTH, FIELD_ROW)
    programs = AdditionProgramSet()
    env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)

    question_pool = create_questions(num)
    teacher = AdditionTeacher(programs)
    runner = TerminalNPIRunner(screen, teacher)
    runner.verbose = DEBUG_MODE

    traces = []
    for question in question_pool:
        env.reset()
        # Copy taken before the run so the stored record is separate from
        # the object handed to ``run_npi``.
        snapshot = copy(question)
        run_npi(env, runner, programs.ADD, question)
        traces.append({"q": snapshot, "steps": runner.step_list})
        result_logger.write(question)
        screen.add_log(question)

    if not filename:
        return
    with open(filename, 'wb') as sink:
        pickle.dump(traces, sink, protocol=pickle.HIGHEST_PROTOCOL)
def main(stdscr, filename: str, num: int, result_logger: ResultLogger):
    """Record the bubble-sort teacher's step traces and a CSV debug log.

    For each generated question a shallow copy is taken, the teacher is
    executed through ``run_npi``, and the runner's ``step_list`` is
    collected next to that copy. Every recorded step is also appended as
    one comma-separated row to ``debug_log.csv`` in the working directory
    (the file is truncated on each call). Each question is written to
    ``result_logger`` and to the terminal log.

    Args:
        stdscr: Handed to ``Terminal`` (presumably a curses window --
            confirm against the caller).
        filename: When truthy, path the collected traces are pickled to.
        num: Question count passed to ``create_questions``.
        result_logger: Receives every question record via ``write``.
    """
    terminal = Terminal(stdscr, create_char_map())
    terminal.init_window(FIELD_WIDTH, FIELD_ROW)
    program_set = BubblesortProgramSet()
    bubblesort_env = BubblesortEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)

    questions = create_questions(number=num)
    teacher = BubblesortTeacher(program_set)
    npi_runner = TerminalNPIRunner(terminal, teacher)
    npi_runner.verbose = DEBUG_MODE

    # The first six entries of ``step.input.env`` are dumped per row.
    env_columns = 6
    steps_list = []
    # The context manager closes the log even if a run raises part-way.
    with open("debug_log.csv", "w") as debug_log:
        for data in questions:
            bubblesort_env.reset()
            q = copy(data)
            run_npi(bubblesort_env, npi_runner, program_set.BUBBLESORT, data)
            steps_list.append({"q": q, "steps": npi_runner.step_list})
            for step in npi_runner.step_list:
                row = [step.input.env[i] for i in range(env_columns)]
                row += [
                    step.input.arguments,
                    step.input.program,
                    step.output.program,
                    step.output.r,
                    step.output.arguments,
                    q,
                ]
                # Values are written unescaped, so fields whose text
                # contains commas are not quoted.
                debug_log.write(
                    ",".join("{}".format(value) for value in row) + "\n")
            result_logger.write(data)
            terminal.add_log(data)

    if filename:
        with open(filename, 'wb') as trace_file:
            pickle.dump(steps_list, trace_file, protocol=pickle.HIGHEST_PROTOCOL)
def main(stdscr, model_path: str, num: int, result_logger: ResultLogger):
    """Evaluate an addition NPI model on generated questions.

    Every question is run through ``run_npi`` on a freshly reset
    environment, logged, and then tallied by its ``"correct"`` entry.

    Args:
        stdscr: Handed to ``Terminal`` (presumably a curses window --
            confirm against the caller).
        model_path: Forwarded to ``AdditionNPIModel``.
        num: Question count passed to ``create_questions``.
        result_logger: Receives every question record via ``write``.

    Returns:
        A ``(correct_count, wrong_count)`` tuple.
    """
    screen = Terminal(stdscr, create_char_map())
    screen.init_window(FIELD_WIDTH, FIELD_ROW)
    programs = AdditionProgramSet()
    env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)

    question_pool = create_questions(num)
    if DEBUG_MODE:
        # Debug runs keep only the trailing ``num`` entries.
        question_pool = question_pool[-num:]

    runtime = RuntimeSystem(terminal=screen)
    model = AdditionNPIModel(runtime, model_path, programs)
    runner = TerminalNPIRunner(screen, model, recording=False)
    runner.verbose = DEBUG_MODE

    solved_total = 0
    failed_total = 0
    for question in question_pool:
        env.reset()
        run_npi(env, runner, programs.ADD, question)
        result_logger.write(question)
        screen.add_log(question)
        if not question["correct"]:
            failed_total += 1
            continue
        solved_total += 1
    return solved_total, failed_total
def do_learn(self, steps_list, epoch, pass_rate=1.0, skip_correct=False):
    """Train on recorded step traces for at most ``epoch`` epochs.

    Each epoch shuffles ``steps_list`` in place and tests every question
    with ``question_test``. A per-question streak of consecutive passes is
    kept:

    * On a pass the streak grows. The trace is trained on again only when
      ``skip_correct`` is false and the streak is a perfect square
      (1, 4, 9, ...).
    * On a failure the streak is reset to zero and the trace is always
      trained on.

    Training converts every step with ``convert_input`` and
    ``convert_output``, calls ``reset`` and feeds the samples one at a
    time to ``self.model.train_on_batch``. ``save`` is called at the end
    of every epoch.

    Args:
        steps_list: Mutable sequence of ``{'q': ..., 'steps': ...}``
            records; it is shuffled in place.
        epoch: Maximum number of epochs.
        pass_rate: Mean pass ratio at which training stops successfully.
        skip_correct: When truthy, never retrain on a passing question.

    Returns:
        ``True`` once an epoch's mean pass ratio reaches ``pass_rate``.
        ``False`` when the epochs run out, or when the mean loss changed
        by less than 0.1% relative to the previous one while no question
        changed status for more than five consecutive epochs.

    Raises:
        RuntimeError: If ``train_on_batch`` returns a non-finite loss.
    """
    env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)
    runner = TerminalNPIRunner(None, self)
    weights_snapshot = None
    streaks = Counter()
    stagnant_epochs = 0
    previous_loss = 1000

    for ep in range(1, epoch + 1):
        newly_right = newly_wrong = 0
        epoch_losses = []
        outcomes = []
        np.random.shuffle(steps_list)

        for idx, record in enumerate(steps_list):
            question = copy(record['q'])
            # Key is taken before the test; the question text is rendered
            # again for the log lines below, after the test has run.
            key = self.dict_to_str(question)
            solved = self.question_test(env, runner, question)

            if solved:
                if streaks[key] == 0:
                    newly_right += 1
                streaks[key] += 1
                print("GOOD!: ep=%2d idx=%3d :%s CorrectCount=%s" %
                      (ep, idx, self.dict_to_str(question), streaks[key]))
                outcomes.append(1)
                streak = streaks[key]
                # Revisit passing questions only at perfect-square streaks.
                worth_retraining = (
                    not skip_correct
                    and int(math.sqrt(streak)) ** 2 == streak
                )
                if not worth_retraining:
                    continue
            else:
                outcomes.append(0)
                if streaks[key] > 0:
                    print("Degraded: ep=%2d idx=%3d :%s CorrectCount=%s -> 0" %
                          (ep, idx, self.dict_to_str(question), streaks[key]))
                    streaks[key] = 0
                    newly_wrong += 1

            # Convert the whole trace before resetting the model state.
            samples = []
            for step in record['steps']:
                x = self.convert_input(step.input)
                y, w = self.convert_output(step.output)
                samples.append((x, y, w))

            self.reset()

            for i, (x, y, w) in enumerate(samples):
                loss = self.model.train_on_batch(x, y, sample_weight=w)
                if not np.isfinite(loss):
                    print("Loss is not finite!, Last Input=%s" % ([i, (x, y, w)]))
                    # Dump the weights captured after the last finite step.
                    self.print_weights(weights_snapshot, detail=True)
                    raise RuntimeError("Loss is not finite!")
                epoch_losses.append(loss)
                weights_snapshot = self.model.get_weights()

        if epoch_losses:
            cur_loss = np.average(epoch_losses)
            print("ep=%2d: ok_rate=%.2f%% (+%s -%s): ave loss %s (%s samples)" %
                  (ep, np.average(outcomes) * 100, newly_right, newly_wrong,
                   cur_loss, len(steps_list)))
            status_unchanged = newly_right + newly_wrong == 0
            stagnant_epochs = (stagnant_epochs + 1) if status_unchanged else 0

            loss_plateaued = math.fabs(1 - cur_loss / previous_loss) < 0.001
            if loss_plateaued and stagnant_epochs > 5:
                print("math.fabs(1 - cur_loss/last_loss) < 0.001 and no_change_count > 5:")
                return False
            previous_loss = cur_loss
            print("=" * 80)

        self.save()
        if np.average(outcomes) >= pass_rate:
            return True
    return False