def test_test_thread(path: str, thread_num: int, is_save_intermediate_file: bool = False) -> None:
    """Compile and run every file under *path* using *thread_num* worker
    threads, pickle the execution results, then score answer pairs.

    NOTE(review): the original carried the comment "有bug,废弃" (buggy,
    deprecated).  Two concrete defects are fixed here:
      * ``files`` was unbound (NameError at the summary log) when *path*
        did not exist, because ``os.walk`` then yields nothing;
      * the answer file name was misspelled ``"ans.csvc"`` — the sibling
        ``test_test`` uses ``ans.csv``;
    and ``exe_res`` is pre-initialized as a dict, since it is later
    indexed by string keys (``exe_res[t[0]]``).
    """
    Log(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    Log("开始测试", path, "的数据")
    exe = ExecuteCode(is_save_intermediate_file=is_save_intermediate_file)
    exe_res, compile_succ, files = dict(), 0, []
    for root, dirs, files in os.walk(path):
        # Only the top-level directory is processed (loop breaks immediately).
        exe_res, compile_succ = exe.execute_code_thread(
            PathFunc.to_linux(path), files, thread_num)
        break
    Log("编译成功数量:", compile_succ, "/", len(files))
    with open("build/execute_info", "wb") as f:
        pickle.dump(exe_res, f)
    g = GenerateAns.GenerateAns("ans.csv", "sample_submission.csv")
    while True:
        t = g.get()
        if t is None:
            break
        # 1 if the two executions produced matching output, else 0.
        g.add(1 if compare_test(exe_res[t[0]], exe_res[t[1]]) else 0)
    Log(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
def test_test(path: str, is_save_intermediate_file: bool = False) -> None:
    """Compile and run every file under *path* sequentially, pickle the
    results, then score answer pairs via ``GenerateAns``.

    Fix: ``files`` is pre-initialized so the summary log does not raise
    NameError when *path* does not exist (``os.walk`` yields nothing).
    The pickle write-then-reload round-trip is kept from the original —
    it doubles as a check that results survive serialization.
    """
    Log("开始测试", path, "的数据")
    exe_res, compile_succ, files = dict(), 0, []
    exe = ExecuteCode(is_save_intermediate_file=is_save_intermediate_file)
    for root, dirs, files in os.walk(path):
        total_num, cur_num = len(files), 1
        for name in files:
            Log("开始编译", name, ":[", str(cur_num), "/", str(total_num), "]")
            cur_num += 1
            p = PathFunc.join(PathFunc.to_linux(path), name)
            # Key results by the file's base name (without extension).
            exe_res[name.split(".")[0]], cs = exe.execute_code(p)
            compile_succ += cs
        break  # only the top-level directory
    Log("编译成功数量:", compile_succ, "/", len(files))
    with open("../build/execute_info", "wb") as f:
        pickle.dump(exe_res, f)
    with open("../build/execute_info", "rb") as f:
        exe_res = pickle.load(f)
    g = GenerateAns.GenerateAns("../ans.csv", "../sample_submission.csv")
    while True:
        t = g.get()
        if t is None:
            break
        g.add(1 if compare_test(exe_res[t[0]], exe_res[t[1]]) else 0)
def test_train_in_one_dir(path: str, test_num: int = -1, is_save_intermediate_file: bool = False) -> None:
    """Compile up to *test_num* files from *path* (all files when -1), then
    compare every pair of execution results and report mismatching pairs.

    Fixes: removed the unused locals ``l1, l2`` and the no-op ``continue``
    that ended the inner comparison loop.
    """
    exe_res = []
    exe = ExecuteCode(is_save_intermediate_file=is_save_intermediate_file)
    Log("开始测试", path, "的数据")
    count = 0
    for root, dirs, files in os.walk(path):
        for name in files:
            if count == test_num:  # test_num == -1 never matches -> process all
                break
            count += 1
            Log("开始编译", name)
            p = PathFunc.join(PathFunc.to_linux(path), name)
            exe_res.append({"res": exe.execute_code(p), "name": name})
        break  # only the top-level directory
    Log("开始比较输出")
    length = len(exe_res)
    # All-pairs comparison; O(n^2) by design since every pair must be reported.
    for i in range(length):
        for j in range(i + 1, length):
            if not compare_test(exe_res[i]["res"], exe_res[j]["res"]):
                print("文件", exe_res[i]["name"], "和", exe_res[j]["name"], "结果不一致")
def test_train_random_from_same_dir(dir_path: str, test_num: int, is_save_intermediate_file: bool = False):
    """Pick *test_num* random pairs of files from the same sub-directory of
    *dir_path*, execute both, and compare their outputs.

    Returns ``(is_same_num, succ_num)``: pairs whose outputs matched, and
    pairs that either matched or at least ran with every ``result == 0``.

    NOTE(review): the two random indices may coincide, so a file can be
    compared against itself — behavior kept from the original.
    Idiom fix: the ``sum([1 if … else 0]) != len(...)`` counting trick is
    replaced by the equivalent ``all(...)`` test.
    """
    dir_list = os.listdir(dir_path)
    dir_list_len = len(dir_list)
    # One entry per sub-directory: [name, file_count, file_names].
    dir_file = []
    for name in dir_list:
        dir_file_list = os.listdir(os.path.join(dir_path, name))
        dir_file.append([name, len(dir_file_list), dir_file_list])
    exe = ExecuteCode(is_save_intermediate_file=is_save_intermediate_file)
    is_same_num, succ_num = 0, 0
    for _ in range(test_num):
        i = random.randint(0, dir_list_len - 1)
        j1 = random.randint(0, dir_file[i][1] - 1)
        j2 = random.randint(0, dir_file[i][1] - 1)
        Log("开始比较 ", dir_file[i][0], " 的 ", dir_file[i][2][j1], " 和 ", dir_file[i][2][j2])
        p1 = PathFunc.join(dir_path, dir_file[i][0], dir_file[i][2][j1])
        p2 = PathFunc.join(dir_path, dir_file[i][0], dir_file[i][2][j2])
        ret_list1, _ = exe.execute_code(p1)
        ret_list2, _ = exe.execute_code(p2)
        if compare_test(ret_list1, ret_list2):
            is_same_num += 1
            succ_num += 1
            continue
        GLog("文件不相同")
        for r1, r2 in zip(ret_list1, ret_list2):
            GLog(r1, r2)
        # A mismatching pair still counts as a success if every execution
        # step of both runs reported result == 0.
        if all(x.result == 0 for x in ret_list1) and all(x.result == 0 for x in ret_list2):
            succ_num += 1
        else:
            print("diff")
    return is_same_num, succ_num
def act(mask: int, file_list: list) -> None:
    # Worker closure: compiles every file in *file_list*, tagging log lines
    # and the intermediate namespace with *mask*.
    # Reads ``self`` and ``dir_path`` from the enclosing scope; writes its
    # results into the shared ``exe_res_list`` / ``compile_succ_list``.
    exe_res, total_num, cur_num = dict(), len(file_list), 1
    for name in file_list:
        Log(str(mask), "开始编译", name, ":[", str(cur_num), "/", str(total_num), "]")
        cur_num += 1
        p = PathFunc.join(dir_path, name)
        # presumably execute_code returns (results, compile-success count) —
        # TODO confirm against ExecuteCode.execute_code.
        exe_res[name.split(".")[0]], cs = self.execute_code(p, str(mask))
        # NOTE(review): the flattened source makes the nesting of the next two
        # statements ambiguous; per-file accumulation is assumed because
        # ``.get(mask, 0) + cs`` only sums correctly when run once per file —
        # confirm against the original layout.
        exe_res_list[mask] = exe_res
        compile_succ_list[mask] = compile_succ_list.get(mask, 0) + cs
def get_trained_xgb(train_path: str, save_path: str, num_models: int = 83) -> None:
    """Train one XGBoost binary classifier per indexed CSV in *train_path*.

    For each ``i`` in ``range(num_models)`` loads ``<train_path>/<i>.csv``
    through XGBoost's CSV URI loader (label taken from column 0), trains for
    16 boosting rounds, saves ``<save_path>/<i>.model``, and logs the
    training-set F1 score.

    ``num_models`` generalizes the previously hard-coded 83 models
    (backward-compatible default).
    """
    Log("开始训练模型")
    # Hyper-parameters are loop-invariant — build the dict once.
    param = {
        'max_depth': 16,
        'nthread': 4,
        'gamma': 0.00001,
        'objective': 'binary:logistic'
    }
    for i in range(num_models):
        dtrain = xgb.DMatrix(
            os.path.join(train_path, str(i) + ".csv") + "?format=csv&label_column=0")
        bst = xgb.train(param, dtrain, 16)
        bst.save_model(os.path.join(save_path, str(i) + ".model"))
        train_preds = bst.predict(dtrain)
        # Round the predicted probabilities to hard 0/1 labels.
        train_predictions = [round(value) for value in train_preds]
        y_train = dtrain.get_label()  # label column (column 0 of the CSV)
        train_accuracy = f1_score(y_train, train_predictions)
        Log("Train f1: %.2f%%" % (train_accuracy * 100.0))
def get_trained_xgb(train_path: str, save_path: str, prec: float,
                    num_models: int = 83, rows_per_class: int = 500) -> None:
    """Train one XGBoost binary classifier per indexed CSV, using only the
    first ``int(rows_per_class * prec)`` rows of each ``rows_per_class``-row
    block (``prec`` is the fraction of each block to keep).

    Models are saved as ``<save_path>/<i>_report.model``; the training-set
    F1 score is logged per model.

    NOTE(review): this redefinition shadows the earlier ``get_trained_xgb``
    in the same module — consider renaming one of them.
    ``num_models`` / ``rows_per_class`` generalize the previously hard-coded
    83 and 500 (backward-compatible defaults).
    """
    # Row indices to keep: the leading prec-fraction of every block.
    take = int(rows_per_class * prec)
    get_what = [i * rows_per_class + j
                for i in range(num_models) for j in range(take)]
    Log("开始训练模型")
    # Hyper-parameters are loop-invariant — build the dict once.
    param = {
        'max_depth': 16,
        'nthread': 4,
        'gamma': 0.00001,
        'objective': 'binary:logistic'
    }
    for i in range(num_models):
        csv = pd.read_csv(os.path.join(train_path, str(i) + ".csv"))
        train = csv.iloc[get_what, 1:].values   # features: all but column 0
        labels = csv.iloc[get_what, :1].values  # label: column 0
        dtrain = xgb.DMatrix(train, label=labels)
        bst = xgb.train(param, dtrain, 16)
        bst.save_model(os.path.join(save_path, str(i) + "_report.model"))
        train_preds = bst.predict(dtrain)
        # Round the predicted probabilities to hard 0/1 labels.
        train_predictions = [round(value) for value in train_preds]
        y_train = dtrain.get_label()  # label column (column 0 of the CSV)
        train_accuracy = f1_score(y_train, train_predictions)
        Log("Train f1: %.2f%%" % (train_accuracy * 100.0))