def _load_state(pkl_path, use_cuda):
    """Load a checkpoint dict, mapping GPU-saved storages back to CPU when CUDA is not used."""
    if use_cuda:
        return torch.load(pkl_path)
    return torch.load(pkl_path, map_location=lambda storage, loc: storage)


def _resume_from_checkpoint(model, optm, model_dir, continue_pkl, initial_epoch, use_cuda):
    """Restore model/optimizer state from the first available checkpoint.

    Resolution order mirrors the original branch chain: the explicit
    ``continue_pkl`` file, then ``model-latest.pkl``, then
    ``model-<initial_epoch>.pkl``. Nothing is attempted when
    ``continue_pkl`` is None.

    :return: the epoch to resume from (saved epoch + 1), or 0 when no
        checkpoint could be loaded.
    """
    candidates = []
    if continue_pkl is not None:
        candidates.append(os.path.join(model_dir, continue_pkl))
        candidates.append(os.path.join(model_dir, 'model-latest.pkl'))
        if initial_epoch is not None:
            candidates.append(os.path.join(model_dir, 'model-%d.pkl' % initial_epoch))
    for pkl_path in candidates:
        if os.path.exists(pkl_path):
            state = _load_state(pkl_path, use_cuda)
            model.load_state_dict(state['network'])
            optm.load_state_dict(state['optimizer'])
            return state['epoch'] + 1
    return 0


def _forward_metrics(model, loss_fn, x, y):
    """Forward one batch through the 4-head captcha model.

    :return: (total_loss tensor, mean per-character accuracy,
        whole-captcha accuracy from ``multi_acc``).
    """
    preds = model(x)  # 4 logits tensors, one per captcha character
    loss_count = sum(loss_fn(p, y[:, i]) for i, p in enumerate(preds))
    acc_mean = sum(acc(p, y[:, i]) for i, p in enumerate(preds)) / 4.
    stacked = torch.stack(preds, dim=-1)
    multi_acc_mean = multi_acc(torch.argmax(stacked, dim=1), y)
    return loss_count, acc_mean, multi_acc_mean


def train(path, split=(6, 1, 1), batch_size=64, epochs=100, learning_rate=0.001, initial_epoch=0,
          step_saving=2, model_dir='./', log_file='./history', continue_pkl=None, gpu=True, mode='captcha'):
    """Train a 4-character captcha model, logging history and checkpointing.

    :param path: dataset directory (must exist; raises FileNotFoundError otherwise).
    :param split: train/dev/test ratio forwarded to ``get_data_split``
        (default changed from a mutable list to an equivalent tuple).
    :param batch_size: mini-batch size for both train and dev loaders.
    :param epochs: total number of epochs to train up to.
    :param learning_rate: Adam learning rate.
    :param initial_epoch: epoch to resume from; also names the
        ``model-<n>.pkl`` checkpoint candidate when ``continue_pkl`` is set.
    :param step_saving: save a numbered checkpoint every this many epochs
        (``model-latest.pkl`` is rewritten every epoch regardless).
    :param model_dir: directory checkpoints are read from / written to.
    :param log_file: directory holding the JSON history files.
    :param continue_pkl: checkpoint filename inside ``model_dir`` to resume from.
    :param gpu: use CUDA when available.
    :param mode: 'captcha' or 'kaptcha' — selects the model module.
    :return: None. Returns silently on an unknown ``mode`` (original contract).
    """
    if mode == 'captcha':
        from model import CaptchaModel
    elif mode == 'kaptcha':
        from kaptcha_model import CaptchaModel
    else:
        # Unknown mode: preserve the original silent no-op behavior.
        return

    if not os.path.exists(path):
        raise FileNotFoundError("未知的训练数据")

    x_train, y_train, x_dev, y_dev = get_data_split(path, split=split, modes=['train', 'dev'])
    train_loader = DataLoader(CaptchaLoader((x_train, y_train), shuffle=True),
                              batch_size=batch_size, shuffle=True)
    dev_loader = DataLoader(CaptchaLoader((x_dev, y_dev)), batch_size=batch_size, shuffle=True)

    use_cuda = gpu and torch.cuda.is_available()
    model = CaptchaModel()
    optm = torch.optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = nn.CrossEntropyLoss()
    if use_cuda:
        model = model.cuda()
        loss_fn = loss_fn.cuda()

    initial_epoch = _resume_from_checkpoint(model, optm, model_dir, continue_pkl, initial_epoch, use_cuda)

    # Load history (each entry: [loss, acc, multi_acc]).
    batch_history_train = load_history(filename='history_batch_train.json', history_path=log_file)
    epoch_history_train = load_history(filename='history_epoch_train.json', history_path=log_file)
    epoch_history_dev = load_history(filename='history_epoch_dev.json', history_path=log_file)
    # Drop entries recorded after the resume point. Batch history holds one
    # entry per *batch*, so it must be cut at epochs × batches-per-epoch
    # (the original sliced it by epoch count, keeping stale batch entries).
    batches_per_epoch = int(np.ceil(len(train_loader.dataset) / batch_size))
    batch_history_train = batch_history_train[:initial_epoch * batches_per_epoch]
    epoch_history_train = epoch_history_train[:initial_epoch]
    epoch_history_dev = epoch_history_dev[:initial_epoch]

    with tqdm(total=epochs, desc='Epoch', initial=initial_epoch) as epoch_bar:
        for epoch in range(initial_epoch, epochs):
            # ---- train ----
            model.train()
            loss_batchs, acc_batchs, multi_acc_batchs = [], [], []
            with tqdm(total=batches_per_epoch, desc='Batch') as batch_bar:
                for x, y in train_loader:
                    optm.zero_grad()
                    # DataLoader already yields tensors — just move to the
                    # device. (The original re-wrapped them via torch.tensor,
                    # which copies and warns on modern PyTorch; the input
                    # itself never needs requires_grad for training.)
                    if use_cuda:
                        x, y = x.cuda(), y.cuda()
                    loss_count, acc_mean, multi_acc_mean = _forward_metrics(model, loss_fn, x, y)
                    loss_batchs.append(loss_count.item())
                    acc_batchs.append(acc_mean)
                    multi_acc_batchs.append(multi_acc_mean)
                    batch_bar.set_postfix(loss=loss_count.item(), acc=acc_mean, multi_acc=multi_acc_mean)
                    batch_bar.update()
                    batch_history_train.append([loss_count.item(), acc_mean, multi_acc_mean])
                    # Saved every batch (as before) so a crash loses no history.
                    save_history('history_batch_train.json', batch_history_train, log_file)
                    loss_count.backward()
                    optm.step()
            epoch_bar.set_postfix(loss_mean=np.mean(loss_batchs), acc_mean=np.mean(acc_batchs),
                                  multi_acc_mean=np.mean(multi_acc_batchs))
            epoch_bar.update()
            epoch_history_train.append([np.mean(loss_batchs).item(), np.mean(acc_batchs).item(),
                                        np.mean(multi_acc_batchs).item()])
            save_history('history_epoch_train.json', epoch_history_train, log_file)

            # ---- validate ----
            # no_grad: inference only — the original built the full autograd
            # graph during validation, wasting memory for nothing.
            model.eval()
            loss_dev, acc_dev, multi_acc_dev = [], [], []
            dev_batches = int(np.ceil(len(dev_loader.dataset) / batch_size))
            with torch.no_grad(), tqdm(total=dev_batches, desc='Val Batch') as batch_bar:
                for x, y in dev_loader:
                    if use_cuda:
                        x, y = x.cuda(), y.cuda()
                    loss_count, acc_mean, multi_acc_mean = _forward_metrics(model, loss_fn, x, y)
                    loss_dev.append(loss_count.item())
                    acc_dev.append(acc_mean)
                    multi_acc_dev.append(multi_acc_mean)
                    batch_bar.set_postfix(loss=loss_count.item(), acc=acc_mean, multi_acc=multi_acc_mean)
                    batch_bar.update()
            epoch_history_dev.append([np.mean(loss_dev).item(), np.mean(acc_dev).item(),
                                      np.mean(multi_acc_dev).item()])
            save_history('history_epoch_dev.json', epoch_history_dev, log_file)

            # ---- checkpoint ----
            # makedirs(exist_ok=True): os.mkdir failed when parents were missing.
            os.makedirs(model_dir, exist_ok=True)
            state_dict = {
                'network': model.state_dict(),
                'optimizer': optm.state_dict(),
                'epoch': epoch,
            }
            if epoch % step_saving == 0:
                torch.save(state_dict, os.path.join(model_dir, 'model-%d.pkl' % epoch))
            torch.save(state_dict, os.path.join(model_dir, 'model-latest.pkl'))
def eval(model_dir, data_dir, batch_size=64, log_dir='./logs', use_gpu=True, mode='captcha'):
    """Evaluate a saved checkpoint on the test split and dump accuracies to JSON.

    NOTE(review): the function name shadows the ``eval`` builtin; kept for
    backward compatibility with existing callers.

    :param model_dir: path to the checkpoint *file* (passed straight to
        ``torch.load``), despite the directory-sounding name.
    :param data_dir: dataset directory forwarded to ``get_data_split``.
    :param batch_size: mini-batch size for evaluation.
    :param log_dir: directory where ``eval.json`` is written (created if absent).
    :param use_gpu: use CUDA when available.
    :param mode: 'captcha' or 'kaptcha' — selects the model module.
    :raises ValueError: on an unknown ``mode`` (previously fell through to a
        confusing NameError on ``CaptchaModel``).
    :return: None; per-batch [acc, multi_acc] pairs go to ``<log_dir>/eval.json``.
    """
    x_test, y_test = get_data_split(data_dir, modes=['test'])
    if mode == 'captcha':
        from model import CaptchaModel
    elif mode == 'kaptcha':
        from kaptcha_model import CaptchaModel
    else:
        raise ValueError('unknown mode: %r' % (mode,))

    model = CaptchaModel()
    use_cuda = use_gpu and torch.cuda.is_available()
    if use_cuda:
        model = model.cuda()
        model_state = torch.load(model_dir)
    else:
        # Map GPU-saved storages back to CPU.
        model_state = torch.load(model_dir, map_location=lambda storage, loc: storage)
    model.load_state_dict(model_state['network'])

    test_loader = DataLoader(CaptchaLoader((x_test, y_test), shuffle=True),
                             batch_size=batch_size, shuffle=True)

    model.eval()
    acc_history = []
    total_batches = int(np.ceil(len(test_loader.dataset) / batch_size))
    # no_grad: inference only — the original built autograd graphs per batch.
    with torch.no_grad(), tqdm(total=total_batches, desc='Eval') as eval_bar:
        for x, y in test_loader:
            # DataLoader already yields tensors; no torch.tensor re-wrapping.
            if use_cuda:
                x, y = x.cuda(), y.cuda()
            pred1, pred2, pred3, pred4 = model(x)
            acc_mean = np.mean([
                acc(pred1, y[:, 0]),
                acc(pred2, y[:, 1]),
                acc(pred3, y[:, 2]),
                acc(pred4, y[:, 3]),
            ])
            pred = torch.stack((pred1, pred2, pred3, pred4), dim=-1)
            multi_acc_mean = multi_acc(torch.argmax(pred, dim=1), y)
            acc_history.append([acc_mean.item(), multi_acc_mean])
            eval_bar.update()
            eval_bar.set_postfix(acc=acc_mean, multi_acc=multi_acc_mean)

    # makedirs(exist_ok=True): os.mkdir failed when parents were missing.
    os.makedirs(log_dir, exist_ok=True)
    with open(os.path.join(log_dir, 'eval.json'), mode='w') as out_fp:
        json.dump(acc_history, out_fp)