def main(epoch_num, lr=0.1, training=True, fix_width=True):
    """Train or evaluate a CRNN on the IIIT5K dataset.

    Args:
        epoch_num (int): Number of epochs to train for.
        lr (float, optional): Learning rate for the trainer (default: 0.1)
        training (bool, optional): If True, train the model, otherwise test it (default: True)
        fix_width (bool, optional): Scale images to fixed size (default: True)
    """
    model_path = ('fix_width_' if fix_width else '') + 'crnn.pth'
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    root = 'data/IIIT5K/'
    # Both the training and evaluation paths need the same network:
    # 1 input channel, one output class per letter plus the CTC blank.
    # (Previously this construction + checkpoint load was duplicated in
    # both branches.)
    net = CRNN(1, len(letters) + 1)
    # If there is a pre-trained model, load it so training resumes from
    # it / evaluation uses it.
    if os.path.exists(model_path):
        if training:
            print('Pre-trained model detected.\nLoading model...')
        net.load_state_dict(torch.load(model_path))
    if training:
        start_epoch = 0
        if torch.cuda.is_available():
            print('GPU detected.')
        net = train(root, start_epoch, epoch_num, letters,
                    net=net, lr=lr, fix_width=fix_width)
        # save the trained model for training again
        torch.save(net.state_dict(), model_path)
    # Evaluate in both modes (after training, or standalone).
    test(root, net, letters, fix_width=fix_width)
# elif isinstance(m, nn.Linear): # m.weight.data.normal_(0.0, 0.02) # m.bias.data.fill_(0) net = CRNN(48, 1, len(char2index), 256, opt.nrnn, opt.dropout, opt.variational_dropout, leakyRelu=True) print(net) params = net.state_dict() params_shape = [] for k, v in params.items(): # print(k, v.numpy().shape, reduce(mul, v.numpy().shape)) params_shape.append(reduce(mul, v.numpy().shape)) params_total = sum(params_shape) print('params_total:', params_total) if opt.finetune: print('Loading model from', opt.modeldir + opt.modelname) net.load_state_dict(torch.load(opt.modeldir + opt.modelname)) else: print('create new model') net.apply(weights_init) if opt.ngpu > 1:
def train(path=None):
    """Train the CRNN on synthetic text images with CTC loss.

    Runs up to 999 epochs of 100 generated batches each and writes a
    checkpoint ("checkpoint5.pt") after every epoch.

    Args:
        path (str, optional): Checkpoint to resume from. When given,
            model and optimizer state are restored and training
            continues from the epoch *after* the saved one.
    """
    dataset = FakeTextImageGenerator(batch_size=16).iter()
    criterion = CTCLoss(reduction="mean", zero_infinity=True)
    net = CRNN(nclass=100).float()
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    start_epoch = 1
    if path:
        checkpoint = torch.load(path)
        net.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        epoch = checkpoint["epoch"]
        loss = checkpoint["loss"]
        print(f"model current epoch: {epoch} with loss: {loss}")
        # BUG FIX: the loop previously always restarted at epoch 1 even
        # when resuming, ignoring the epoch restored above.
        start_epoch = epoch + 1

    # loop over the dataset multiple times
    for epoch in range(start_epoch, 1000):
        running_loss = 0.0
        loop = tqdm(range(100))
        for i in loop:
            data = next(dataset)
            images = data["the_inputs"]
            labels = data["the_labels"]
            input_length = data["input_length"]
            label_length = data["label_length"]

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(images.float())
            loss = criterion(outputs, labels, input_length, label_length)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            loop.set_postfix(epoch=epoch, loss=(running_loss / (i + 1)))

        # Checkpoint after every epoch so training can be resumed.
        # NOTE: "loss" stores the summed loss over the epoch's 100
        # batches, matching the original behavior.
        torch.save(
            {
                "epoch": epoch,
                "model_state_dict": net.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "loss": running_loss,
            },
            "checkpoint5.pt",
        )

    print("Finished Training")
# NOTE(review): this `return` is the tail of a function (presumably the
# validation routine) whose definition is cut off above this chunk; the
# misspelling "accuarcy" must match the variable defined there — verify.
return accuarcy

# Main training loop: one pass per epoch, checkpointing after each.
for i in range(option.nepoch):
    for j, (input, label) in enumerate(trainset_dataloader):
        # Skip the last (possibly partial) batch, since the predicted
        # lengths below are built with the fixed option.batch_size.
        if j == len(trainset_dataloader) - 1:
            continue
        crnn.zero_grad()
        # Encode the text labels to integer sequences + their lengths.
        label, length = converter.encode(label)
        input = input.cuda()
        predicted_label = crnn(input)
        # CTC needs the (constant) time-step count for every sample.
        predicted_length = [predicted_label.size(0)] * option.batch_size
        label = torch.tensor(label, dtype=torch.long)
        label = label.cuda()
        predicted_length = torch.tensor(predicted_length, dtype=torch.long)
        length = torch.tensor(length, dtype=torch.long)
        loss = loss_function(predicted_label, label, predicted_length, length)
        loss.backward()
        optimizer.step()
        # NOTE(review): accumulating the loss *tensor* keeps each batch's
        # autograd graph alive; `loss.item()` would avoid that — confirm
        # before changing, as the print below formats it the same way.
        total_loss += loss
        if j % print_every == 0:
            print('[%d / %d] [%d / %d] loss: %.4f' %
                  (i, option.nepoch, j, len(trainset_dataloader),
                   total_loss / print_every))
            total_loss = 0
    # Validate and checkpoint once per epoch, accuracy in the filename.
    accuracy = validation()
    print('save model...')
    torch.save(crnn.state_dict(), 'model/crnn_%d_%.4f.pth' % (i, accuracy))
def train():
    """Train the CRNN with PaddlePaddle's dygraph mode and warp-CTC loss.

    Configuration comes entirely from the module-level
    ``train_parameters`` dict. Saves a ``crnn_latest`` checkpoint every
    epoch and a ``crnn_best`` checkpoint whenever evaluation accuracy
    reaches a new best.
    """
    epoch_num = train_parameters["num_epochs"]
    batch_size = train_parameters["train_batch_size"]
    place = fluid.CUDAPlace(
        0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
    logger.info('train with {}'.format(place))
    with fluid.dygraph.guard(place):
        # Data loading
        file_list = open(train_parameters['train_list']).readlines()
        train_reader = get_loader(
            file_list=file_list,
            input_size=train_parameters['input_size'],
            max_char_per_line=train_parameters['max_char_per_line'],
            mean_color=train_parameters['mean_color'],
            batch_size=train_parameters['train_batch_size'],
            mode='train',
            label_dict=train_parameters['label_dict'],
            place=place)
        batch_num = len(train_reader())
        # +1 output class for the CTC blank label.
        crnn = CRNN(train_parameters["class_dim"] + 1, batch_size=batch_size)
        total_step = batch_num * epoch_num
        LR = train_parameters['learning_rate']
        # Polynomial decay from LR down to 1e-7 over the whole run.
        lr = fluid.layers.polynomial_decay(LR, total_step, 1e-7, power=0.9)
        # lr = fluid.layers.piecewise_decay([total_step // 3, total_step * 2 // 3], [LR, LR * 0.1, LR * 0.01])
        optimizer = fluid.optimizer.Adam(learning_rate=lr,
                                         parameter_list=crnn.parameters())
        if train_parameters["continue_train"]:
            # Load the model from the previous run and continue training.
            params_dict, opt_dict = fluid.load_dygraph('{}/crnn_latest'.format(
                train_parameters['save_model_dir']))
            crnn.set_dict(params_dict)
            optimizer.set_dict(opt_dict)
            logger.info("load model from {}".format(
                train_parameters['save_model_dir']))
        current_best = -1
        start_epoch = 0
        for epoch in range(start_epoch, epoch_num):
            crnn.train()
            tic = time.time()
            for batch_id, (img, label, label_len) in enumerate(train_reader()):
                out = crnn(img)
                # warpctc expects time-major input: (T, batch, classes).
                out_for_loss = fluid.layers.transpose(out, [1, 0, 2])
                # Every sample has the same number of time steps.
                input_length = np.array([out.shape[1]] *
                                        out.shape[0]).astype("int64")
                input_length = fluid.dygraph.to_variable(input_length)
                input_length.stop_gradient = True
                loss = fluid.layers.warpctc(
                    input=out_for_loss,
                    label=label.astype(np.int32),
                    input_length=input_length,
                    label_length=label_len,
                    blank=train_parameters["class_dim"],
                    norm_by_times=True)
                avg_loss = fluid.layers.reduce_mean(loss)
                cur_acc_num, cur_all_num = acc_batch(out.numpy(), label.numpy())
                if batch_id % 1 == 0:
                    logger.info(
                        "epoch [{}/{}], step [{}/{}], loss: {:.6f}, acc: {:.4f}, lr: {}, time: {:.4f}"
                        .format(epoch, epoch_num, batch_id, batch_num,
                                avg_loss.numpy()[0], cur_acc_num / cur_all_num,
                                optimizer.current_step_lr(),
                                time.time() - tic))
                    tic = time.time()
                avg_loss.backward()
                optimizer.minimize(avg_loss)
                crnn.clear_gradients()
            # End of epoch: always refresh the "latest" checkpoint.
            fluid.save_dygraph(
                crnn.state_dict(),
                '{}/crnn_latest'.format(train_parameters['save_model_dir']))
            fluid.save_dygraph(
                optimizer.state_dict(),
                '{}/crnn_latest'.format(train_parameters['save_model_dir']))
            crnn.eval()
            ratio = eval_model(crnn, place=place)
            # Keep a separate "best" checkpoint (>= so ties also refresh it).
            if ratio >= current_best:
                fluid.save_dygraph(
                    crnn.state_dict(),
                    '{}/crnn_best'.format(train_parameters['save_model_dir']))
                fluid.save_dygraph(
                    optimizer.state_dict(),
                    '{}/crnn_best'.format(train_parameters['save_model_dir']))
                current_best = ratio
                logger.info("save model to {}, current best acc:{:.2f}".format(
                    train_parameters['save_model_dir'], ratio))
        logger.info("train end")
# NOTE(review): fragment of a training loop body — the enclosing
# epoch/step loops (and `epoch`, `step`, `correct`, `number`, `batch`,
# `raw_text` initialization) are outside this chunk.

# NOTE(review): torch.autograd.Variable is a deprecated no-op wrapper in
# modern PyTorch; tensors could be used directly — confirm torch version.
encode_text = Variable(encode_text).to(device)
image = Variable(batch["image"]).to(device)
model.train()
output = model(image)
optimizer.zero_grad()
# CTC-style loss needs the per-sample output length (same for every
# sample: the time dimension of `output`).
output_size = Variable(
    torch.IntTensor([output.size(0)] * opt.batchsize)).to(device)
loss = lossfunction(output, encode_text, output_size, length)
loss.backward()
optimizer.step()

# Greedy decode: argmax over classes, then (T, B) -> (B, T).
_, output = output.max(2)
output = output.transpose(1, 0)
outputtext = []
for i in range(0, output.size(0)):
    decode_text = process.decodetext(output[i])
    outputtext += [decode_text]
    number += 1
    # Exact-match accuracy against the raw ground-truth strings.
    if decode_text == raw_text[i]:
        correct += 1
accuracy = float(correct / number)
print('epoch:%d-----step:%d/%d-----loss value:%f-----accuracy:%f\n' %
      (epoch, step, len(traindata), loss, accuracy))
print(raw_text, '\n')
print(outputtext, '\n')
writer.add_scalar("Training Loss", loss, step)
step += 1
# Periodically evaluate and snapshot the model.
if step % 100 == 0:
    test()
    torch.save(model.state_dict(),
               "savedmodel/epoch%d-step%d.pth" % (epoch, step))
writer.add_scalar("Training Accuracy", accuracy, epoch)
# Evaluation tail + start of the next training epoch (fragment: the
# enclosing loops and `total`/`correct`/`best_acc` setup are outside
# this chunk).

# BUG FIX: the unpack previously assigned `output_lstm1` twice
# (`output_conv, output_lstm1, output_lstm1, ...`), silently dropping
# the second LSTM's output; the training loop below shows the intended
# names.
output_conv, output_lstm1, output_lstm2, predictions = model.out(
    data)
total += true_labels.size(0)
correct += (predictions.max(1)[1] == true_labels).sum().item()
accuracy = 100 * correct / total
print('Accuracy of the network on the evaluation dataset: %d %%' %
      (100 * correct / total))
if accuracy > best_acc:
    # BUG FIX: this was `best_acc == accuracy` — a no-op comparison, so
    # the best accuracy never updated and the "best model" condition was
    # always measured against the initial value.
    best_acc = accuracy
    # SAVE MODEL
    print("SAVING MODEL")
    torch.save(model.state_dict(), "trained_models/best_model.pt")
test_accuracy.append(accuracy)

running_loss = 0.0
for i, (data, true_labels) in enumerate(training_dataloader):
    data = data.type(torch.FloatTensor)
    true_labels = true_labels.type(torch.LongTensor)
    # set all gradients to zero
    model.zero_grad()
    # Here we get the data from all layers, and the corresponding timesteps
    output_conv, output_lstm1, output_lstm2, predictions = model.out(data)
    loss = loss_function(predictions, true_labels)