def trainBatch(crnn, criterion, optimizer):
    """Run one optimization step on the next batch from ``train_iter``.

    Relies on module-level state: ``train_iter``, ``converter``, ``utils``
    and the pre-allocated ``image``/``text``/``length`` buffers.

    Args:
        crnn: the CRNN model (already on the training device).
        criterion: ``torch.nn.CTCLoss(reduction='sum')``.
        optimizer: optimizer over ``crnn.parameters()``.

    Returns:
        (cost, accuracy): per-sample CTC loss for this batch, and the
        fraction of greedy-decoded predictions that match their target
        string case-insensitively.
    """
    # NOTE(fix): the backward hook is registered once at module setup;
    # re-registering it here on every batch accumulated duplicate hooks,
    # so that per-batch call was removed.
    data = next(train_iter)  # builtin next() works on both Py2 and Py3
    cpu_images, cpu_texts = data
    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)

    preds = crnn(image)
    # torch.nn.CTCLoss expects log-probabilities over the class dimension.
    preds = F.log_softmax(preds, dim=2)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    # Divide by batch size because the criterion uses reduction='sum'.
    # NOTE(review): text is moved to GPU unconditionally — this assumes a
    # CUDA run (matches the module-level opt.cuda path); confirm for CPU.
    cost = criterion(preds, text.long().cuda(), preds_size, length) / batch_size

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Greedy decode: argmax over classes, then (T, B) -> flat (B*T,).
    _, acc = preds.max(2)
    # NOTE(fix): the old guard `int(torch.__version__.split('.')[1]) < 2`
    # parsed the MINOR version, so torch 1.0.x (minor == 0) wrongly took
    # the legacy branch and squeeze(2) raised on a 2-D tensor. Checking
    # the actual rank is correct on every torch version.
    if acc.dim() == 3:
        acc = acc.squeeze(2)
    acc = acc.transpose(1, 0).contiguous().view(-1)
    sim_preds = converter.decode(acc, preds_size, raw=False)

    n_correct = sum(
        1 for pred, target in zip(sim_preds, cpu_texts)
        if pred.lower() == target.lower()
    )
    accuracy = n_correct / float(batch_size)
    return cost, accuracy
g[g != g] = 0 # custom weights initialization called on crnn def weights_init(m): classname = m.__class__.__name__ if classname.find('Conv') != -1: m.weight.data.normal_(0.0, 0.02) elif classname.find('BatchNorm') != -1: m.weight.data.normal_(1.0, 0.02) m.bias.data.fill_(0) crnn = crnn.CRNN(opt.imgH, nc, nclass, opt.nh) crnn.register_backward_hook( backward_hook ) # xzy 注册hook函数,将nan强改为0,以解决pytorh1.0.1自带CTCLoss训练时权重出现nan问题。 crnn.apply(weights_init) image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH) text = torch.IntTensor(opt.batchSize * 5) length = torch.IntTensor(opt.batchSize) if opt.cuda: crnn.cuda() crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu)) image = image.cuda() criterion = criterion.cuda() if need_load: #xzy print('loading pretrained model from %s' % saved_model_path)
# --- model/criterion configuration -------------------------------------
nc = 1  # single-channel (grayscale) input
nclass = len(params.alphabet) + 1  # alphabet + 1 for the CTC blank symbol
criterion = torch.nn.CTCLoss(reduction='sum')
# criterion = CTCLoss()

# Build the CNN+RNN model (image height fixed at 32) and initialize it.
crnn = crnn.CRNN(32, nc, nclass, params.nh)
crnn.apply(weights_init)
if params.crnn != '':
    print('loading pretrained model from %s' % params.crnn)
    crnn.load_state_dict(torch.load(params.crnn))

# Running average of the training loss.
loss_avg = utils.averager()

# --- optimizer selection (Adam takes precedence over Adadelta/RMSprop) --
if params.adam:
    optimizer = optim.Adam(crnn.parameters(), lr=params.lr,
                           betas=(params.beta1, 0.999))
else:
    opt_cls = optim.Adadelta if params.adadelta else optim.RMSprop
    optimizer = opt_cls(crnn.parameters(), lr=params.lr)

# Hook that sanitizes gradients during backprop (defined earlier in file).
crnn.register_backward_hook(backward_hook)

print(params.experiment)
main(crnn, train_loader, val_loader, criterion, optimizer)