def train(field):
    """Train a CRNN recogniser for one field, checkpointing as it goes.

    Reads the alphabet from ``./cn-alphabet.json``, builds the model, the
    ``LmdbDataset`` data loader and an RMSprop optimiser, then runs 25 epochs
    of CTC training. Every 500 batches the running average loss is printed
    and the model's ``state_dict`` is written under ``model_path``.

    Args:
        field: Name of the field to train on. It is passed to ``LmdbDataset``
            and embedded in the checkpoint file names. The fields
            ``'address'`` and ``'psb'`` are trained with a batch size of 1.

    Returns:
        None. The results are the checkpoint files written to disk.
    """
    # Use a context manager so the alphabet file handle is always closed.
    with open('./cn-alphabet.json', 'rb') as alphabet_file:
        alphabet = ''.join(json.load(alphabet_file))
    nclass = len(alphabet) + 1  # one extra class for the "dash" symbol

    # Image length varies for these fields, so they are fed one sample at a
    # time instead of being batched together.
    batch_size = 1 if field in ('address', 'psb') else BATCH_SIZE

    converter = LabelConverter(alphabet)
    criterion = CTCLoss(zero_infinity=True)

    crnn = CRNN(IMAGE_HEIGHT, nc, nclass, number_hidden)
    crnn.apply(weights_init)

    image_transform = transforms.Compose([
        Rescale(IMAGE_HEIGHT),
        transforms.ToTensor(),
        Normalize(),
    ])

    dataset = LmdbDataset(db_path, field, image_transform)
    dataloader = DataLoader(
        dataset, batch_size=batch_size, shuffle=True, num_workers=4)

    # Reusable buffers that each batch is loaded into via utils.load_data.
    # NOTE(review): the initial sizes look like placeholders; presumably
    # utils.load_data resizes them to fit each batch -- confirm.
    image = Variable(
        torch.FloatTensor(batch_size, 3, IMAGE_HEIGHT, IMAGE_HEIGHT))
    text = Variable(torch.IntTensor(batch_size * 5))
    length = Variable(torch.IntTensor(batch_size))

    loss_avg = utils.averager()
    optimizer = optim.RMSprop(crnn.parameters(), lr=lr)

    if torch.cuda.is_available():
        crnn.cuda()
        crnn = nn.DataParallel(crnn)
        image = image.cuda()
        criterion = criterion.cuda()

    def train_batch(net, iteration):
        """Run one optimisation step on the next batch and return its loss."""
        # Builtin next() is the Python 3 iterator protocol; the ``.next()``
        # method only exists on iterators that keep a Python 2 alias.
        cpu_images, cpu_texts = next(iteration)
        # The last batch of an epoch may be smaller than the configured size.
        current_batch_size = cpu_images.size(0)
        utils.load_data(image, cpu_images)
        encoded_text, encoded_length = converter.encode(cpu_texts)
        utils.load_data(text, encoded_text)
        utils.load_data(length, encoded_length)

        preds = net(image)
        # Every sample in the batch shares the same prediction length,
        # taken from the first dimension of the network output.
        preds_size = Variable(
            torch.IntTensor([preds.size(0)] * current_batch_size))
        cost = criterion(preds, text, preds_size, length) / current_batch_size
        net.zero_grad()
        cost.backward()
        optimizer.step()
        return cost

    nepoch = 25
    steps_per_epoch = len(dataloader)
    for epoch in range(nepoch):
        train_iter = iter(dataloader)

        # Nothing in this function freezes parameters or switches to eval
        # mode, so setting these once per epoch matches doing it per batch.
        for p in crnn.parameters():
            p.requires_grad = True
        crnn.train()

        for i in range(1, steps_per_epoch + 1):
            cost = train_batch(crnn, train_iter)
            loss_avg.add(cost)

            # Report and checkpoint every 500 batches.
            # NOTE: an epoch with fewer than 500 batches is never reported
            # or checkpointed.
            if i % 500 == 0:
                print('%s [%d/%d][%d/%d] Loss: %f' %
                      (datetime.datetime.now(), epoch, nepoch, i,
                       steps_per_epoch, loss_avg.val()))
                loss_avg.reset()

                torch.save(
                    crnn.state_dict(),
                    f'{model_path}crnn_{field}_{epoch}_{i}.pth')
nh = 256 nclass = len(c.alphabet) + 1 # input channel , 因为训练图片是转成灰度图,所以该值为1 nc = 1 lr = 0.001 beta1 = 0.5 MOMENTUM = 0.9 EPOCH = 100 # 字符转换编码 converter = utils.strLabelConverter(c.alphabet) # 损失函数 criterion = CTCLoss() crnn = CRNN(imgH, nc, nclass, nh, ngpu) crnn.apply(weights_init) if os.path.exists('/home/hecong/temp/data/ocr/simple_ocr.pkl'): crnn.load_state_dict( torch.load('/home/hecong/temp/data/ocr/simple_ocr.pkl')) image = torch.FloatTensor(batchSize, 3, imgH, imgH) text = torch.IntTensor(batchSize * 5) length = torch.IntTensor(batchSize) # optimizer = optim.Adam( # crnn.parameters(), lr=lr, betas=(beta1, 0.999)) optimizer = optim.SGD(crnn.parameters(), lr=lr, momentum=MOMENTUM) for epoch in range(EPOCH): for step, (t_image, t_label) in enumerate(train_loader): batch_size = t_image.size(0)