# Example 1 (score: 0)
def val(net, test_dataset, criterion, max_iter=100):
    """Run one validation pass of the model over *test_dataset*.

    Accumulates the CTC loss and a normalized edit-distance accuracy
    (1 - mean_edit_distance / mean_target_length), prints a few sample
    raw/decoded predictions, then prints the totals.

    Args:
        net: the CRNN model to evaluate.
        test_dataset: dataset yielding (image, text) pairs.
        criterion: CTC-style loss callable (preds, text, preds_size, length).
        max_iter: upper bound on the number of validation batches.
    """
    print('Start val')

    # Freeze the parameters of the model under evaluation. Use the `net`
    # argument (the original mistakenly froze/ran the `crnn` global, so the
    # function ignored the model it was given).
    for p in net.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(test_dataset,
                                              shuffle=False,
                                              batch_size=opt.batchSize,
                                              num_workers=int(opt.workers),
                                              collate_fn=dataset.alignCollate(
                                                  imgH=32,
                                                  imgW=100,
                                                  keep_ratio=True))
    val_iter = iter(data_loader)

    n = 0          # samples evaluated
    n_correct = 0  # accumulated edit distance (name kept; smaller is better)
    n_text = 0     # accumulated target length
    loss_avg = util.averager()

    # Honour the caller's cap instead of silently overwriting it.
    max_iter = min(max_iter, len(data_loader))
    for _ in range(max_iter):
        # Python 3: use the builtin next(); iterator.next() was Python-2-only.
        data = next(val_iter)
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        util.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)

        util.loadData(text, t)
        util.loadData(length, l)

        preds = net(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        # Greedy decode: argmax over the class dimension, then flatten
        # (T, B) -> (B*T,) for the converter.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            # Python 2 checked `unicode`; on Python 3 only bytes need decoding.
            if isinstance(target, bytes):
                target = target.decode('utf-8')
            pred_encode, _ = converter.encode(pred)
            target_encode, _ = converter.encode(target)
            n_correct += editdistance.eval(pred_encode, target_encode)
            n_text += len(target_encode)
            n += 1

    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:opt.n_test_disp]
    for raw_pred, sim_pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        gt = gt.lower()
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, sim_pred, gt))

    # Guard against an empty loader (the original raised ZeroDivisionError).
    if n == 0:
        print('Start val: no validation samples')
        return
    len_edit = n_correct / float(n)  # mean edit distance per sample
    len_text = n_text / float(n)     # mean target length per sample
    norm = 1 - len_edit / len_text
    print('editd dist: %f, norm acc: %f' % (n_correct, norm))
# Example 2 (score: 0)
# Pre-allocated input buffers; refilled in-place each batch via util.loadData.
# NOTE(review): opt.imgH is used for BOTH the height and the width dimension —
# this looks like it should be opt.imgW for the last dimension; confirm against
# the dataset/collate settings before relying on it.
image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH)
# Flattened encoded label buffer; the *5 factor presumably assumes an average
# label length of 5 per sample — TODO confirm against the converter/encoder.
text = torch.IntTensor(opt.batchSize * 5)
length = torch.IntTensor(opt.batchSize)  # per-sample label lengths

if opt.cuda:
    # Move the model, the input buffer, and the loss onto the selected GPU.
    crnn = crnn.cuda(id_gpu)
    image = image.cuda(id_gpu)
    criterion = criterion.cuda(id_gpu)

# Wrap the buffers in autograd Variables (legacy pre-0.4 PyTorch API).
image = Variable(image)
text = Variable(text)
length = Variable(length)

# loss averager
loss_avg = util.averager()

# setup optimizer
if opt.adam:
    optimizer = optim.Adam(crnn.parameters(),
                           lr=opt.lr,
                           betas=(opt.beta1, 0.999))
elif opt.adadelta:
    optimizer = optim.Adadelta(crnn.parameters(), lr=opt.lr)
else:
    optimizer = optim.RMSprop([{
        'params': crnn.cnn.parameters(),
        'lr': 0.0
    }, {
        'params': crnn.rnn.parameters(),
        'lr': opt.lr