Example #1
def trainBatch(net, criterion, optimizer):
    data = next(train_iter)
    cpu_images, cpu_texts = data
    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)

    preds = crnn(image)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size
    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
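
These snippets all depend on globals defined in the surrounding training script (crnn, image, text, length, converter, opt, train_iter). For orientation, here is a self-contained sketch of the same CTC training step using stock torch.nn.CTCLoss and a toy model; the class, function names and shapes are illustrative and not taken from the original code.

import torch
import torch.nn as nn

# Toy recogniser: (N, 1, 32, W) images -> (T, N, C) per-timestep class scores.
class TinyCRNN(nn.Module):
    def __init__(self, n_classes):
        super().__init__()
        self.cnn = nn.Sequential(nn.Conv2d(1, 16, 3, padding=1), nn.ReLU(),
                                 nn.MaxPool2d((32, 1)))   # collapse the height dimension
        self.rnn = nn.LSTM(16, 32, bidirectional=True)
        self.fc = nn.Linear(64, n_classes)

    def forward(self, x):
        f = self.cnn(x).squeeze(2)        # (N, 16, W)
        f = f.permute(2, 0, 1)            # (T=W, N, 16)
        out, _ = self.rnn(f)
        return self.fc(out)               # (T, N, n_classes)

n_classes = 11                            # 10 symbols + CTC blank at index 0
model = TinyCRNN(n_classes)
criterion = nn.CTCLoss(blank=0, zero_infinity=True)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

def train_batch(images, targets, target_lengths):
    preds = model(images).log_softmax(2)                   # (T, N, C) log-probabilities
    T, N, _ = preds.shape
    input_lengths = torch.full((N,), T, dtype=torch.long)
    loss = criterion(preds, targets, input_lengths, target_lengths)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()

# Dummy batch: four 32x100 images with 5-character labels.
images = torch.randn(4, 1, 32, 100)
targets = torch.randint(1, n_classes, (4, 5))
print('loss:', train_batch(images, targets, torch.full((4,), 5, dtype=torch.long)))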
Example #2
def val(net, dataset, criterion, max_iter=100):
    print('Start val')

    for p in crnn.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(
        dataset, shuffle=True, batch_size=opt.batchSize, num_workers=int(opt.workers))
    val_iter = iter(data_loader)

    i = 0
    n_correct = 0
    loss_avg = utils.averager()

    for i in range(min(max_iter, len(data_loader))):
        data = next(val_iter)
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)

        preds = crnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        _, preds = preds.max(2)
        if torch.__version__ < '0.2':
            preds = preds.squeeze(2)  # only needed on very old PyTorch, where max() kept the reduced dim
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred == target.lower():
                n_correct += 1

    raw_preds = converter.decode(preds.data, preds_size.data, raw=True)
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuracy: %f' % (loss_avg.val(), accuracy))
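
converter.decode(..., raw=False) above performs the standard CTC best-path collapse: merge repeated labels, then drop blanks. Below is a minimal sketch of that rule, assuming the blank symbol sits at index 0 (the usual convention for these label converters); the function name is illustrative.

import torch

def ctc_greedy_decode(best_path, alphabet, raw=False):
    """Collapse one sample's best-path indices into a string.

    Assumes index 0 is the CTC blank and index k (k >= 1) maps to alphabet[k - 1].
    raw=True keeps blanks (shown as '-') and repeats, like converter.decode(raw=True).
    """
    chars, prev = [], 0
    for idx in best_path.tolist():
        if raw:
            chars.append('-' if idx == 0 else alphabet[idx - 1])
        elif idx != 0 and idx != prev:    # drop blanks, merge repeated labels
            chars.append(alphabet[idx - 1])
        prev = idx
    return ''.join(chars)

alphabet = 'abcdefghijklmnopqrstuvwxyz'
path = torch.tensor([0, 3, 3, 0, 1, 0, 20, 20])
print(ctc_greedy_decode(path, alphabet, raw=True))    # -cc-a-tt
print(ctc_greedy_decode(path, alphabet, raw=False))   # cat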
Example #3
def val(net, dataset, max_iter=100):
    print('Start val')

    for p in crnn.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(dataset,
                                              shuffle=True,
                                              batch_size=opt.batchSize,
                                              num_workers=int(opt.workers))
    val_iter = iter(data_loader)

    i = 0
    n_correct = 0
    # loss averager
    avg_h_val = utils.averager()
    avg_cost_val = utils.averager()
    avg_h_cost_val = utils.averager()

    if opt.eval_all:
        max_iter = len(data_loader)
    else:
        max_iter = min(max_iter, len(data_loader))

    for i in range(max_iter):
        data = next(val_iter)
        # print('data: ', data)
        # print(data)
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)
        # print('len(cpu_images): ', len(cpu_images))
        # print('cpu_texts ', cpu_texts)
        # print('len(cpu_texts): ', len(cpu_texts))
        # print('l ', l)
        # print(len(l))
        # print('length ', length)

        preds = crnn(image)  # size = 26, 64, 96
        # print(preds.size())
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        H, cost = seg_ctc_ent_cost(preds,
                                   text,
                                   preds_size,
                                   length,
                                   uni_rate=opt.uni_rate)
        h_cost = (1 - opt.h_rate) * cost - opt.h_rate * H
        avg_h_val.add(H / batch_size)
        avg_cost_val.add(cost / batch_size)
        avg_h_cost_val.add(h_cost / batch_size)

        _, preds = preds.max(2)  # size = 26, 64
        # print(preds.size())
        # preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for idx, (pred, target) in enumerate(zip(sim_preds, cpu_texts)):
            if pred == target.lower():
                n_correct += 1

    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:opt.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-30s => %-30s, gt: %-30s' % (raw_pred, pred, gt))

    accuracy = n_correct / float(max_iter * opt.batchSize)
    print(
        'Test H: %f, Cost: %f, H Cost: %f, accuracy: %f' %
        (avg_h_val.val(), avg_cost_val.val(), avg_h_cost_val.val(), accuracy))
Example #4
def val(net, dataset, criterion, max_iter=100):
    print('Start val')

    for p in crnn.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(dataset,
                                              shuffle=True,
                                              batch_size=opt.batchSize,
                                              num_workers=int(opt.workers))
    val_iter = iter(data_loader)

    i = 0
    n_correct = 0
    loss_avg = utils.averager()

    max_iter = min(max_iter, len(data_loader))
    print("max_iter", max_iter, "len(data_loader)", len(data_loader))
    for i in range(max_iter):
        data = next(val_iter)
        # print(data)
        i += 1
        cpu_images, cpu_texts = data

        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)

        utils.loadData(text, t)
        utils.loadData(length, l)

        preds = crnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        _, preds = preds.max(2)
        #        preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)

        list_cpu_texts = []
        for txt in cpu_texts:
            # decode LMDB byte strings to str; use a new name so the loop
            # counter i is not clobbered (it is checked against 1 below)
            list_cpu_texts.append(txt.decode('utf-8', 'strict'))

        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        if (i == 1):
            print(sim_preds)
            print(cpu_texts)
        #        cpu_texts = byte_to_zh(cpu_texts)
        # print("sim_preds",sim_preds)
        for pred, target in zip(sim_preds, list_cpu_texts):
            if pred == target.lower() or pred == target:
                n_correct += 1

    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:opt.n_test_disp]

    for raw_pred, pred, gt in zip(raw_preds, sim_preds, list_cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuracy: %f' % (loss_avg.val(), accuracy))
Example #5
def val(net, test_dataset, criterion, max_iter=100):
    print('Start val')

    for p in crnn.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(test_dataset,
                                              shuffle=False,
                                              batch_size=batchSize,
                                              num_workers=int(workers),
                                              collate_fn=dataset.alignCollate(
                                                  imgH=32,
                                                  imgW=100,
                                                  keep_ratio=True))
    val_iter = iter(data_loader)

    i = 0
    n = 0
    n_correct = 0
    n_text = 0
    loss_avg = util.averager()

    max_iter = len(data_loader)
    for i in range(max_iter):
        data = next(val_iter)
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        util.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        util.loadData(text, t)
        util.loadData(length, l)

        preds = crnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        _, preds = preds.max(2)
        if torch.__version__ < '0.2':
            preds = preds.squeeze(2)  # older PyTorch kept the reduced dim; newer versions drop it
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if isinstance(target, bytes):
                target = target.decode('utf-8')
            pred_encode, _ = converter.encode(pred)
            target_encode, _ = converter.encode(target)
            dist = editdistance.eval(pred_encode, target_encode)
            ref_len = len(target_encode)
            # chardit1 = chardet.detect(target)
            # print (chardit1)
            print(pred + '>>>>' + target)
            n_correct += dist   # accumulates total edit distance (despite the name)
            n_text += ref_len
            n += 1
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:n_test_disp]
    for raw_pred, sim_pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, sim_pred, gt))

    len_edit = n_correct / float(n)
    len_text = n_text / float(n)
    norm = 1 - len_edit / len_text
    print('average editdistance: %f, normalized accuracy: %f' %
          (len_edit, norm))
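
The normalized accuracy printed here equals 1 minus the corpus-level character error rate (total edits over total reference characters). A small sketch of that metric using the same editdistance package; the function names are illustrative.

import editdistance

def char_error_rate(pred, target):
    """Per-sample CER: Levenshtein edits divided by the reference length."""
    if len(target) == 0:
        return 0.0 if len(pred) == 0 else 1.0
    return editdistance.eval(pred, target) / float(len(target))

def corpus_cer(preds, targets):
    """Corpus-level CER: total edits over total reference characters,
    i.e. exactly 1 - norm in the aggregate printed above."""
    total_edits = sum(editdistance.eval(p, t) for p, t in zip(preds, targets))
    total_chars = sum(len(t) for t in targets)
    return total_edits / float(max(total_chars, 1))

print(corpus_cer(['helo', 'world'], ['hello', 'world']))   # 1 edit / 10 chars = 0.1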
Example #6
def val(net, valdataset, criterionAttention, criterionCTC, max_iter=100):
    print('Start val')

    for p in crnn.parameters():
        p.requires_grad = False

    net.eval()
    val_sampler = dataset.randomSequentialSampler(valdataset, opt.batchSize)
    data_loader = torch.utils.data.DataLoader(
        valdataset, batch_size=opt.batchSize,
        shuffle=False, sampler=val_sampler,
        num_workers=int(opt.workers),
        collate_fn=dataset.alignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio=opt.keep_ratio))
   # data_loader = torch.utils.data.DataLoader(
   #     dataset, shuffle=True, batch_size=opt.batchSize, num_workers=int(opt.workers))
    val_iter = iter(data_loader)

    i = 0
    n_correct = 0
    loss_avg = utils.averager()

    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = next(val_iter)
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        tAttention, lAttention = converterAttention.encode(cpu_texts)
        utils.loadData(textAttention, tAttention)
        utils.loadData(lengthAttention, lAttention)
        tCTC, lCTC = converterCTC.encode(cpu_texts)
        utils.loadData(textCTC, tCTC)
        utils.loadData(lengthCTC, lCTC)
       # print (image)

        if opt.lang:
            predsCTC, predsAttention = crnn(image, lengthAttention, textAttention)
        else:
            predsCTC, predsAttention = crnn(image, lengthAttention)  # "imageAttention" was undefined; image is the loaded batch
        costAttention = criterionAttention(predsAttention, textAttention)
        preds_size = Variable(torch.IntTensor([predsCTC.size(0)] * batch_size))
        costCTC = criterionCTC(predsCTC, textCTC, preds_size, lengthCTC) / batch_size
        loss_avg.add(costAttention)
        loss_avg.add(costCTC.cuda())

        _, predsAttention = predsAttention.max(1)
        predsAttention = predsAttention.view(-1)
        sim_predsAttention = converterAttention.decode(predsAttention.data, lengthAttention.data)
        for pred, target in zip(sim_predsAttention, cpu_texts):
           # target = ''.join(target.split(opt.sep))
            print(pred, target)
            if pred == target:
                n_correct += 1

   # for pred, gt in zip(sim_preds, cpu_texts):
       # gt = ''.join(gt.split(opt.sep))
       # print('%-20s, gt: %-20s' % (pred, gt))

    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuracy: %f' % (loss_avg.val(), accuracy))
Example #7
        # (the snippet starts mid-function, inside the script's GPU setup branch, e.g. `if opt.cuda:`)
        crnn = torch.nn.DataParallel(crnn, device_ids=range(1))
        image = image.cuda()
        text = text.cuda()
        length = length.cuda()

    counter = 0
    n_correct = 0
    total_cost = 0  # running CTC loss; initialised here (its definition is not shown in the snippet)
    for images, labels in test_loader:
        counter = counter + 1
        batch_size = images.size(0)
        loadData(image, images)
        integer_labels, label_lengths = string_converter.convert_string_to_integer(labels, [])
        loadData(text, integer_labels)
        loadData(length, label_lengths)

        output = crnn(image)
        output = F.log_softmax(output, 2)
        output_size = Variable(torch.IntTensor([output.size(0)] * batch_size))
        cost = loss_function(output, text, output_size, length) / batch_size
        total_cost = total_cost + cost.item()
        _, output = output.max(2)
        output = output.transpose(1, 0).contiguous().view(-1)
        predicted_texts = string_converter.convert_integer_to_string(output.data, output_size.data)
        ground_truth_texts = string_converter.convert_integer_to_string(text.data, length.data)
        for pred, target in zip(predicted_texts, ground_truth_texts):
            if pred == target:
                n_correct += 1
            print('Prediction : {}   GT : {}'.format(pred, target))

    average_cost = total_cost / float(counter)
    accuracy = n_correct / (batch_size * counter) * 100  # "batch" was undefined; this assumes a constant batch size
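
This example applies F.log_softmax before the loss, which is exactly what the built-in torch.nn.CTCLoss expects (the warp-ctc binding typically used with these scripts applies the softmax internally instead). A minimal sketch of the built-in loss with the (T, N, C) layout used above; shapes and values are illustrative. Note that reduction='mean' already normalises per target length, so the manual division by batch_size seen in the snippets is not needed there.

import torch
import torch.nn as nn
import torch.nn.functional as F

T, N, C = 26, 4, 37                       # time steps, batch size, classes incl. blank at index 0
logits = torch.randn(T, N, C, requires_grad=True)

log_probs = F.log_softmax(logits, dim=2)  # CTCLoss wants log-probabilities in (T, N, C)
targets = torch.randint(1, C, (N, 7), dtype=torch.long)       # label indices, blank excluded
input_lengths = torch.full((N,), T, dtype=torch.long)
target_lengths = torch.full((N,), 7, dtype=torch.long)

ctc = nn.CTCLoss(blank=0, reduction='mean', zero_infinity=True)
loss = ctc(log_probs, targets, input_lengths, target_lengths)
loss.backward()
print(loss.item())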
Example #8
def val(net, dataset, criterion, max_iter=1000, test_aug=False, n_aug=1):

    print('Start validation set')

    for p in crnn.parameters():
        p.requires_grad = False

    net.eval()

    i = 0
    n_correct = 0
    loss_avg = utils.averager()

    image_count = 0
    # Character and word error rate lists
    char_error = []
    w_error = []
    
    pred_dict = {}
    gt_dict = {}

    for epoch in range(n_aug):
        max_iter = len(dataset) if test_aug else min(max_iter, len(dataset))
        val_iter = iter(dataset)
   
        for i in range(max_iter):
            data = next(val_iter)
            i += 1
            cpu_images, cpu_texts, cpu_files = data
            batch_size = cpu_images.size(0)
            image_count = image_count + batch_size
            utils.loadData(image, cpu_images)
            t, l = converter.encode(cpu_texts)
            utils.loadData(text, t)
            utils.loadData(length, l)

            preds = crnn(image)
            #print(preds.size())
            preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
            cost = criterion(preds, text, preds_size, length) / batch_size
            loss_avg.add(cost)

            # RA: While I am not sure yet, it looks like a greedy decoder and not beam search is being used here
            # Case is ignored in the accuracy, which is not ideal for an actual working system

            _, preds = preds.max(2)
            if torch.__version__ < '0.2':
                preds = preds.squeeze(2)  # https://github.com/meijieru/crnn.pytorch/issues/31
            preds = preds.transpose(1, 0).contiguous().view(-1)
            sim_preds = converter.decode(preds.data, preds_size.data, raw=False)

            for pred, target, f in zip(sim_preds, cpu_texts, cpu_files):
                if f not in gt_dict:
                    gt_dict[f] = target
                    pred_dict[f] = []
                pred_dict[f].append(pred)
                if pred == target:
                    n_correct += 1
            
    # Case-sensitive character and word error rates
    for f, target in gt_dict.items():
        # Finds the most commonly predicted string for all the augmented images
        best_pred = Counter(pred_dict[f]).most_common(1)[0][0]
        char_error.append(cer(best_pred, target))
        w_error.append(wer(best_pred, target))

    raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:opt.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    print("Total number of images in validation set: %8d" % image_count)

    accuracy = n_correct / float(image_count)  # image_count covers every augmentation pass and partial final batches
    print('Test loss: %f, accuracy: %f' % (loss_avg.val(), accuracy))

    char_arr = np.array(char_error)
    w_arr = np.array(w_error)
    char_mean_error = np.mean(char_arr)
    word_mean_error = np.mean(w_arr)

    print("Character error rate mean: %4.4f; Character error rate sd: %4.4f" % (
    char_mean_error, np.std(char_arr, ddof=1)))
    print("Word error rate mean: %4.4f; Word error rate sd: %4.4f" % (word_mean_error, np.std(w_arr, ddof=1)))

    return char_mean_error, word_mean_error, accuracy
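
cer and wer here come from an unspecified helper module; judging from the call sites they take (prediction, ground_truth) and return a rate. A self-contained sketch of compatible implementations built on plain dynamic-programming edit distance (assumed, not the project's actual helpers):

def levenshtein(a, b):
    """Plain dynamic-programming edit distance between two sequences."""
    prev = list(range(len(b) + 1))
    for i in range(1, len(a) + 1):
        cur = [i] + [0] * len(b)
        for j in range(1, len(b) + 1):
            cur[j] = min(prev[j] + 1,                            # deletion
                         cur[j - 1] + 1,                         # insertion
                         prev[j - 1] + (a[i - 1] != b[j - 1]))   # substitution
        prev = cur
    return prev[len(b)]

def cer(pred, target):
    """Character error rate: character edits over reference length."""
    return levenshtein(pred, target) / max(len(target), 1)

def wer(pred, target):
    """Word error rate: word-level edits over reference word count."""
    return levenshtein(pred.split(), target.split()) / max(len(target.split()), 1)

print(cer('helo wurld', 'hello world'))   # 2 / 11
print(wer('helo wurld', 'hello world'))   # 2 / 2 = 1.0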
Example #9
def val(net, valdataset, criterionAttention, criterionCTC, max_iter=100):
    print('Start val')

    for p in crnn.parameters():
        p.requires_grad = False

    net.eval()
    val_sampler = dataset.randomSequentialSampler(valdataset, opt.batchSize)
    data_loader = torch.utils.data.DataLoader(valdataset,
                                              batch_size=opt.batchSize,
                                              shuffle=False,
                                              sampler=val_sampler,
                                              num_workers=int(opt.workers),
                                              collate_fn=dataset.alignCollate(
                                                  imgH=opt.imgH,
                                                  imgW=opt.imgW,
                                                  keep_ratio=opt.keep_ratio))
    # data_loader = torch.utils.data.DataLoader(
    #     dataset, shuffle=True, batch_size=opt.batchSize, num_workers=int(opt.workers))
    val_iter = iter(data_loader)

    i = 0
    n_correct = 0
    n_correctCTC = 0
    n_correctAttention = 0
    distanceCTC = 0
    distanceAttention = 0
    sum_charNum = 0
    loss_avg = utils.averager()

    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = next(val_iter)
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        tAttention, lAttention = converterAttention.encode(cpu_texts)
        utils.loadData(textAttention, tAttention)
        utils.loadData(lengthAttention, lAttention)
        tCTC, lCTC = converterCTC.encode(cpu_texts)
        utils.loadData(textCTC, tCTC)
        utils.loadData(lengthCTC, lCTC)
        # print (image)

        if opt.lang:
            predsCTC, predsAttention = crnn(image, lengthAttention,
                                            textAttention)
        else:
            predsCTC, predsAttention = crnn(image, lengthAttention)  # "imageAttention" was undefined; image is the loaded batch
        costAttention = criterionAttention(predsAttention, textAttention)
        preds_size = Variable(torch.IntTensor([predsCTC.size(0)] * batch_size))
        costCTC = criterionCTC(predsCTC, textCTC, preds_size,
                               lengthCTC) / batch_size
        loss_avg.add(costAttention)
        loss_avg.add(costCTC.cuda())

        _, predsAttention = predsAttention.max(1)
        predsAttention = predsAttention.view(-1)
        sim_predsAttention = converterAttention.decode(predsAttention.data,
                                                       lengthAttention.data)

        _, predsCTC = predsCTC.max(2)
        predsCTC = predsCTC.transpose(1, 0).contiguous().view(-1)
        sim_predsCTC = converterCTC.decode(predsCTC.data,
                                           preds_size.data,
                                           raw=False)

        for i, cpu_text in enumerate(cpu_texts):
            gtText = cpu_text.decode('utf-8')
            CTCText = sim_predsCTC[i]
            if isinstance(CTCText, bytes):
                CTCText = CTCText.decode('utf-8')
            AttentionText = sim_predsAttention[i]
            print('gtText: %s' % gtText)
            print('CTCText: %s' % CTCText)
            print('AttentionText: %s' % AttentionText)
            if gtText == CTCText:
                n_correctCTC += 1
            if gtText == AttentionText:
                n_correctAttention += 1
            distanceCTC += Levenshtein.distance(CTCText, gtText)
            distanceAttention += Levenshtein.distance(AttentionText, gtText)
            sum_charNum = sum_charNum + len(gtText)

    correctCTC_accuracy = n_correctCTC / float(max_iter * batch_size)
    cerCTC = distanceCTC / float(sum_charNum)
    print('Test CERCTC: %f, accuracyCTC: %f' % (cerCTC, correctCTC_accuracy))
    correctAttention_accuracy = n_correctAttention / float(
        max_iter * batch_size)
    cerAttention = distanceAttention / float(sum_charNum)
    print('Test CERAttention: %f, accuracyAttention: %f' %
          (cerAttention, correctAttention_accuracy))
Example #10
def val(net,
        _dataset1,
        _dataset2,
        _dataset3,
        epoch,
        step,
        criterion,
        max_iter=100):
    logger.info('Start val')
    # for p in crnn.parameters():
    #     p.requires_grad = False
    net.eval()
    data_loader1 = torch.utils.data.DataLoader(
        _dataset1,
        shuffle=False,
        batch_size=params.batchSize,
        num_workers=int(params.workers),
        collate_fn=dataset.alignCollate(imgH=params.imgH,
                                        imgW=params.imgW,
                                        keep_ratio=params.keep_ratio))
    data_loader2 = torch.utils.data.DataLoader(
        _dataset2,
        shuffle=False,
        batch_size=params.batchSize,
        num_workers=int(params.workers),
        collate_fn=dataset.alignCollate(imgH=params.imgH,
                                        imgW=params.imgW,
                                        keep_ratio=params.keep_ratio))
    data_loader3 = torch.utils.data.DataLoader(
        _dataset3,
        shuffle=False,
        batch_size=params.batchSize,
        num_workers=int(params.workers),
        collate_fn=dataset.alignCollate(imgH=params.imgH,
                                        imgW=params.imgW,
                                        keep_ratio=params.keep_ratio))
    val_iter = iter(data_loader1)
    val_iter2 = iter(data_loader2)
    val_iter3 = iter(data_loader3)
    i = 0
    n_correct = 0
    loss_avg = utils.averager()
    max_iter = len(data_loader1)
    record_dir = log_dir + 'epoch_%d_step_%d_data.txt' % (epoch, step)
    r = 1
    f = open(record_dir, "a")
    num_label, num_pred = params.total_num, 0

    start = time.time()
    for i in range(max_iter):
        data = next(val_iter)
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)
        data2 = next(val_iter2)
        cpu_images2, _ = data2
        utils.loadData(image2, cpu_images2)
        data3 = next(val_iter3)
        cpu_images3, _ = data3
        utils.loadData(image3, cpu_images3)
        with torch.no_grad():
            preds = torch.mean(
                torch.cat([
                    torch.unsqueeze(crnn(image), 0),
                    torch.unsqueeze(crnn(image2), 0),
                    torch.unsqueeze(crnn(image3), 0)
                ], 0), 0)
        print('preds: ', preds.shape)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))  # was missing in the original; needed below
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        if not isinstance(sim_preds, list):
            sim_preds = [sim_preds]
        for pred in sim_preds:
            f.write(str(r).zfill(6) + ".jpg " + pred + "\n")
            r += 1
        list_1 = []
        for txt in cpu_texts:
            list_1.append(txt.decode('utf-8', 'strict'))
        for pred, target in zip(sim_preds, list_1):
            if pred == target:
                n_correct += 1

        num_pred += len(sim_preds)

    print("")
    f.close()

    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:params.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, list_1):
        logger.info('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    logger.info('correct_num: %d' % (n_correct))
    logger.info('Total_num: %d' % (max_iter * params.batchSize))
    accuracy = float(n_correct) / num_pred
    recall = float(n_correct) / num_label
    logger.info(
        'Test loss: %f, accuracy: %f, recall: %f, F1 score: %f, Cost : %.4fs per img'
        % (loss_avg.val(), accuracy, recall, 2 * accuracy * recall /
           (accuracy + recall + 1e-2), (time.time() - start) / max_iter))
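
The torch.cat / torch.unsqueeze / torch.mean block above is a test-time-augmentation ensemble: the same batch is fed through the network as three augmented versions and the per-timestep outputs are averaged before decoding. The same idea written compactly; function and variable names are illustrative.

import torch

def ensemble_logits(model, views):
    """Average the network output over several augmented views of the same batch.

    views: a list of input tensors with identical shapes. Returns the element-wise
    mean of the per-view outputs, which is what the torch.cat / torch.unsqueeze /
    torch.mean block above computes before decoding.
    """
    with torch.no_grad():
        return torch.stack([model(v) for v in views], dim=0).mean(dim=0)

# Tiny demonstration with a stand-in "model" (a single Linear layer):
dummy = torch.nn.Linear(8, 5)
views = [torch.randn(10, 4, 8) for _ in range(3)]   # three augmented versions of one batch
print(ensemble_logits(dummy, views).shape)          # torch.Size([10, 4, 5])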
Example #11
def val(net, dataset, criterion, max_iter=100):
    print('Start val')
    for p in crnn.parameters():
        p.requires_grad = False
    net.eval()
    data_loader = torch.utils.data.DataLoader(dataset,
                                              shuffle=True,
                                              batch_size=params.batchSize,
                                              num_workers=int(params.workers))
    val_iter = iter(data_loader)
    i = 0
    n_correct = 0
    loss_avg = utils.averager()

    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = next(val_iter)
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)
        preds = crnn(image)
        # print('-----preds-----')
        # print(preds)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        # print('-----preds_size-----')
        # print(preds_size)
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)
        _, preds = preds.max(2)
        # print('-----preds.max(2)-----')
        # print(preds)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        # print('-----preds.transpose(1, 0)-----')
        # print(preds)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)

        list_1 = []
        for m in cpu_texts:
            list_1.append(m.decode('utf-8', 'strict'))

        # if (i - 1) % 10 == 0:
        # print('-----sim_preds-----list_1-----')
        # print(sim_preds, list_1)
        for pred, target in zip(sim_preds, list_1):
            if pred == target:
                n_correct += 1


#             else:
#                 print('%-20s, gt: %-20s' % (pred, target))

    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:params.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, list_1):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    print(n_correct)
    print(max_iter * params.batchSize)
    accuracy = n_correct / float(max_iter * params.batchSize)
    print('Test loss: %f, accuracy: %f' % (loss_avg.val(), accuracy))
Example #12
def val(net, dataset, criterion, max_iter=2):
    print('Start val')

    for p in crnn.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(dataset,
                                              shuffle=False,
                                              batch_size=opt.batchSize,
                                              num_workers=int(opt.workers))
    val_iter = iter(data_loader)

    i = 0
    n_correct = 0
    loss_avg = utils.averager()

    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = next(val_iter)
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        if ifUnicode:
            cpu_texts = [clean_txt(tx.decode('utf-8')) for tx in cpu_texts]
        # print(cpu_texts)
        t, l = converter.encode(cpu_texts)
        # print(t)
        # print(l)
        utils.loadData(text, t)
        utils.loadData(length, l)

        preds = crnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        # print(preds)
        # print(preds.shape)
        _, preds = preds.max(2)
        # print(preds)
        # print(preds.shape)
        # preds = preds.squeeze(2)
        # print(preds)
        # print(preds.shape)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        # print(preds)
        # print(preds.shape)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        print(sim_preds)
        print(cpu_texts)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred.strip() == target.strip():
                n_correct += 1

    # raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:opt.n_test_disp]
    # for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
    # print((pred, gt))
    # print
    accuracy = n_correct / float(max_iter * opt.batchSize)
    testLoss = loss_avg.val()
    print('Test loss: %f, accuracy: %f' % (testLoss, accuracy))
    return testLoss, accuracy
Example #13
def validation(net, dataset, criterion, max_iter=100):
    """
    validation on val dataset
    """
    print('Start val')

    for p in crnn.parameters():
        p.requires_grad = False

    # change model to eval mode
    net.eval()
    data_loader = torch.utils.data.DataLoader(dataset,
                                              shuffle=True,
                                              batch_size=opt.batchSize,
                                              num_workers=int(opt.workers))

    # returns an iterator for the data_loader
    val_iter = iter(data_loader)

    i = 0
    n_correct = 0
    loss_avg = utils.averager()

    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = next(val_iter)
        i += 1

        # get image and label for the validaton
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        # copy cpu_images to image
        utils.loadData(image, cpu_images)
        # encode label to number
        t, l = converter.encode(cpu_texts)
        # copy the encoded label (t) and its length (l) into text and length
        utils.loadData(text, t)
        utils.loadData(length, l)

        # image (1x1x32x100)
        preds = crnn(image)
        # preds (26x1x37)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        # compute cost
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        _, preds = preds.max(2)
        # preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            # if pred == target.lower():     # for case insensitive
            if pred == target:  # for case sensitive
                n_correct += 1

    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:opt.n_test_disp]

    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    # compute accuracy
    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuracy: %f' % (loss_avg.val(), accuracy))
Example #14
def val(net, dataset, criterion, idx, max_iter=20):
    print('Start val')

    for p in crnn.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(dataset,
                                              shuffle=True,
                                              batch_size=opt.batchSize,
                                              num_workers=int(opt.workers))
    val_iter = iter(data_loader)

    i = 0
    n_correct = 0
    loss_avg = utils.averager()

    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = next(val_iter)
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)

        preds = crnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds.cpu(), text, preds_size, length) / batch_size
        loss_avg.add(cost)

        _, preds = preds.max(2)
        # preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred == target.lower():
                n_correct += 1

    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:opt.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))
        # writer.add_text('Text', '%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt), idx)

    accuracy = n_correct / float(max_iter * opt.batchSize)

    writer.add_scalars('data/loss', {'val': loss_avg.val()}, idx)
    writer.add_scalars('data/accuracy', {'val': accuracy}, idx)

    print('Test loss: %f, accuracy: %f' % (loss_avg.val(), accuracy))

    global best_accuracy
    # save model
    if best_accuracy < accuracy:
        best_accuracy = accuracy
        if best_accuracy > 0.35:
            model_path = os.path.join(model_dir,
                                      '{:.5f}.pth'.format(best_accuracy))
            print('At epoch {}, iter {}, writing model file to {}'.format(
                epoch, i, model_path))
            torch.save(crnn.state_dict(), model_path)
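
This last example logs through a global writer and saves checkpoints into model_dir; below is a hedged sketch of the setup it presumably relies on. Paths and values are illustrative, and the original may just as well use tensorboardX, whose SummaryWriter exposes the same add_scalars API.

import os
import torch
from torch.utils.tensorboard import SummaryWriter

model_dir = './expr'                      # illustrative checkpoint directory
os.makedirs(model_dir, exist_ok=True)
writer = SummaryWriter(log_dir=os.path.join(model_dir, 'tb'))
best_accuracy = 0.0                       # compared against inside val() before checkpointing

# Logging pattern matching the calls in val():
idx = 0
writer.add_scalars('data/loss', {'val': 0.42}, idx)
writer.add_scalars('data/accuracy', {'val': 0.91}, idx)
writer.close()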