Beispiel #1
0
def val(test_loader):
    """Run one evaluation pass over *test_loader* and return top-1 accuracy.

    Relies on module-level globals: ``utils`` (loadData helper), ``image``
    (shared input buffer), ``MODEL`` (the network) and ``np``.
    """
    test_iter = iter(test_loader)

    # FIX: was len(data_loader) — an undefined name in this function; the
    # loader being iterated is the `test_loader` argument.
    max_iter = len(test_loader)
    n_correct = 0
    n_total = 0

    for _ in range(max_iter):
        # FIX: iterator.next() is Python 2 only; use the next() builtin.
        data = next(test_iter)
        cpu_images = data[0]
        cpu_labels = data[1]
        [num] = cpu_labels.shape  # batch size of this mini-batch

        utils.loadData(image, cpu_images)
        preds = MODEL(image)

        # FIX: Tensor.cpu is a method — it must be called (`.cpu()`).
        arg_max = preds.argmax(1).cpu().numpy()
        labels = cpu_labels.numpy()
        correct = np.sum(arg_max == labels)

        n_correct += correct
        n_total += num

    acc = n_correct / float(n_total)
    return acc
Beispiel #2
0
def demo(image_path):
    """Recognise the text in the image at *image_path* with MORAN.

    Prints the left-to-right and right-to-left decodings and returns the
    left-to-right prediction string.
    """
    # Load as grayscale and apply the module-level resize/normalise transform.
    img = Image.open(image_path).convert('L')
    img = transformer(img)
    if cuda_flag:
        img = img.cuda()
    img = Variable(img.view(1, *img.size()))

    # Placeholder text/length tensors; filled via utils.loadData below.
    text = Variable(torch.LongTensor(1 * 5))
    length = Variable(torch.IntTensor(1))

    max_iter = 20
    t, l = converter.encode('0' * max_iter)
    utils.loadData(text, t)
    utils.loadData(length, l)

    output = MORAN(img, length, text, text, test=True, debug=True)
    preds, preds_reverse = output[0]  # bidirectional raw predictions
    demo = output[1]                  # intermediate (rectified) output

    _, preds = preds.max(1)
    _, preds_reverse = preds_reverse.max(1)

    sim_preds = converter.decode(preds.data, length.data)
    sim_preds = sim_preds.strip().split('$')[0]
    sim_preds_reverse = converter.decode(preds_reverse.data, length.data)
    sim_preds_reverse = sim_preds_reverse.strip().split('$')[0]

    print(image_path)
    print('\nResult:\n' + 'Left to Right: ' + sim_preds + '\nRight to Left: ' +
          sim_preds_reverse)
    return sim_preds
Beispiel #3
0
def train_batch():
    """Load one training batch into the global `image` / `ori_label` buffers."""
    # FIX: iterator.next() is Python 2 only; use the next() builtin.
    data = next(train_iter)
    cpu_images = data[0]
    cpu_labels = data[1]

    utils.loadData(image, cpu_images)
    # FIX: this line was tab-indented while the rest of the function used
    # spaces — an IndentationError in Python 3.
    utils.loadData(ori_label, cpu_labels)
Beispiel #4
0
    def predict(self, img_batch):
        """Run MORAN over *img_batch* and return ``(raw_output, length)``."""
        batch_size = int(img_batch.size(0))
        if self.cuda_flag:
            img_batch = img_batch.cuda()

        # Dummy decode targets: '0' * 20 per sample, plus per-sample lengths.
        max_iter = 20
        text = torch.LongTensor(batch_size * 5)
        length = torch.IntTensor(batch_size)
        encoded, lengths = self.converter.encode(['0' * max_iter] * batch_size)
        utils.loadData(text, encoded)
        utils.loadData(length, lengths)

        result = self.MORAN(img_batch, length, text, text, test=True, debug=True)
        return result, length
Beispiel #5
0
def trainBatch():
    """Train MODEL on one combined batch from two loaders; return the loss.

    Draws one batch from each of the module-level iterators `train_iter1`
    and `train_iter2`, concatenates them, and runs either a bidirectional
    (`opt.LR`) or single-direction step.
    """
    # FIX: iterator.next() is Python 2 only; use the next() builtin.
    data1 = next(train_iter1)
    data2 = next(train_iter2)
    cpu_images = torch.cat((data1[0], data2[0]), 0)
    cpu_texts1 = data1[1] + data2[1]
    cpu_texts2 = data1[3] + data2[3]

    utils.loadData(image, cpu_images)
    t1, l1 = converter.encode(cpu_texts1, scanned=True)
    utils.loadData(text1_ori, t1)
    utils.loadData(length_ori, l1)
    t2, l2 = converter.encode(cpu_texts2, scanned=True)
    utils.loadData(text2_ori, t2)

    # FIX: `if opt.LR is True` compared identity; a plain truth test is the
    # idiom (and the unused local `N` was removed).
    if opt.LR:
        preds1, preds2 = MODEL(image,
                               length_ori,
                               text1_ori,
                               text2_ori,
                               cpu_texts=cpu_texts1)

        text1_new = text1_ori
        text2_new = text2_ori

        # Each direction contributes half of the total cost.
        cost_pred1 = criterion(preds1, text1_new) / 2.0
        cost_pred2 = criterion(preds2, text2_new) / 2.0
        loss_pred_avg1.add(cost_pred1)
        loss_pred_avg2.add(cost_pred2)

        cost = cost_pred1 + cost_pred2
    else:
        preds1 = MODEL(image,
                       length_ori,
                       text1_ori,
                       None,
                       cpu_texts=cpu_texts1)

        text1_new = text1_ori

        cost_pred1 = criterion(preds1, text1_new)
        loss_pred_avg1.add(cost_pred1)

        cost = cost_pred1

    loss_avg.add(cost)
    MODEL.zero_grad()
    cost.backward()
    optimizer.step()

    return cost
Beispiel #6
0
def train_batch():
    """One classification training step for MODEL; updates weights in place."""
    # FIX: iterator.next() is Python 2 only; use the next() builtin.
    data = next(train_iter)
    cpu_images = data[0]
    cpu_labels = data[1]

    utils.loadData(image, cpu_images)
    utils.loadData(ori_label, cpu_labels)

    preds = MODEL(image)
    cost = criterion(preds, ori_label)

    loss.add(cost)

    MODEL.zero_grad()
    cost.backward()
    optimizer.step()
Beispiel #7
0
def trainBatch():
    """One bidirectional MORAN training step; returns the batch cost."""
    # FIX: iterator.next() is Python 2 only; use the next() builtin.
    data = next(train_iter)
    cpu_images, cpu_texts, cpu_texts_rev = data
    # utils.loadData(image, encode_coordinates_fn(cpu_images))
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts, scanned=True)
    t_rev, _ = converter.encode(cpu_texts_rev, scanned=True)
    utils.loadData(text, t)
    utils.loadData(text_rev, t_rev)
    utils.loadData(length, l)
    preds0, preds1 = MORAN(image, length, text, text_rev)
    # Joint loss over the forward and reverse decoders.
    cost = criterion(torch.cat([preds0, preds1], 0),
                     torch.cat([text, text_rev], 0))

    MORAN.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
Beispiel #8
0
 def train(net, criterion, optimizer, data):
     """Run one CTC training step and return the batch-averaged cost."""
     cpu_images, cpu_texts = data
     batch_size = cpu_images.size(0)  # size of the current batch
     utils.loadData(image, cpu_images)
     encoded, lengths = converter.encode(cpu_texts)  # labels -> class indices
     utils.loadData(text, encoded)
     utils.loadData(length, lengths)
     optimizer.zero_grad()  # clear stale gradients before backward()
     preds = net(image)
     preds_size = Variable(torch.LongTensor([preds.size(0)] * batch_size))
     cost = criterion(preds, text, preds_size, length) / batch_size
     cost.backward()
     optimizer.step()
     return cost
Beispiel #9
0
    def val(net, criterion, eval_data_batch):
        """Validate the CRNN on *eval_data_batch*.

        Freezes `crnn`'s parameters, accumulates the validation loss, counts
        exact-match predictions, and prints per-sample decodings plus a
        loss/accuracy summary.
        """
        print('Start val')
        for p in crnn.parameters():
            p.requires_grad = False
        net.eval()

        n_correct = 0
        loss_avg_eval = utils.averager()
        for data in eval_data_batch:
            cpu_images, cpu_texts = data
            batch_size = cpu_images.size(0)
            utils.loadData(image, cpu_images)
            t, l = converter.encode(cpu_texts)
            utils.loadData(text, t)
            utils.loadData(length, l)
            preds = crnn(image)
            preds_size = Variable(torch.LongTensor([preds.size(0)] * batch_size))
            cost = criterion(preds, text, preds_size, length) / batch_size
            loss_avg_eval.add(cost)  # accumulate validation loss

            _, preds = preds.max(2)
            preds = preds.transpose(1, 0).contiguous().view(-1)
            sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
            # FIX: was an element-by-element copy loop; list() is equivalent.
            cpu_texts_decode = list(cpu_texts)
            for pred, target in zip(sim_preds, cpu_texts_decode):  # accuracy count
                if pred == target:
                    n_correct += 1

            raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:config.n_val_disp]
            for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts_decode):
                print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

        accuracy = n_correct / float(len(eval_dataset))
        # FIX: was loss_avg.val() — the module-level training averager; the
        # validation loss is accumulated in loss_avg_eval above.
        print('Val loss: %f, accuray: %f' % (loss_avg_eval.val(), accuracy))
Beispiel #10
0
def val(dataset, criterion, max_iter=10000, steps=0):
    """Validate MORAN on *dataset*.

    Decodes both directions, keeps the higher-confidence prediction per
    sample, logs predictions to ``logger/log.txt`` and scalar summaries
    (loss, accuracy, normalised Levenshtein distance) via ``log``.
    Returns the exact-match accuracy.
    """
    data_loader = torch.utils.data.DataLoader(
        dataset,
        shuffle=False,
        batch_size=args.batchSize,
        num_workers=args.workers)  # args.batchSize
    val_iter = iter(data_loader)
    max_iter = min(max_iter, len(data_loader))
    n_correct = 0
    n_total = 0
    distance = 0.0
    loss_avg = utils.averager()

    # FIX: use a context manager so the log file is closed even if an
    # exception interrupts validation.
    with open('logger/log.txt', 'w', encoding='utf-8') as f:
        for i in range(max_iter):
            # FIX: iterator.next() is Python 2 only; use the next() builtin.
            data = next(val_iter)
            cpu_images, cpu_texts, cpu_texts_rev = data
            # utils.loadData(image, encode_coordinates_fn(cpu_images))
            utils.loadData(image, cpu_images)
            t, l = converter.encode(cpu_texts, scanned=True)
            t_rev, _ = converter.encode(cpu_texts_rev, scanned=True)
            utils.loadData(text, t)
            utils.loadData(text_rev, t_rev)
            utils.loadData(length, l)
            preds0, _, preds1, _ = MORAN(image,
                                         length,
                                         text,
                                         text_rev,
                                         debug=False,
                                         test=True,
                                         steps=steps)
            cost = criterion(torch.cat([preds0, preds1], 0),
                             torch.cat([text, text_rev], 0))
            preds0_prob, preds0 = preds0.max(1)
            preds0 = preds0.view(-1)
            preds0_prob = preds0_prob.view(-1)
            sim_preds0 = converter.decode(preds0.data, length.data)
            preds1_prob, preds1 = preds1.max(1)
            preds1 = preds1.view(-1)
            preds1_prob = preds1_prob.view(-1)
            sim_preds1 = converter.decode(preds1.data, length.data)
            sim_preds = []
            # Per sample: keep the direction with the higher mean probability
            # over its decoded span (reverse decodes are flipped back).
            for j in range(cpu_images.size(0)):
                text_begin = 0 if j == 0 else length.data[:j].sum()
                if torch.mean(preds0_prob[text_begin:text_begin + len(sim_preds0[j].split('$')[0] + '$')]).item() > \
                        torch.mean(preds1_prob[text_begin:text_begin + len(sim_preds1[j].split('$')[0] + '$')]).item():
                    sim_preds.append(sim_preds0[j].split('$')[0] + '$')
                else:
                    sim_preds.append(sim_preds1[j].split('$')[0][-1::-1] + '$')

            # img_shape = cpu_images.shape[3] / 100, cpu_images.shape[2] / 100
            # input_seq = cpu_texts[0]
            # output_seq = sim_preds[0]
            # attention = alpha[0]
            # attention_image = showAttention(input_seq, output_seq, attention, img_shape)
            # log.image_summary('map/attention', [attention_image], steps)

            loss_avg.add(cost)
            for pred, target in zip(sim_preds, cpu_texts):
                if pred == target.lower():
                    n_correct += 1
                f.write("pred %s\t\t\t\t\ttarget %s\n" % (pred, target))
                distance += levenshtein(pred, target) / max(len(pred), len(target))
                n_total += 1

    accuracy = n_correct / float(n_total)
    log.scalar_summary('Validation/levenshtein distance', distance / n_total,
                       steps)
    log.scalar_summary('Validation/loss', loss_avg.val(), steps)
    log.scalar_summary('Validation/accuracy', accuracy, steps)
    return accuracy
Beispiel #11
0
def val_beam(dataset, max_iter=9999):
    """Validate MODEL on *dataset*, optionally also scoring ±90°-rotated
    copies of each image and keeping the highest-scoring decode per sample.

    When ``opt.LR`` is set, both decoding directions are scored and the
    better-scoring one is compared against its matching ground truth.
    Returns the exact-match accuracy over the dataset's test alphabet.
    """
    rotate90 = dataset.ifRotate90

    data_loader = torch.utils.data.DataLoader(dataset,
                                              shuffle=False,
                                              batch_size=opt.batchSize,
                                              num_workers=1)  # opt.batchSize
    val_iter = iter(data_loader)
    max_iter = min(max_iter, len(data_loader))
    n_correct = 0
    n_total = 0

    for i in range(max_iter):
        # FIX: iterator.next() is Python 2 only; use the next() builtin.
        data = next(val_iter)
        ori_cpu_images = data[0]
        flag_rotate90 = data[2]
        cpu_texts1 = data[1]
        cpu_texts2 = data[3]

        t1, l1 = converter.encode(cpu_texts1, scanned=True)
        t2, l2 = converter.encode(cpu_texts2, scanned=True)
        utils.loadData(text1_ori, t1)
        utils.loadData(text2_ori, t2)
        utils.loadData(length_ori, l1)
        All_preds_add5EOS1 = []
        All_scores1 = []
        All_preds_add5EOS2 = []
        All_scores2 = []

        cpu_images = ori_cpu_images

        # Candidate 1: the original (un-rotated) images.
        utils.loadData(image, cpu_images)
        if opt.LR:
            local_preds1, local_scores1, local_preds2, local_scores2 = MODEL(
                image,
                length_ori,
                text1_ori,
                text2_ori,
                test=True,
                cpu_texts=cpu_texts1)
            All_preds_add5EOS1.append(local_preds1)
            All_preds_add5EOS2.append(local_preds2)
            All_scores1.append(local_scores1)
            All_scores2.append(local_scores2)
        else:
            local_preds1, local_scores1 = MODEL(image,
                                                length_ori,
                                                text1_ori,
                                                None,
                                                test=True,
                                                cpu_texts=cpu_texts1)
            All_preds_add5EOS1.append(local_preds1)
            All_scores1.append(local_scores1)

        # %%% Left/Right Rotate %%%
        # FIX: was `if rotate90 == True:` — a plain truth test is the idiom.
        if rotate90:
            PIL_imgs = [
                toPIL(ori_cpu_images[i].div(2).sub(-0.5))
                for i in range(ori_cpu_images.shape[0])
            ]
            # Rotate only the samples flagged for rotation; keep the rest.
            PIL_imgs_left90 = [
                PIL_imgs[i].transpose(Image.ROTATE_90).resize(
                    (opt.imgW, opt.imgH), Image.BILINEAR)
                if flag_rotate90[i] else PIL_imgs[i]
                for i in range(ori_cpu_images.shape[0])
            ]
            PIL_imgs_right90 = [
                PIL_imgs[i].transpose(Image.ROTATE_270).resize(
                    (opt.imgW, opt.imgH), Image.BILINEAR)
                if flag_rotate90[i] else PIL_imgs[i]
                for i in range(ori_cpu_images.shape[0])
            ]
            imgs_Tensor_left90 = [
                toTensor(PIL_imgs_left90[i])
                for i in range(ori_cpu_images.shape[0])
            ]
            imgs_Tensor_right90 = [
                toTensor(PIL_imgs_right90[i])
                for i in range(ori_cpu_images.shape[0])
            ]

            # Candidate 2: left-rotated images.
            cpu_images = torch.stack(imgs_Tensor_left90)
            cpu_images.sub_(0.5).div_(0.5)
            utils.loadData(image, cpu_images)
            # NOTE(review): these calls unpack one more value than the
            # un-rotated call above — confirm MODEL's test-mode signature.
            if opt.LR:
                local_preds1, local_scores1, local_preds2, local_scores2, _ = MODEL(
                    image,
                    length_ori,
                    text1_ori,
                    text2_ori,
                    test=True,
                    cpu_texts=cpu_texts1)
                All_preds_add5EOS1.append(local_preds1)
                All_preds_add5EOS2.append(local_preds2)
                All_scores1.append(local_scores1)
                All_scores2.append(local_scores2)
            else:
                local_preds1, local_scores1, _ = MODEL(image,
                                                       length_ori,
                                                       text1_ori,
                                                       None,
                                                       test=True,
                                                       cpu_texts=cpu_texts1)
                All_preds_add5EOS1.append(local_preds1)
                All_scores1.append(local_scores1)

            # Candidate 3: right-rotated images.
            cpu_images = torch.stack(imgs_Tensor_right90)
            cpu_images.sub_(0.5).div_(0.5)
            utils.loadData(image, cpu_images)
            if opt.LR:
                local_preds1, local_scores1, local_preds2, local_scores2, _ = MODEL(
                    image,
                    length_ori,
                    text1_ori,
                    text2_ori,
                    test=True,
                    cpu_texts=cpu_texts1)
                All_preds_add5EOS1.append(local_preds1)
                All_preds_add5EOS2.append(local_preds2)
                All_scores1.append(local_scores1)
                All_scores2.append(local_scores2)
            else:
                local_preds1, local_scores1, _ = MODEL(image,
                                                       length_ori,
                                                       text1_ori,
                                                       None,
                                                       test=True,
                                                       cpu_texts=cpu_texts1)
                All_preds_add5EOS1.append(local_preds1)
                All_scores1.append(local_scores1)

        # Start to decode: for each sample pick the candidate (original /
        # left / right) with the highest score, then slice out its
        # prediction span (+5 EOS padding tokens).
        preds_add5EOS1 = []
        preds_score1 = []
        for j in range(cpu_images.size(0)):
            text_begin = 0 if j == 0 else (length_ori.data[:j].sum() + j * 5)
            max_score = -99999
            max_index = 0
            for index in range(len(All_scores1)):
                local_score = All_scores1[index][j]
                if local_score > max_score:
                    max_score = local_score
                    max_index = index
            preds_add5EOS1.extend(
                All_preds_add5EOS1[max_index][text_begin:text_begin +
                                              int(length_ori[j].data) + 5])
            preds_score1.append(max_score)
        preds_add5EOS1 = torch.stack(preds_add5EOS1)
        sim_preds_add5eos1 = converter.decode(preds_add5EOS1.data,
                                              length_ori.data + 5)

        if opt.LR:
            # Same selection for the reverse decoding direction.
            preds_add5EOS2 = []
            preds_score2 = []
            for j in range(cpu_images.size(0)):
                text_begin = 0 if j == 0 else (length_ori.data[:j].sum() +
                                               j * 5)
                max_score = -99999
                max_index = 0
                for index in range(len(All_scores2)):
                    local_score = All_scores2[index][j]
                    if local_score > max_score:
                        max_score = local_score
                        max_index = index
                preds_add5EOS2.extend(
                    All_preds_add5EOS2[max_index][text_begin:text_begin +
                                                  int(length_ori[j].data) + 5])
                preds_score2.append(max_score)
            preds_add5EOS2 = torch.stack(preds_add5EOS2)
            sim_preds_add5eos2 = converter.decode(preds_add5EOS2.data,
                                                  length_ori.data + 5)

        if opt.LR:
            batch_index = 0
            for pred1, target1, pred2, target2 in zip(sim_preds_add5eos1,
                                                      cpu_texts1,
                                                      sim_preds_add5eos2,
                                                      cpu_texts2):
                # Compare the better-scoring direction against its own GT.
                if preds_score1[batch_index] > preds_score2[batch_index]:
                    pred = pred1
                    target = target1
                else:
                    pred = pred2
                    target = target2

                # Truncate at the separator and drop characters outside the
                # test alphabet before comparing.
                pred = pred.split(opt.sep)[0] + opt.sep
                test_alphabet = dataset.test_alphabet.split(opt.sep)
                pred = ''.join(
                    pred[i].lower() if pred[i].lower() in test_alphabet else ''
                    for i in range(len(pred)))
                target = ''.join(target[i].lower() if target[i].lower() in
                                 test_alphabet else ''
                                 for i in range(len(target)))

                if pred.lower() == target.lower():
                    n_correct += 1
                n_total += 1
                batch_index += 1
        else:
            for pred, target in zip(sim_preds_add5eos1, cpu_texts1):
                pred = pred.split(opt.sep)[0] + opt.sep
                test_alphabet = dataset.test_alphabet.split(opt.sep)
                pred = ''.join(
                    pred[i].lower() if pred[i].lower() in test_alphabet else ''
                    for i in range(len(pred)))
                target = ''.join(target[i].lower() if target[i].lower() in
                                 test_alphabet else ''
                                 for i in range(len(target)))

                if pred.lower() == target.lower():
                    n_correct += 1
                n_total += 1

    accuracy = n_correct / float(n_total)

    dataset_name = dataset.root.split('/')[-1]
    print(dataset_name + ' ACCURACY -----> %.1f%%, ' % (accuracy * 100.0))
    return accuracy
Beispiel #12
0
    def val(dataset, criterion, max_iter=1000):
        """Validate MORAN on *dataset*; prints and returns accuracy.

        Also appends per-sample predictions to ``./log.txt`` and reports the
        mean normalised Levenshtein distance.
        """
        print('Start val')
        data_loader = torch.utils.data.DataLoader(
            dataset, shuffle=False, batch_size=opt.batchSize, num_workers=int(opt.workers)) # opt.batchSize
        val_iter = iter(data_loader)
        max_iter = min(max_iter, len(data_loader))
        n_correct = 0
        n_total = 0
        distance = 0.0
        loss_avg = utils.averager()

        f = open('./log.txt','a',encoding='utf-8')

        for i in range(max_iter):
            # FIX: iterator.next() is Python 2 only; use the next() builtin.
            data = next(val_iter)
            if opt.BidirDecoder:
                cpu_images, cpu_texts, cpu_texts_rev = data
                utils.loadData(image, cpu_images)
                t, l = converter.encode(cpu_texts, scanned=True)
                t_rev, _ = converter.encode(cpu_texts_rev, scanned=True)
                utils.loadData(text, t)
                utils.loadData(text_rev, t_rev)
                utils.loadData(length, l)
                preds0, preds1 = MORAN(image, length, text, text_rev, test=True)
                cost = criterion(torch.cat([preds0, preds1], 0), torch.cat([text, text_rev], 0))
                preds0_prob, preds0 = preds0.max(1)
                preds0 = preds0.view(-1)
                preds0_prob = preds0_prob.view(-1)
                sim_preds0 = converter.decode(preds0.data, length.data)
                preds1_prob, preds1 = preds1.max(1)
                preds1 = preds1.view(-1)
                preds1_prob = preds1_prob.view(-1)
                sim_preds1 = converter.decode(preds1.data, length.data)
                sim_preds = []
                # Keep the direction with the higher mean probability over
                # its decoded span (reverse decodes are flipped back).
                for j in range(cpu_images.size(0)):
                    text_begin = 0 if j == 0 else length.data[:j].sum()
                    # FIX: `.data[0]` on a 0-dim tensor fails on modern
                    # PyTorch (>=0.5); use .item() as elsewhere in this file.
                    if torch.mean(preds0_prob[text_begin:text_begin+len(sim_preds0[j].split('$')[0]+'$')]).item() >\
                     torch.mean(preds1_prob[text_begin:text_begin+len(sim_preds1[j].split('$')[0]+'$')]).item():
                        sim_preds.append(sim_preds0[j].split('$')[0]+'$')
                    else:
                        sim_preds.append(sim_preds1[j].split('$')[0][-1::-1]+'$')
            else:
                cpu_images, cpu_texts = data
                utils.loadData(image, cpu_images)
                t, l = converter.encode(cpu_texts, scanned=True)
                utils.loadData(text, t)
                utils.loadData(length, l)
                preds = MORAN(image, length, text, text_rev, test=True)
                cost = criterion(preds, text)
                _, preds = preds.max(1)
                preds = preds.view(-1)
                sim_preds = converter.decode(preds.data, length.data)

            loss_avg.add(cost)
            for pred, target in zip(sim_preds, cpu_texts):
                if pred == target.lower():
                    n_correct += 1
                f.write("预测 %s      目标 %s\n" % ( pred,target ) )
                distance += levenshtein(pred,target) / max(len(pred),len(target))
                n_total += 1

        f.close()

        print("correct / total: %d / %d, "  % (n_correct, n_total))
        print('levenshtein distance: %f' % (distance/n_total))

        accuracy = n_correct / float(n_total)
        print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
        return accuracy
Beispiel #13
0
    new_height = round(target_width * (height / width))
    transformer = dataset.resizeNormalize((target_width, new_height))
    image = transformer(image)

    if cuda_flag:
        image = image.cuda()
    image = image.view(1, *image.size())
    image = Variable(image)
    text = torch.LongTensor(1 * 5)
    length = torch.IntTensor(1)
    text = Variable(text)
    length = Variable(length)

    max_iter = 20
    t, l = converter.encode('0' * max_iter)
    utils.loadData(text, t)
    utils.loadData(length, l)
    output = MORAN(image, length, text, text, test=True, debug=True)

    preds, preds_reverse = output[0]
    demo = output[1]

    _, preds = preds.max(1)
    _, preds_reverse = preds_reverse.max(1)

    sim_preds = converter.decode(preds.data, length.data)
    sim_preds = sim_preds.strip().split('$')[0]
    # sim_preds_reverse = converter.decode(preds_reverse.data, length.data)
    # sim_preds_reverse = sim_preds_reverse.strip().split('$')[0]

    # print('\nResult:\n' + 'Left to Right: ' + sim_preds +
Beispiel #14
0
    def trainBatch(steps):
        """One MORAN training step (bi- or uni-directional); returns the cost."""
        # FIX: iterator.next() is Python 2 only; use the next() builtin.
        data = next(train_iter)
        if opt.BidirDecoder:
            cpu_images, cpu_texts, cpu_texts_rev = data
            utils.loadData(image, cpu_images)
            t, l = converter.encode(cpu_texts, scanned=True)
            t_rev, _ = converter.encode(cpu_texts_rev, scanned=True)
            utils.loadData(text, t)
            utils.loadData(text_rev, t_rev)
            utils.loadData(length, l)
            preds0, preds1 = MORAN(image, length, text, text_rev)
            # Joint loss over both decoding directions.
            cost = criterion(torch.cat([preds0, preds1], 0),
                             torch.cat([text, text_rev], 0))
        else:
            cpu_images, cpu_texts = data
            utils.loadData(image, cpu_images)
            t, l = converter.encode(cpu_texts, scanned=True)
            utils.loadData(text, t)
            utils.loadData(length, l)
            preds = MORAN(image, length, text, text_rev)
            cost = criterion(preds, text)

        MORAN.zero_grad()
        cost.backward()  # backpropagate
        optimizer.step()  # optimizer update
        return cost
Beispiel #15
0
transformer = dataset.resizeNormalize((100, 32))
image = Image.open(img_path).convert('L')
image = transformer(image)  #读取灰度图像并将其转换成100*32(w,h), image:1x32x100

if cuda_flag:
    image = image.cuda()
image = image.view(1, *image.size())  # 1x1x32x100
image = Variable(image)
text = torch.LongTensor(1 * 5)
length = torch.IntTensor(1)
text = Variable(text)
length = Variable(length)

max_iter = 20
t, l = converter.encode('0' * max_iter)  # 初始化文本内容和文本长度t=20*'0', l=20
utils.loadData(text, t)  #将初始化的值赋值到text和l上
utils.loadData(length, l)

################# 3-模型输出 #######################################
output = MORAN(image, length, text, text, test=True, debug=True)  #这里初始双向的结果

preds, preds_reverse = output[0]  #双向结果
demo = output[1]  #test debug阶段输出矫正的文本

_, preds = preds.max(1)
_, preds_reverse = preds_reverse.max(1)

sim_preds = converter.decode(preds.data,
                             length.data)  #将预测的文本概率转换成文本, jewelers$e$e$e$
sim_preds = sim_preds.strip().split('$')[0]  #jewelers
sim_preds_reverse = converter.decode(preds_reverse.data,
	def predict(self, msg, img, rot=0):
		"""Recognise the text inside each detected bounding box of *msg*.

		Crops each box from the grayscale image, runs the recognition
		network, draws the recognised label on *img* and fills a label
		mask per contour. Returns ``(img, mask)``.
		"""
		# # Preprocessing
		gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
		(rows, cols, channels) = img.shape
		mask = np.zeros([rows, cols], dtype = np.uint8)

		for text_bb in msg.text_array:
			# Skip boxes smaller than the configured area threshold.
			if (text_bb.box.ymax - text_bb.box.ymin) * (text_bb.box.xmax - text_bb.box.xmin) < self.bbox_thres:
				continue
			start = time.time()
			image = gray[text_bb.box.ymin:text_bb.box.ymax, text_bb.box.xmin:text_bb.box.xmax]

			image = Im.fromarray(image)
			image = self.transformer(image)

			if self.cuda_use:
				# FIX: this line mixed tabs and spaces (inconsistent
				# indentation); normalised to tabs like the rest.
				image = image.cuda()
			image = image.view(1, *image.size())
			image = Variable(image)
			text = torch.LongTensor(1 * 5)
			length = torch.IntTensor(1)
			text = Variable(text)
			length = Variable(length)

			max_iter = 20
			t, l = self.converter.encode('0'*max_iter)
			utils.loadData(text, t)
			utils.loadData(length, l)
			output = self.network(image, length, text, text, test=True, debug=True)

			preds, preds_reverse = output[0]
			demo = output[1]

			_, preds = preds.max(1)
			_, preds_reverse = preds_reverse.max(1)

			sim_preds = self.converter.decode(preds.data, length.data)
			sim_preds = sim_preds.strip().split('$')[0]
			sim_preds_reverse = self.converter.decode(preds_reverse.data, length.data)
			sim_preds_reverse = sim_preds_reverse.strip().split('$')[0]

			# print('\nResult:\n' + 'Left to Right: ' + sim_preds + '\nRight to Left: ' + sim_preds_reverse + '\n\n')
			# FIX: Python 2 `print` statement -> print() function.
			print("Text Recognize Time : {}".format(time.time() - start))

			# Collect the box contour as an int32 point array for fillConvexPoly.
			_cont = []
			for p in text_bb.contour:
				point = []
				point.append(p.point[0])
				point.append(p.point[1])
				_cont.append(point)
			_cont = np.array(_cont, np.int32)
			if sim_preds in self.commodity_list:
				cv2.rectangle(img, (text_bb.box.xmin, text_bb.box.ymin),(text_bb.box.xmax, text_bb.box.ymax), self.color_map[rot], 3)
				cv2.putText(img, sim_preds, (text_bb.box.xmin, text_bb.box.ymin), 0, 1, (0, 255, 255),3)
				pix = self.commodity_list.index(sim_preds) + rot*len(self.commodity_list)
				# Offset the pixel value when this label already appears in the mask.
				if pix in np.unique(mask):
					cv2.fillConvexPoly(mask, _cont, pix + 4*len(self.commodity_list))
				else:
					cv2.fillConvexPoly(mask, _cont, pix)
			else:
				# Not an exact match: try the closest known word with confidence.
				correct, conf, _bool = self.conf_of_word(sim_preds)

				# print conf
				if _bool:
					cv2.putText(img, correct + "{:.2f}".format(conf), (text_bb.box.xmin, text_bb.box.ymin), 0, 1, (0, 255, 255),3)
					cv2.rectangle(img, (text_bb.box.xmin, text_bb.box.ymin),(text_bb.box.xmax, text_bb.box.ymax), (255, 255, 255), 2)
					pix = self.commodity_list.index(correct) + rot*len(self.commodity_list)
					if pix in np.unique(mask):
						cv2.fillConvexPoly(mask, _cont, pix + 4*len(self.commodity_list))
					else:
						cv2.fillConvexPoly(mask, _cont, pix)

		return img, mask
Beispiel #17
0
def trainBatch():
    """One MORAN training step; returns the batch cost."""
    # FIX: iterator.next() is Python 2 only; use the next() builtin.
    data = next(train_iter)
    if opt.BidirDecoder:
        cpu_images, cpu_texts, cpu_texts_rev = data  # images + forward/reverse labels
        utils.loadData(image, cpu_images)  # copy batch into the global image buffer
        t, l = converter.encode(cpu_texts, scanned=True)  # encode text to class labels
        t_rev, _ = converter.encode(cpu_texts_rev, scanned=True)  # encode reversed text
        utils.loadData(text, t)  # load forward labels
        utils.loadData(text_rev, t_rev)  # load reverse labels
        utils.loadData(length, l)
        preds0, preds1 = MORAN(image, length, text, text_rev)  # forward/reverse probabilities
        cost = criterion(torch.cat([preds0, preds1], 0),
                         torch.cat([text, text_rev], 0))  # cross-entropy over both directions
    else:
        cpu_images, cpu_texts = data
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts, scanned=True)
        utils.loadData(text, t)
        utils.loadData(length, l)
        preds = MORAN(image, length, text, text_rev)
        cost = criterion(preds, text)

    MORAN.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
Beispiel #18
0
def val(dataset, criterion, max_iter=1000):
    """Evaluate MORAN on a validation set and return whole-word accuracy.

    :param dataset: validation dataset
    :param criterion: loss function
    :param max_iter: maximum number of batches to evaluate
    :return: fraction of predictions that exactly match their lower-cased
             ground truth, in [0, 1]
    """
    print('Start val')
    data_loader = torch.utils.data.DataLoader(
        dataset,
        shuffle=False,
        batch_size=opt.batchSize,
        num_workers=int(opt.workers))  # opt.batchSize
    val_iter = iter(data_loader)
    max_iter = min(max_iter, len(data_loader))
    n_correct = 0
    n_total = 0
    loss_avg = utils.averager()

    for i in range(max_iter):
        # next(iterator) instead of the removed Python-2-style .next().
        data = next(val_iter)
        if opt.BidirDecoder:
            cpu_images, cpu_texts, cpu_texts_rev = data
            utils.loadData(image, cpu_images)
            t, l = converter.encode(cpu_texts, scanned=True)  # label -> class ids
            t_rev, _ = converter.encode(cpu_texts_rev,
                                        scanned=True)  # reversed labels for the backward decoder
            utils.loadData(text, t)
            utils.loadData(text_rev, t_rev)
            utils.loadData(length, l)
            preds0, preds1 = MORAN(image, length, text, text_rev, test=True)
            cost = criterion(torch.cat([preds0, preds1], 0),
                             torch.cat([text, text_rev], 0))
            preds0_prob, preds0 = preds0.max(1)
            preds0 = preds0.view(-1)
            preds0_prob = preds0_prob.view(-1)
            sim_preds0 = converter.decode(preds0.data, length.data)
            preds1_prob, preds1 = preds1.max(1)
            preds1 = preds1.view(-1)
            preds1_prob = preds1_prob.view(-1)
            sim_preds1 = converter.decode(preds1.data, length.data)
            sim_preds = []
            for j in range(cpu_images.size(0)):
                text_begin = 0 if j == 0 else length.data[:j].sum()
                # Keep whichever decoding direction has the higher mean
                # confidence over its predicted word (up to the '$' stop mark).
                # .item() replaces the deprecated .data[0] 0-dim indexing,
                # which raises IndexError on PyTorch >= 0.5 (matches the
                # idiom used by the sibling val() in this file).
                if torch.mean(preds0_prob[text_begin:text_begin + len(sim_preds0[j].split('$')[0] + '$')]).item() > \
                        torch.mean(preds1_prob[text_begin:text_begin + len(sim_preds1[j].split('$')[0] + '$')]).item():
                    sim_preds.append(sim_preds0[j].split('$')[0] + '$')
                else:
                    # Backward-decoder output reads right-to-left; flip it back.
                    sim_preds.append(sim_preds1[j].split('$')[0][-1::-1] + '$')
        else:
            cpu_images, cpu_texts = data
            utils.loadData(image, cpu_images)
            t, l = converter.encode(cpu_texts, scanned=True)
            utils.loadData(text, t)
            utils.loadData(length, l)
            preds = MORAN(image, length, text, text_rev, test=True)
            cost = criterion(preds, text)
            _, preds = preds.max(1)
            preds = preds.view(-1)
            sim_preds = converter.decode(preds.data, length.data)
        # Accumulate loss and whole-word accuracy.
        loss_avg.add(cost)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred == target.lower():
                n_correct += 1
            n_total += 1

    print("correct / total: %d / %d, " % (n_correct, n_total))
    # Guard against an empty loader to avoid ZeroDivisionError.
    accuracy = n_correct / float(n_total) if n_total else 0.0
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
    return accuracy
Beispiel #19
0
    def val(dataset, criterion, max_iter=10000, steps=None):
        """Validate the model on ``dataset``; return whole-word accuracy.

        Also logs normalized Levenshtein distance, loss and accuracy as
        scalar summaries via the module-level ``log`` at step ``steps``.

        :param dataset: validation dataset
        :param criterion: loss function
        :param max_iter: maximum number of batches to evaluate
        :param steps: global step used for the scalar summaries
        :return: fraction of exact (lower-cased) label matches
        """
        data_loader = torch.utils.data.DataLoader(
            dataset,
            shuffle=False,
            batch_size=opt.batchSize,
            num_workers=int(opt.workers))  # opt.batchSize
        # NOTE(review): .next() below is Python-2-style; next(val_iter) is
        # the portable form — confirm target runtime before changing.
        val_iter = iter(data_loader)
        max_iter = min(max_iter, len(data_loader))
        n_correct = 0
        n_total = 0
        distance = 0.0  # accumulated normalized Levenshtein distance
        loss_avg = utils.averager()

        # f = open('./log.txt', 'a', encoding='utf-8')

        for i in range(max_iter):  # large loop bound (evaluation usually ends before reaching it)
            data = val_iter.next()
            if opt.BidirDecoder:
                cpu_images, cpu_texts, cpu_texts_rev = data  # batch produced by the dataloader
                utils.loadData(image, cpu_images)
                t, l = converter.encode(cpu_texts,
                                        scanned=False)  # encode characters into class ids
                t_rev, _ = converter.encode(cpu_texts_rev, scanned=False)
                utils.loadData(text, t)
                utils.loadData(text_rev, t_rev)
                utils.loadData(length, l)
                preds0, preds1 = MORAN(image,
                                       length,
                                       text,
                                       text_rev,
                                       debug=False,
                                       test=True,
                                       steps=steps)  # run the model (HARN)
                cost = criterion(torch.cat([preds0, preds1], 0),
                                 torch.cat([text, text_rev], 0))
                preds0_prob, preds0 = preds0.max(1)  # take the top-1 prediction
                preds0 = preds0.view(-1)
                preds0_prob = preds0_prob.view(-1)  # flatten to 1-D
                sim_preds0 = converter.decode(preds0.data,
                                              length.data)  # decode ids back to characters
                preds1_prob, preds1 = preds1.max(1)
                preds1 = preds1.view(-1)
                preds1_prob = preds1_prob.view(-1)
                sim_preds1 = converter.decode(preds1.data, length.data)
                sim_preds = []  # final predicted words
                for j in range(cpu_images.size(0)):  # join per-character output into word strings
                    text_begin = 0 if j == 0 else length.data[:j].sum()
                    # Pick the decoding direction with the higher mean
                    # confidence over its word (up to the '$' stop mark).
                    if torch.mean(preds0_prob[text_begin:text_begin + len(sim_preds0[j].split('$')[0] + '$')]).item() > \
                            torch.mean(
                                preds1_prob[text_begin:text_begin + len(sim_preds1[j].split('$')[0] + '$')]).item():
                        sim_preds.append(sim_preds0[j].split('$')[0] + '$')
                    else:
                        sim_preds.append(sim_preds1[j].split('$')[0][-1::-1] +
                                         '$')
            else:  # alternative unidirectional branch (unused in this setup)
                cpu_images, cpu_texts = data
                utils.loadData(image, cpu_images)
                t, l = converter.encode(cpu_texts, scanned=True)
                utils.loadData(text, t)
                utils.loadData(length, l)
                preds = MORAN(image, length, text, text_rev, test=True)
                cost = criterion(preds, text)
                _, preds = preds.max(1)
                preds = preds.view(-1)
                sim_preds = converter.decode(preds.data, length.data)

            loss_avg.add(cost)  # running average of the validation loss
            for pred, target in zip(
                    sim_preds, cpu_texts
            ):  # compare against ground truth: cpu_texts is GT, sim_preds the joined strings
                if pred == target.lower():  # exact-match count
                    n_correct += 1
                # f.write("pred %s\t      target %s\n" % (pred, target))
                distance += levenshtein(pred, target) / max(
                    len(pred), len(target))  # length-normalized Levenshtein distance
                n_total += 1  # one word processed

        # f.close()

        # print and save     # sample predictions from the last batch only
        for pred, gt in zip(sim_preds, cpu_texts):
            gt = ''.join(gt.split(opt.sep))
            print('%-20s, gt: %-20s' % (pred, gt))

        print("correct / total: %d / %d, " % (n_correct, n_total))
        print('levenshtein distance: %f' % (distance / n_total))
        accuracy = n_correct / float(n_total)
        log.scalar_summary('Validation/levenshtein distance',
                           distance / n_total, steps)
        log.scalar_summary('Validation/loss', loss_avg.val(), steps)
        log.scalar_summary('Validation/accuracy', accuracy, steps)
        print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
        return accuracy
Beispiel #20
0
def trainBatch():
    """Fetch one batch, run forward/backward, step the optimizer.

    Uses module-level globals: ``train_iter``, ``opt``, ``converter``,
    ``utils``, ``MORAN``, ``criterion``, ``optimizer`` and the shared
    tensors ``image``, ``text``, ``text_rev``, ``length``.

    :return: the batch loss tensor
    """
    # Fetch one batch [images, labels]; next(iterator) replaces the
    # Python-2-style .next(), which modern iterators no longer provide.
    data = next(train_iter)
    if opt.BidirDecoder:
        cpu_images, cpu_texts, cpu_texts_rev = data
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts, scanned=True)
        t_rev, _ = converter.encode(cpu_texts_rev, scanned=True)
        utils.loadData(text, t)
        utils.loadData(text_rev, t_rev)
        utils.loadData(length, l)
        # The bidirectional decoder produces two outputs (forward/backward).
        preds0, preds1 = MORAN(image, length, text, text_rev)
        cost = criterion(torch.cat([preds0, preds1], 0),
                         torch.cat([text, text_rev], 0))
    else:
        cpu_images, cpu_texts = data
        utils.loadData(image, cpu_images)
        # Labels and the length of each label.
        t, l = converter.encode(cpu_texts, scanned=True)
        utils.loadData(text, t)
        utils.loadData(length, l)
        # The unidirectional decoder produces a single output.
        preds = MORAN(image, length, text, text_rev)
        cost = criterion(preds, text)

    MORAN.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
Beispiel #21
0
# Character <-> class-id converter for the attention decoder; ':' is the
# separator character passed to the converter.
converter = utils.strLabelConverterForAttention(args.alphabet, ':')
pred_dataset = dataset.lmdbDataset(root=os.path.join('dataset', args.data),
                                   transform=dataset.resizeNormalize(
                                       (100, 32)))
pred_loader = torch.utils.data.DataLoader(pred_dataset,
                                          shuffle=False,
                                          batch_size=args.batch_size,
                                          num_workers=args.num_workers)

# Pre-allocated GPU tensors, refilled in place for every batch via
# utils.loadData.
image = torch.FloatTensor(args.batch_size, args.nc, args.imgH,
                          args.imgW).cuda()
text = torch.LongTensor(args.batch_size * 5).cuda()
length = torch.IntTensor(args.batch_size).cuda()
# Seed text/length with dummy '0'-strings of max_iter characters so the
# decoder has a fixed maximum decoding length.
t, l = converter.encode(['0' * args.max_iter] * args.batch_size, scanned=True)
utils.loadData(text, t)
utils.loadData(length, l)

# CSV output collecting predictions, named after the dataset.
f = open(os.path.join('logger', args.data + '.csv'),
         'w',
         newline='',
         encoding='utf-8')
writer = csv.writer(f)

for i, (img_keys, cpu_images) in enumerate(pred_loader):
    utils.loadData(image, cpu_images)
    t, l = converter.encode(['0' * args.max_iter] * cpu_images.size(0),
                            scanned=True)
    utils.loadData(text, t)
    utils.loadData(length, l)
    preds0, _, preds1, _ = MORAN(image, length, text, text, test=True)