Beispiel #1
0
def train(trainloader, crnn, converter, criterion, optimizer):
    """Run one training epoch of the CRNN under CTC loss.

    Prints a running-average loss every 100 batches.
    Relies on module-level globals: `device`, `epoch`, `time`, `torch`.
    """
    running_loss = 0.0
    started = time.time()
    for batch_idx, (inputs, labels) in enumerate(trainloader):
        inputs = inputs.to(device)

        optimizer.zero_grad()
        outputs = crnn(inputs)

        # CTC loss expects log-probabilities over the class dimension.
        log_probs = torch.nn.functional.log_softmax(outputs, dim=2)
        # Every sample uses the full output sequence length (time axis = dim 0).
        input_lengths = torch.full(
            size=(inputs.size(0),),
            fill_value=outputs.size(0),
            dtype=torch.long,
        ).to(device)
        target, target_lengths = converter.encode(labels)
        loss = criterion(log_probs, target, input_lengths, target_lengths)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Report the mean loss of the last 100 batches and the wall time.
        if batch_idx % 100 == 99:
            finished = time.time()
            print('[{}, {:5d}] loss: {:.8f} time: {:.2f}s'.format(
                epoch + 1, batch_idx + 1, running_loss / 100, finished - started))
            running_loss = 0.0
            started = finished
Beispiel #2
0
def validate(validloader, crnn, converter):
    """Measure exact-match accuracy of the CRNN over a validation loader.

    Prints the accuracy and appends the same line to
    ``{args.save_path}/train.log``.
    Relies on module-level globals: `device`, `epoch`, `args`, `torch`.
    """
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in validloader:
            inputs = inputs.to(device)

            outputs = crnn(inputs)

            # Greedy decoding: most probable class at each time step.
            _, predicted = outputs.max(2)
            predicted = predicted.transpose(1, 0).contiguous().view(-1)
            # Each sample's input length is the full time axis.
            input_lengths = torch.full(
                size=(inputs.size(0),),
                fill_value=outputs.size(0),
                dtype=torch.long,
            ).to(device)
            sim_preds = converter.decode(predicted.data,
                                         input_lengths.data,
                                         raw=False)
            # Labels arrive as raw bytes; decode to text before comparing.
            targets = [raw.decode('utf-8', 'strict') for raw in labels]
            for sim_pred, target in zip(sim_preds, targets):
                total += 1
                correct += int(sim_pred == target)

    print('[{}] Accuracy of the network on the {} validation images: {:.2%}'.
          format(epoch + 1, total, correct / total))
    with open('{}/train.log'.format(args.save_path), 'a') as f:
        f.write(
            '[{}] Accuracy of the network on the {} validation images: {:.2%}\n'
            .format(epoch + 1, total, correct / total))
Beispiel #3
0
def val(net, val_loader, criterion, epoch, max_i=1000):
    """Validate `net` on `val_loader` and return exact-match accuracy.

    Args:
        net: the CRNN model to evaluate.
        val_loader: yields (image batch, sample indices).
        criterion: CTC loss.
        epoch: current epoch number (used only in log output).
        max_i: stop after this many batches.

    Returns:
        float accuracy (n_correct / n_all; 0.0 if no samples were seen).

    Relies on module-level globals: `device`, `utils`, `val_dataset`,
    `converter`, `params`, `Variable`, `torch`.
    """
    print('================Start val=================')
    # BUG FIX: the original froze and forwarded the *global* `crnn` instead
    # of the `net` argument; use the passed-in model consistently.
    for p in net.parameters():
        p.requires_grad = False
    net.eval()
    n_correct = 0
    n_all = 0
    loss_avg = utils.averager()

    for i_batch, (image, index) in enumerate(val_loader):
        image = image.to(device)
        print('image.shape:', image.shape)
        label = utils.get_batch_label(val_dataset, index)
        # preds: [T, batch, nclass]
        preds = net(image)
        batch_size = image.size(0)
        label_text, label_length = converter.encode(label)
        # Every sample uses the full time axis T as its input length.
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, label_text, preds_size,
                         label_length) / batch_size
        loss_avg.add(cost)
        # Greedy decoding: [T, batch] indices of the best class per step.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        print('label:', label[:2])
        print('sim_preds:', sim_preds[:2])

        n_all += len(label)
        for pred, target in zip(sim_preds, label):
            if pred == target:
                n_correct += 1

        if (i_batch + 1) % params.displayInterval == 0:
            print('[%d/%d][%d/%d]' %
                  (epoch, params.epochs, i_batch, len(val_loader)))
        if i_batch == max_i:
            break

    # Show a few raw (undeduplicated) decodings from the last batch.
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:params.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, label):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    # Guard against an empty loader to avoid ZeroDivisionError.
    accuracy = n_correct / n_all if n_all else 0.0
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
    return accuracy
Beispiel #4
0
def val(net, criterion, max_iter=3):
    """Evaluate `net` on up to `max_iter` batches of the global test_loader.

    Args:
        net: the CRNN model to evaluate.
        criterion: CTC loss.
        max_iter: maximum number of batches to evaluate.

    Returns:
        (test_loss, accuracy) tuple. Accuracy is computed against
        max_iter * opt.batchSize as in the original.

    Relies on module-level globals: `test_loader`, `utils`, `image`, `text`,
    `length`, `converter`, `ifUnicode`, `clean_txt`, `Variable`, `opt`, `torch`.
    """
    # BUG FIX: the original froze/forwarded the global `crnn` instead of the
    # `net` argument; use the passed-in model consistently.
    for p in net.parameters():
        p.requires_grad = False

    net.eval()
    val_iter = iter(test_loader)

    n_correct = 0
    loss_avg = utils.averager()

    max_iter = min(max_iter, len(test_loader))
    for _ in range(max_iter):
        # BUG FIX: `.next()` is Python-2 only; use the builtin next().
        cpu_images, cpu_texts = next(val_iter)
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        if ifUnicode:
            cpu_texts = [clean_txt(tx.decode('utf-8')) for tx in cpu_texts]
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)

        preds = net(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        _, preds = preds.max(2)
        # BUG FIX: dropped `preds = preds.squeeze(2)` — after max(2) the
        # tensor is already 2-D ([T, batch]) on PyTorch >= 0.4, where
        # squeeze(2) raises IndexError.
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred.strip() == target.strip():
                n_correct += 1

    # Raw decodings of the last batch (kept for parity with the original;
    # display loop was already commented out there).
    raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:opt.n_test_disp]
    accuracy = n_correct / float(max_iter * opt.batchSize)
    testLoss = loss_avg.val()
    return testLoss, accuracy
Beispiel #5
0
def crnn_predict(crnn, img, transformer, decoder='bestPath', normalise=False):
    """
    Run CRNN inference on a single image and CTC-decode the output.

    Params
    ------
    crnn: torch.nn
        Neural network architecture
    img:
        Input image; copied before transformation so the caller's object
        is left untouched.
    transformer: torchvision.transform
        Image transformer
    decoder: string, 'bestPath' or 'beamSearch'
        CTC decoder method.
    normalise: bool
        Divide the softmax output by the global `prior`
        (only suitable for best-path decoding).

    Returns
    ------
    out: a list of tuples (predicted alphanumeric sequence, confidence level)

    Raises
    ------
    ValueError
        If `decoder` is neither 'bestPath' nor 'beamSearch'.
    """
    classes = string.ascii_uppercase + string.digits
    image = img.copy()

    image = transformer(image).to(device)
    image = image.view(1, *image.size())

    # forward pass (convert to numpy array)
    preds_np = crnn(image).data.cpu().numpy().squeeze()

    # move first column to last (so that we can use CTCDecoder as it is)
    preds_np = np.hstack([preds_np[:, 1:], preds_np[:, [0]]])

    preds_sm = softmax(preds_np, axis=1)

    # normalise is only suitable for best path
    # IDIOM FIX: `normalise == True` -> truthiness test.
    if normalise:
        preds_sm = np.divide(preds_sm, prior)

    if decoder == 'bestPath':
        output = ctcBestPath(preds_sm, classes)
    elif decoder == 'beamSearch':
        output = ctcBeamSearch(preds_sm, classes, None)
    else:
        # BUG FIX: raise the specific ValueError (subclass of Exception, so
        # callers catching Exception still work) and drop the whitespace run
        # the original's backslash continuation embedded in the message.
        raise ValueError(
            "Invalid decoder method. Choose either 'bestPath' or 'beamSearch'")

    return output
Beispiel #6
0
def model_infer(crnn, converter, cvImg):
    """Run CRNN inference on one grayscale numpy image and decode the text.

    Args:
        crnn: the CRNN model.
        converter: label converter providing `decode`.
        cvImg: 2-D numpy array (H, W) — assumed grayscale; TODO confirm.

    Returns:
        Decoded string(s) from `converter.decode`.

    Relies on module-level globals: `params_test`, `device`, `Variable`, `torch`.
    """
    image = torch.from_numpy(cvImg).type(torch.FloatTensor)
    # Normalise in place with the test-time mean/std.
    image.sub_(params_test.mean).div_(params_test.std)
    # Add batch and channel dimensions: [H, W] -> [1, 1, H, W].
    image = image.unsqueeze(0).unsqueeze(0)
    image = image.to(device)
    preds_tabel = crnn(image)
    preds_tabel = preds_tabel.permute(1, 0, 2)
    # Greedy decoding. CLEANUP: removed the unused confidence product
    # (`pro`/`prob_s`) and the dead `score = 1.0` from the original.
    _, preds = preds_tabel.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    batch_size = image.size(0)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    reg = converter.decode(preds.data, preds_size.data, raw=False)
    return reg
Beispiel #7
0
def trainBatch(net, criterion, optimizer, flage=False):
    """Train `net` on one batch pulled from the global `train_iter`.

    Args:
        net: the CRNN model to train.
        criterion: CTC loss.
        optimizer: optimizer to step.
        flage: when True, step a freshly-created Adadelta instead (see note).

    Returns:
        The (batch-size-normalised) CTC cost tensor.

    Relies on module-level globals: `train_iter`, `ifUnicode`, `clean_txt`,
    `utils`, `image`, `text`, `length`, `converter`, `Variable`, `optim`, `torch`.
    """
    # BUG FIX: `.next()` is Python-2 only; use the builtin next().
    cpu_images, cpu_texts = next(train_iter)  # decode utf-8 to unicode
    if ifUnicode:
        cpu_texts = [clean_txt(tx.decode('utf-8')) for tx in cpu_texts]

    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)

    # BUG FIX: the original forwarded/zeroed the global `crnn`, ignoring the
    # `net` argument entirely; use the passed-in model consistently.
    preds = net(image)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size
    net.zero_grad()
    cost.backward()
    if flage:
        # NOTE(review): this rebinds only the *local* `optimizer` to a
        # brand-new Adadelta (discarding accumulated optimizer state) and
        # does not affect the caller's optimizer on later batches — confirm
        # this is intended.
        lr = 0.0001
        optimizer = optim.Adadelta(net.parameters(), lr=lr)
    optimizer.step()
    return cost
Beispiel #8
0
def train(crnn, train_loader, criterion, epoch):
    """Run one training epoch of the CRNN under CTC loss.

    Logs the averaged loss every `params.displayInterval` batches.
    Relies on module-level globals: `device`, `utils`, `dataset`,
    `converter`, `optimizer`, `params`, `torch`.
    """
    for param in crnn.parameters():
        param.requires_grad = True
    crnn.train()

    # Running-loss averager, reset after every display interval.
    loss_avg = utils.averager()

    for i_batch, (image, index) in enumerate(train_loader):
        # image: [b, c, h, w], e.g. [32, 1, 32, 160]
        image = image.to(device)
        print('image.shape:', image.shape)
        batch_size = image.size(0)
        # Ground-truth strings for this batch, looked up by sample index.
        label = utils.get_batch_label(dataset, index)
        # preds: [T, batch, nclass]
        preds = crnn(image)
        # Encoded targets plus one length per sample.
        label_text, label_length = converter.encode(label)
        # Each sample's input length is the full time axis T.
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)
        cost = criterion(preds, label_text, preds_size,
                         label_length) / batch_size
        crnn.zero_grad()
        cost.backward()
        optimizer.step()

        loss_avg.add(cost)

        if (i_batch + 1) % params.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch, params.epochs, i_batch, len(train_loader),
                   loss_avg.val()))
            loss_avg.reset()
# Fixed CRNN output sequence length (time axis T = 33).
preds_size = torch.tensor([33])

converter = utils.strLabelConverter(alphabet)

# Build the CRNN and restore pretrained weights (Colab / Google Drive path).
crnn = crnn.CRNN(imgH, nc, nclass, nh).to(device)
crnn.load_state_dict(torch.load('/content/drive/My Drive/WeightNet/OCR(3.0)'))

tp_1, fp_1, fn_1 = 0, 0, 0  # True Positive, False positive, False negative for first head
tp_2, fp_2, fn_2 = 0, 0, 0  # for double head
result = ['', '']

with torch.no_grad():
    for x, y in test_loader:

        cpu_images, cpu_texts = x.to(device), y
        preds = crnn(cpu_images)
        # Split the flat network output into per-head tensors:
        # view as (heads, T=33, batch=1, nclass=23).
        preds = preds.view(-1, preds_size.item(), 1, 23)

        for idx, head in enumerate(preds):

            # Greedy (best-path) decoding: best class index per time step.
            _, preds1 = head.max(2)
            preds1 = preds1.transpose(1, 0).contiguous().view(-1)
            raw_pred = converter.decode(preds1.data,
                                        preds_size.data,
                                        raw=False)

            # Post-process with the regex `temp` (presumably a plate-number
            # pattern — defined elsewhere; verify against its definition);
            # fall back to 'Unknown' when nothing matches.
            postpro = re.findall(temp, raw_pred)
            sim_pred = postpro[0] if postpro != [] else 'Unknown'

            result[idx] = sim_pred
Beispiel #10
0
def detect(path_name_img):
    """Detect licence plates in an image and OCR each with a two-head CRNN.

    Args:
        path_name_img: path to the input image.

    Returns:
        (nomer_list, outputs, mask_rgb, image_orig):
            nomer_list — per-plate [head0, head1] decoded strings,
            outputs — cropped plate zones (BGR),
            mask_rgb — the binary segmentation mask as RGB,
            image_orig — the original image.

    Relies on module-level globals: `prepare_image`, `model`, `device`,
    `rectDetect`, `convex_hull_image`, `nomer_aug`, `crnn`, `converter`,
    `preds_size`, `temp`, `cv2`, `np`, `re`.
    """
    image, image_orig, img_shape = prepare_image(path_name_img)
    test_output = model(image.to(device))

    w, h, _ = img_shape
    # Threshold the segmentation output at 0.6 and resize the binary mask
    # back to the original image size.
    mask_gray = cv2.resize(
        (test_output[0].cpu().detach().numpy().squeeze() > 0.6).astype(
            np.uint8),
        dsize=(h, w))
    mask_rgb = cv2.cvtColor(mask_gray, cv2.COLOR_GRAY2RGB)
    contours, hierarchy = cv2.findContours(mask_gray.copy(), cv2.RETR_EXTERNAL,
                                           cv2.CHAIN_APPROX_SIMPLE)

    count_contours = len(contours)
    outputs = []
    mask_list = []
    nomer_list = []

    for cont in range(count_contours):
        # Build one mask per contour. (Translated from Russian:
        # "make several masks".)
        mask = np.zeros_like(mask_gray)
        mask = cv2.drawContours(mask, [contours[cont]], -1, (255, 0, 0), 3,
                                cv2.LINE_AA,
                                np.expand_dims(hierarchy[:, cont], axis=1), 1)
        mask = convex_hull_image(mask).astype(np.uint8)
        # Skip tiny regions — likely segmentation noise.
        if np.sum(mask) < 1500:
            continue

        mask_list.append(mask)
        a3 = cv2.cvtColor(mask, cv2.COLOR_GRAY2RGB)
        cord = rectDetect.detect([a3 * 255])

        # Crop the plate zone from the original image.
        zones = rectDetect.get_cv_zonesBGR(image_orig.copy(), cord)
        outputs.append(zones[0])

    for nomer in outputs:
        image = nomer_aug(image=nomer)['image'].unsqueeze(0)
        preds1 = crnn(image.to(device))
        # Split into per-head tensors: (heads, T=33, batch=1, nclass=23).
        preds1 = preds1.view(-1, 33, 1, 23)
        conf = [0, 0]

        for idx, head in enumerate(preds1):
            # Greedy decoding per head. CLEANUP: removed the unused local
            # `batch = preds.size(1)` from the original.
            _, preds = head.max(2)
            preds = preds.transpose(1, 0).contiguous().view(-1)

            sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
            postpro = re.findall(temp, sim_pred)

            sim_pred = postpro[0] if postpro != [] else 'Unknown'
            conf[idx] = sim_pred

        nomer_list.append(conf)

    return nomer_list, outputs, mask_rgb, image_orig