Esempio n. 1
0
def val(net, val_loader, criterion, iteration, max_i=1000):
    """Validate `net` on `val_loader` and return exact-match accuracy.

    Runs at most `max_i` batches. CTC loss is averaged over batches and
    accuracy is computed over the samples actually evaluated.

    Relies on module globals: `args`, `utils`, `val_dataset`, `converter`,
    `params`.
    """
    print('Start val')
    # Freeze parameters of the model under evaluation. (The original froze
    # the global `crnn` and also ran inference through it; we consistently
    # use the `net` argument instead.)
    for p in net.parameters():
        p.requires_grad = False
    net.eval()

    n_correct = 0
    n_total = 0  # samples actually seen (loader may hold fewer than max_i batches)
    loss_avg = utils.averager()

    for i_batch, (image, index) in enumerate(val_loader):
        if args.cuda:
            image = image.cuda()
            criterion = criterion.cuda()
        label = utils.get_batch_label(val_dataset, index)
        with torch.no_grad():  # no gradients needed for validation
            preds = net(image)
        batch_size = image.size(0)
        n_total += batch_size
        index = np.array(index.data.numpy())
        text, length = converter.encode(label)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        # Greedy CTC decode: per-timestep argmax, then collapse repeats/blanks.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, label):
            if pred == target:
                n_correct += 1

        if (i_batch + 1) % params.displayInterval == 0:
            print('[%d/%d][%d/%d]' %
                  (iteration, params.niter, i_batch, len(val_loader)))

        if i_batch == max_i:
            break

    # Show a few raw (uncollapsed) predictions from the last batch.
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:params.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, label):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    print(n_correct)
    print(n_total)
    # Was n_correct / (max_i * val_batchSize), which understates accuracy
    # whenever the loader yields fewer than max_i full batches.
    accuracy = n_correct / float(max(n_total, 1))
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))

    return accuracy
Esempio n. 2
0
def val(net, dataset, criterion, max_iter=100):
    """Validate `net` on `dataset`: average CTC loss + exact-match accuracy.

    Fixes: `next(val_iter)` instead of the Python-2-only `val_iter.next()`;
    no `squeeze(2)` after `max(2)` (on torch >= 0.4 `max(dim)` already drops
    the dimension, so `squeeze(2)` raises); inference runs through the `net`
    argument rather than the global `crnn`; accuracy is divided by the
    number of samples actually evaluated.

    Relies on module globals: `opt`, `utils`, `converter`, `image`, `text`,
    `length`.
    """
    print('Start val')

    # Disable gradients for evaluation.
    for p in net.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(
        dataset, shuffle=True, batch_size=opt.batchSize, num_workers=int(opt.workers))
    val_iter = iter(data_loader)

    n_correct = 0
    n_total = 0
    loss_avg = utils.averager()

    max_iter = min(max_iter, len(data_loader))
    for _ in range(max_iter):
        cpu_images, cpu_texts = next(val_iter)
        batch_size = cpu_images.size(0)
        n_total += batch_size
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)

        with torch.no_grad():
            preds = net(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        # Greedy CTC decode: per-timestep argmax, then collapse repeats/blanks.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred == target.lower():
                n_correct += 1

    # Show a few raw predictions from the last batch.
    raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:opt.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    accuracy = n_correct / float(max(n_total, 1))
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
Esempio n. 3
0
def val(net, dataset, criterion, max_iter=100):
    """Validate `net` on `dataset` with CTC loss and exact-match accuracy.

    Fixes: `next(val_iter)` replaces the Python-2-only `val_iter.next()`;
    the `squeeze(2)` after `max(2)` is removed (modern torch `max(dim)`
    already drops the dimension, so squeezing dim 2 raises); inference runs
    through the `net` argument instead of the global `crnn`; accuracy is
    divided by the number of samples actually evaluated.

    Relies on module globals: `opt`, `utils`, `converter`, `image`, `text`,
    `length`.
    """
    print('Start val')

    for p in net.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(
        dataset, shuffle=True, batch_size=opt.batchSize, num_workers=int(opt.workers))
    val_iter = iter(data_loader)

    n_correct = 0
    n_total = 0
    loss_avg = utils.averager()

    for _ in range(min(max_iter, len(data_loader))):
        cpu_images, cpu_texts = next(val_iter)
        batch_size = cpu_images.size(0)
        n_total += batch_size
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)

        with torch.no_grad():
            preds = net(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        # Greedy CTC decode.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred == target.lower():
                n_correct += 1

    # Print raw (uncollapsed) predictions from the last batch for inspection.
    raw_preds = converter.decode(preds.data, preds_size.data, raw=True)
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    accuracy = n_correct / float(max(n_total, 1))
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
Esempio n. 4
0
def val(net, dataset, criterion, model_path, max_iter=np.inf):
    """Validate `net` on `dataset` and report greedy-decode accuracy.

    `model_path` is only echoed in the summary line. Fixes: `next(val_iter)`
    replaces the Python-2-only `val_iter.next()`; accuracy is divided by the
    number of samples actually evaluated rather than `max_iter * batchSize`;
    the unused `n_correct` counter and loop-index churn are removed.

    Relies on module globals: `opt`, `utils`, `converter`, `image`, `text`,
    `length`.
    """
    for p in net.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(
        dataset, shuffle=True, batch_size=opt.batchSize, num_workers=int(opt.workers))
    val_iter = iter(data_loader)

    n_correct_greed = 0
    n_total = 0
    # Loss averager over all evaluated batches.
    loss_avg = utils.averager()
    loss_avg.reset()

    max_iter = min(max_iter, len(data_loader))
    for _ in range(max_iter):
        cpu_images, cpu_texts = next(val_iter)
        batch_size = cpu_images.size(0)
        n_total += batch_size
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)

        with torch.no_grad():
            preds = net(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        # Greedy CTC decode of the batch.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds_greed = converter.decode(preds.data, preds_size.data, raw=False)
        for pred_greed, target in zip(sim_preds_greed, cpu_texts):
            if pred_greed == target.lower():
                n_correct_greed += 1

    raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:opt.n_test_disp]
    accuracy_greed = n_correct_greed / float(max(n_total, 1))

    print('test loss: %f, accuray_greed: %f, model: %s' % (loss_avg.val(), accuracy_greed, model_path))
Esempio n. 5
0
    def train(self, max_iter=np.inf):
        """Run the full training loop: periodically validate, checkpoint, log.

        Validation fires whenever the global iteration count is a multiple of
        ``valInterval`` (including iteration 0, i.e. before the first batch),
        saving a 'latest' checkpoint every time and 'best_cer' / 'best_wer'
        checkpoints when the respective metric improves.
        """
        loss_avg = utils.averager()
        best_cer, best_wer = 100, 100
        write_info(self.model, self.opt)
        self.writer = Writer(self.opt.lr, self.opt.nepoch, self.opt.node_dir, use_tb=self.opt.use_tb)
        self.iterations = 0
        for epoch in range(self.opt.nepoch):
            self.writer.epoch = epoch
            self.writer.nbatches = len(self.train_loader)
            self.train_iter = iter(self.train_loader)
            for batch_no in range(1, len(self.train_loader) + 1):
                # Validate / checkpoint before training this batch.
                if self.iterations % self.opt.valInterval == 0:
                    valloss, val_CER, val_WER = self.eval(self.test_data, max_iter=self.val2_iter)
                    self.writer.update_valloss(valloss.val().item(), val_CER)
                    torch.save(
                            self.model.state_dict(), '{0}/{1}.pth'.format(self.opt.node_dir,'latest'))
                    if val_CER < best_cer:
                        torch.save(
                            self.model.state_dict(), '{0}/{1}.pth'.format(self.opt.node_dir,'best_cer'))
                        best_cer = val_CER
                        self.writer.update_best_er(val_CER, self.iterations)
                    if val_WER < best_wer:
                        torch.save(
                            self.model.state_dict(), '{0}/{1}.pth'.format(self.opt.node_dir,'best_wer'))
                        best_wer = val_WER
                loss_avg.add(self.trainBatch())
                self.iterations += 1
                self.writer.iterations = self.iterations
                self.writer.batch = batch_no
                if self.iterations % self.opt.displayInterval == 0:
                    self.writer.update_trloss(loss_avg.val().item())
                    loss_avg.reset()
        self.writer.end()
        return
Esempio n. 6
0
def train_fn(model, data_loader, optimizer):
    """Train `model` for one epoch; return the mean CTC loss per batch.

    Each batch dict yields (images, texts). Removes the `loss_avg` averager
    the original constructed but never used.

    Relies on module globals: `utils`, `converter`, `criterion`, and the
    shared `image`, `text`, `length` buffers.
    """
    model.train()
    tk = tqdm(data_loader, total=len(data_loader))
    fin_loss = 0
    for data in tk:
        imgs, texts = data.values()
        utils.loadData(image, imgs)
        batch_size = imgs.size(0)
        t, l = converter.encode(texts)
        utils.loadData(text, t)
        utils.loadData(length, l)
        optimizer.zero_grad()
        preds = model(image)
        # Every sample's CTC input length equals the model's output seq length.
        preds_length = torch.full(size=(batch_size, ),
                                  fill_value=preds.size(0),
                                  dtype=torch.int32)
        loss = criterion(preds, text, preds_length, length)
        loss.backward()
        optimizer.step()
        fin_loss += loss.item()
    return fin_loss / len(data_loader)
Esempio n. 7
0
def train(crnn, train_loader, criterion, epoch):
    """Train `crnn` for one epoch over `train_loader` with CTC loss.

    Removes the unconditional per-batch debug print of `image.shape` (it
    spammed stdout on every batch) and the blocks of commented-out debug
    code.

    Relies on module globals: `device`, `utils`, `dataset`, `converter`,
    `optimizer`, `params`.
    """
    for p in crnn.parameters():
        p.requires_grad = True
    crnn.train()
    # Running average of the batch losses, reset at each display interval.
    loss_avg = utils.averager()
    for i_batch, (image, index) in enumerate(train_loader):
        image = image.to(device)  # (batch, channels, H, W)
        batch_size = image.size(0)
        # One ground-truth string per sample in the batch.
        label = utils.get_batch_label(dataset, index)
        preds = crnn(image)  # (seq_len, batch, n_classes)
        label_text, label_length = converter.encode(label)
        # Every sample's CTC input length equals the output sequence length.
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)
        cost = criterion(preds, label_text, preds_size,
                         label_length) / batch_size
        crnn.zero_grad()
        cost.backward()
        optimizer.step()

        loss_avg.add(cost)

        if (i_batch + 1) % params.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch, params.epochs, i_batch, len(train_loader),
                   loss_avg.val()))
            loss_avg.reset()
Esempio n. 8
0
def val(crnn, valid_loader, criterion, max_iter=1000):
    """Validate `crnn` on `valid_loader`; return exact-match accuracy.

    Fixes: `next(val_iter)` replaces the Python-2-only `val_iter.next()`;
    inference runs under `torch.no_grad()`; accuracy is divided by the
    number of samples actually evaluated instead of `max_iter * batchSize`.

    Relies on module globals: `converter`, `utils`, `opt`.
    """
    print('Start val')
    for p in crnn.parameters():
        p.requires_grad = False
    crnn.eval()

    val_iter = iter(valid_loader)

    n_correct = 0
    n_total = 0
    loss_avg = utils.averager()

    max_iter = min(max_iter, len(valid_loader))
    for _ in range(max_iter):
        names, images, texts = next(val_iter)
        batch_size = images.size(0)
        n_total += batch_size
        t, l = converter.encode(texts)
        images = images.cuda()
        with torch.no_grad():
            preds = crnn(images)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, t, preds_size, l) / batch_size
        loss_avg.add(cost)

        # Greedy CTC decode.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, texts):
            if pred == target:
                n_correct += 1

    # Show a few raw predictions from the last batch.
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:opt.n_test_disp]
    for name, raw_pred, pred, gt in zip(names, raw_preds, sim_preds, texts):
        print('%-20s:%-20s => %-20s, gt: %-20s' % (name, raw_pred, pred, gt))

    accuracy = n_correct / float(max(n_total, 1))
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
    return accuracy
Esempio n. 9
0
def eval_fn(model, data_loader):
    """Evaluate `model`: average CTC loss and exact-match accuracy.

    Fixes: the manual append loop copying `texts` becomes `list(texts)`;
    accuracy is divided by the number of samples actually evaluated rather
    than `len(data_loader) * BATCH_SIZE` (the last batch may be short).

    Relies on module globals: `utils`, `converter`, `criterion`, `f`
    (torch.nn.functional), `config`, and the shared `image`, `text`,
    `length` buffers.
    """
    model.eval()
    tk = tqdm(data_loader, total=len(data_loader))
    n_correct = 0
    n_total = 0
    loss_avg = utils.averager()
    with torch.no_grad():
        for data in tk:
            imgs, texts = data.values()
            utils.loadData(image, imgs)
            batch_size = imgs.size(0)
            n_total += batch_size
            t, l = converter.encode(texts)
            utils.loadData(text, t)
            utils.loadData(length, l)
            preds = model(image)
            # CTC input length per sample = model output sequence length.
            preds_length = torch.full(size=(batch_size, ),
                                      fill_value=preds.size(0),
                                      dtype=torch.int32)
            loss = criterion(preds, text, preds_length, length)
            loss_avg.add(loss)
            preds = f.softmax(preds, dim=2)
            _, preds = preds.max(2)
            preds = preds.transpose(1, 0).contiguous().view(-1)
            sim_preds = converter.decode(preds, preds_length)
            cpu_texts_decode = list(texts)
            for pred, target in zip(sim_preds, cpu_texts_decode):
                if pred == target:
                    n_correct += 1

        # Show raw predictions for up to 10 samples of the final batch.
        raw_preds = converter.decode(preds, preds_length, raw=True)[:10]
        for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts_decode):
            print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

        accuracy = n_correct / float(max(n_total, 1))
        print('Val.loss: %f, accuracy: %f' % (loss_avg.val(), accuracy))
Esempio n. 10
0
def train(crnn, train_loader, criterion, epoch):
    """Train the CRNN for one epoch with CTC loss; log every displayInterval.

    Relies on module globals: `device`, `utils`, `converter`, `optimizer`,
    `arg`.
    """
    for param in crnn.parameters():
        param.requires_grad = True
    crnn.train()
    running_loss = utils.averager()
    for step, (batch_imgs, batch_labels) in enumerate(train_loader):
        batch_imgs = batch_imgs.to(device)
        net_out = crnn(batch_imgs)
        n_samples = batch_imgs.size(0)
        targets, target_lens = converter.encode(batch_labels)
        # Each sample's CTC input length equals the output sequence length.
        out_lens = torch.IntTensor([net_out.size(0)] * n_samples)

        loss = criterion(net_out, targets, out_lens, target_lens) / n_samples
        crnn.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss.add(loss)

        if (step + 1) % arg.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch, arg.nepoch, step, len(train_loader),
                   running_loss.val()))
            running_loss.reset()
Esempio n. 11
0
def val(net, data_loader, criterion, max_iter=100):
    """Validate `net` on `data_loader`: CTC loss + exact-match accuracy.

    Fixes: `next(val_iter)` replaces the Python-2-only `val_iter.next()`;
    inference runs through the `net` argument instead of the global `crnn`;
    accuracy is divided by the number of samples actually evaluated.

    Relies on module globals: `device`, `utils`, `converter`, `opt`.
    """
    print('Start val')

    net.eval()
    val_iter = iter(data_loader)

    n_correct = 0
    n_total = 0
    loss_avg = utils.averager()

    max_iter = min(max_iter, len(data_loader))
    for _ in range(max_iter):
        cpu_images, text, length, cpu_texts = next(val_iter)
        image = cpu_images.to(device)
        batch_size = cpu_images.size(0)
        n_total += batch_size

        with torch.no_grad():
            preds = net(image)
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)
        cost = criterion(preds, text, preds_size, length)
        loss_avg.add(cost)

        # Greedy CTC decode.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred == target.lower():
                n_correct += 1

    # Show a few raw predictions from the last batch.
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:opt.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    accuracy = n_correct / float(max(n_total, 1))
    print('Test loss: %f, accuray: %.2f%%' % (loss_avg.val(), accuracy * 100))
Esempio n. 12
0
    def evaluate(net, dataset, criterion, max_iter=100):
        """Evaluate `net` over the whole dataset; return the loss averager.

        Reports two accuracies via logging: per-cell accuracy and per-table
        accuracy (a table counts as correct only when every cell matches).
        Fixes: `next(val_iter)` replaces the Python-2-only `val_iter.next()`;
        the dead loop-counter churn is removed.

        NOTE(review): `max_iter` is accepted but overridden with the full
        loader length, matching the original behavior — confirm that is
        intended before relying on the argument.
        """
        for p in net.parameters():
            p.requires_grad = False
        net.eval()
        data_loader = DataLoader(dataset, batch_size=config.batch_size)
        val_iter = iter(data_loader)
        n_correct = 0
        n_total = 0
        table_correct = 0
        table_total = 0
        loss_avg = utils.averager()
        max_iter = len(data_loader)
        for _ in range(max_iter):
            data = next(val_iter)
            out_pred = net(data)
            loss = criterion(out_pred, data.y.cuda())
            loss_avg.add(loss)

            _, out_pred = out_pred.max(1)
            label = data.y.detach().cpu().numpy()
            out_pred = out_pred.detach().cpu().numpy()
            # Whole-table accuracy: correct only if every cell matches.
            if (label == out_pred).all():
                table_correct = table_correct + 1
            table_total = table_total + 1
            n_correct = n_correct + (label == out_pred).sum()
            n_total = n_total + label.shape[0]
        accuracy = n_correct / float(n_total)
        table_accuracy = table_correct / float(table_total)

        logging.info('Test cell loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
        logging.info('Test one table loss: %f, accuray: %f' % (loss_avg.val(), table_accuracy))
        return loss_avg
Esempio n. 13
0
def test(model, crit, dataset, vocab, opt, writer):
    """Run inference over `dataset` and print the COCO caption score.

    Bug fix: the original immediately shadowed the `writer` argument with a
    fresh, never-used `SummaryWriter()`; that shadowing (and the unused
    `loss_avg` averager) is removed. `crit` and `writer` are kept in the
    signature for interface compatibility.

    Relies on module globals: `utils`, `json_normalize`,
    `convert_data_to_coco_scorer_format`, `suppress_stdout_stderr`.
    """
    model.eval()
    loader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=True)
    scorer = COCOScorer()
    # Ground-truth sentences loaded from the JSON referenced by the options.
    gt_dataframe = json_normalize(
        json.load(open(opt["input_json"]))['sentences'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe)
    results = []
    samples = {}
    for data in loader:
        fc_feats = data['fc_feats'].cuda()
        labels = data['labels'].cuda()
        masks = data['masks'].cuda()
        video_ids = data['video_ids']
        # Inference-only forward pass: generate caption token sequences.
        with torch.no_grad():
            seq_probs, seq_preds = model(fc_feats, mode='inference', opt=opt)

        sents = utils.decode_sequence(vocab, seq_preds)

        # One sample entry per video id, in COCO scorer format.
        for k, sent in enumerate(sents):
            video_id = video_ids[k]
            samples[video_id] = [{'image_id': video_id, 'caption': sent}]

    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
    results.append(valid_score)
    print(valid_score)
Esempio n. 14
0
def train(crnn, train_loader, criterion, optimizer, valid_loader):
    """Train `crnn` for one epoch over `train_loader` with CTC loss.

    Fixes: iterates the loader directly with `enumerate` instead of the
    Python-2-only `train_iter.next()` pattern.

    NOTE(review): `epoch` is read from module scope in the log line, and
    `valid_loader` is accepted but never used here — kept for interface
    compatibility; confirm both against the caller.

    Relies on module globals: `converter`, `utils`, `opt`, `epoch`.
    """
    for p in crnn.parameters():
        p.requires_grad = True
    crnn.train()
    # Running loss average, reset at each display interval.
    loss_avg = utils.averager()
    for i, (_, images, texts) in enumerate(train_loader):
        batch_size = images.size(0)
        t, l = converter.encode(texts)
        images = images.cuda()
        preds = crnn(images)
        # CTC input length per sample = output sequence length.
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)
        cost = criterion(preds, t, preds_size, l) / batch_size
        crnn.zero_grad()
        cost.backward()
        optimizer.step()
        loss_avg.add(cost)
        if (i + 1) % opt.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch, opt.nepoch, i, len(train_loader), loss_avg.val()))
            loss_avg.reset()
Esempio n. 15
0
    crnn.cuda()
    crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu))
    image = image.cuda()
    textAttention = textAttention.cuda()
    criterionAttention = criterionAttention.cuda()
    criterionCTC = criterionCTC.cuda()

# Wrap the shared tensors in autograd Variables (legacy torch < 0.4 API).
image = Variable(image)
textAttention = Variable(textAttention)
lengthAttention = Variable(lengthAttention)
textCTC = Variable(textCTC)
lengthCTC = Variable(lengthCTC)


# loss averager
# Running-average trackers: combined loss, CTC branch, attention branch.
loss_avg = utils.averager()
loss_CTC = utils.averager()
loss_Attention = utils.averager()

# setup optimizer
# Optimizer choice is mutually exclusive by flag priority:
# Adam > Adadelta > RMSprop (default).
if opt.adam:
    optimizer = optim.Adam(crnn.parameters(), lr=opt.lr,
                           betas=(opt.beta1, 0.999))
elif opt.adadelta:
    optimizer = optim.Adadelta(crnn.parameters(), lr=opt.lr)
else:
    optimizer = optim.RMSprop(crnn.parameters(), lr=opt.lr)


def val(net, valdataset, criterionAttention,criterionCTC, max_iter=100):
    print('Start val')
Esempio n. 16
0
def val(net, valdataset, criterionAttention, criterionCTC, max_iter=100):
    """Validate a dual-head (CTC + attention) model on `valdataset`.

    Prints per-sample decodes and, at the end, the character error rate
    (Levenshtein distance / total chars) and exact-match accuracy for each
    head.

    NOTE(review): this is Python-2-era code — `val_iter.next()` and
    `str.decode('utf-8')` both fail on Python 3; `imageAttention` in the
    `else` branch below is not defined anywhere visible (presumably it
    should be `image`); and the final accuracies divide by
    `max_iter * batch_size` using the LAST batch's size. Confirm before
    reuse. Left byte-identical here.
    """
    print('Start val')

    # Freeze the global `crnn` (the same model the caller passes as `net`
    # — TODO confirm).
    for p in crnn.parameters():
        p.requires_grad = False

    net.eval()
    val_sampler = dataset.randomSequentialSampler(valdataset, opt.batchSize)
    data_loader = torch.utils.data.DataLoader(valdataset,
                                              batch_size=opt.batchSize,
                                              shuffle=False,
                                              sampler=val_sampler,
                                              num_workers=int(opt.workers),
                                              collate_fn=dataset.alignCollate(
                                                  imgH=opt.imgH,
                                                  imgW=opt.imgW,
                                                  keep_ratio=opt.keep_ratio))
    # data_loader = torch.utils.data.DataLoader(
    #     dataset, shuffle=True, batch_size=opt.batchSize, num_workers=int(opt.workers))
    val_iter = iter(data_loader)

    i = 0
    n_correct = 0
    n_correctCTC = 0
    n_correctAttention = 0
    distanceCTC = 0
    distanceAttention = 0
    sum_charNum = 0
    loss_avg = utils.averager()

    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        # Python-2 iterator call; on Python 3 this must be next(val_iter).
        data = val_iter.next()
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        # Encode the ground truth once per head (different label converters).
        tAttention, lAttention = converterAttention.encode(cpu_texts)
        utils.loadData(textAttention, tAttention)
        utils.loadData(lengthAttention, lAttention)
        tCTC, lCTC = converterCTC.encode(cpu_texts)
        utils.loadData(textCTC, tCTC)
        utils.loadData(lengthCTC, lCTC)
        # print (image)

        if opt.lang:
            predsCTC, predsAttention = crnn(image, lengthAttention,
                                            textAttention)
        else:
            # NOTE(review): `imageAttention` is undefined in this scope —
            # this branch raises NameError; likely meant `image`.
            predsCTC, predsAttention = crnn(imageAttention, lengthAttention)
        costAttention = criterionAttention(predsAttention, textAttention)
        preds_size = Variable(torch.IntTensor([predsCTC.size(0)] * batch_size))
        costCTC = criterionCTC(predsCTC, textCTC, preds_size,
                               lengthCTC) / batch_size
        # Both branch losses are folded into a single averager.
        loss_avg.add(costAttention)
        loss_avg.add(costCTC.cuda())

        # Attention head: per-step argmax, decoded against target lengths.
        _, predsAttention = predsAttention.max(1)
        predsAttention = predsAttention.view(-1)
        sim_predsAttention = converterAttention.decode(predsAttention.data,
                                                       lengthAttention.data)

        # CTC head: greedy per-timestep argmax, then collapse.
        _, predsCTC = predsCTC.max(2)
        predsCTC = predsCTC.transpose(1, 0).contiguous().view(-1)
        sim_predsCTC = converterCTC.decode(predsCTC.data,
                                           preds_size.data,
                                           raw=False)

        # NOTE(review): reuses `i` as the inner loop variable, clobbering
        # the outer batch index.
        for i, cpu_text in enumerate(cpu_texts):
            gtText = cpu_text.decode('utf-8')
            CTCText = sim_predsCTC[i]
            if isinstance(CTCText, str):
                # Python-2 only: str has no .decode on Python 3.
                CTCText = CTCText.decode('utf-8')
            AttentionText = sim_predsAttention[i]
            print('gtText: %s' % gtText)
            print('CTCText: %s' % CTCText)
            print('AttentionText: %s' % AttentionText)
            if gtText == CTCText:
                n_correctCTC += 1
            if gtText == AttentionText:
                n_correctAttention += 1
            distanceCTC += Levenshtein.distance(CTCText, gtText)
            distanceAttention += Levenshtein.distance(AttentionText, gtText)
            sum_charNum = sum_charNum + len(gtText)

    # Uses the last batch's size as the per-batch sample count.
    correctCTC_accuracy = n_correctCTC / float(max_iter * batch_size)
    cerCTC = distanceCTC / float(sum_charNum)
    print('Test CERCTC: %f, accuracyCTC: %f' % (cerCTC, correctCTC_accuracy))
    correctAttention_accuracy = n_correctAttention / float(
        max_iter * batch_size)
    cerAttention = distanceAttention / float(sum_charNum)
    print('Test CERAttention: %f, accuricyAttention: %f' %
          (cerAttention, correctAttention_accuracy))
Esempio n. 17
0
def val(net, val_dataset, criterion, max_iter=100):
    """Validate `net` on `val_dataset`: CTC loss, accuracy, mean edit distance.

    Bug fixes: `min(max_iter, len(data_loader))` — the original used `max`,
    which iterates past the end of the loader and raises StopIteration;
    `next(val_iter)` replaces the Python-2-only `val_iter.next()`; inference
    runs through the `net` argument instead of the global `model`; metrics
    are divided by the number of samples actually evaluated.

    Relies on module globals: `opt`, `dataset` (module), `utils`,
    `converter`, `editdistance`, and the shared `image`, `text`, `length`
    buffers.
    """
    print('Start val')

    for p in net.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(val_dataset,
                                              shuffle=False,
                                              batch_size=opt.batchSize,
                                              num_workers=int(opt.workers),
                                              collate_fn=dataset.alignCollate(
                                                  imgH=opt.imgH,
                                                  imgW=opt.imgW,
                                                  keep_ratio=opt.keep_ratio))
    val_iter = iter(data_loader)
    n_correct = 0
    n_total = 0
    loss_avg = utils.averager()

    edt_dst = 0

    max_iter = min(max_iter, len(data_loader))
    for _ in range(max_iter):
        cpu_images, cpu_texts = next(val_iter)
        batch_size = cpu_images.size(0)
        n_total += batch_size
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)

        with torch.no_grad():
            preds = net(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        # Greedy CTC decode.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred == target:
                n_correct += 1
            else:
                # Accumulate edit distance only for wrong predictions.
                edt_dst += editdistance.eval(pred, target)

    # Show a few raw predictions from the last batch.
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:opt.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    accuracy = n_correct / float(max(n_total, 1))
    avg_edt_dst = edt_dst / float(max(n_total, 1))
    print('Test loss: %f, accuray: %f, avg_edt_dst: %f' %
          (loss_avg.val(), accuracy, avg_edt_dst))
Esempio n. 18
0
def val(net,
        dataset,
        criterion,
        best_accuracy,
        epoch,
        i,
        best_epoch,
        best_i,
        max_iter=100):
    """Validate `net` and track the best accuracy seen so far.

    Returns (best_accuracy, best_epoch, best_i), updated when this run
    beats the previous best.

    Bug fixes: the summary print read the global `loss_avg` (the training
    averager) instead of the local `loss_avg_` computed here;
    `next(val_iter)` replaces the Python-2-only `val_iter.next()`; the model
    under evaluation is the `net` argument, not the global `crnn`; accuracy
    is divided by the number of samples actually evaluated.

    Relies on module globals: `opt`, `utils`, `converter`, and the shared
    `image`, `text`, `length` buffers.
    """
    print('Validating...')

    for para in net.parameters():
        para.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(dataset,
                                              shuffle=True,
                                              batch_size=opt.batch_size,
                                              num_workers=int(opt.workers))
    val_iter = iter(data_loader)

    n_correct = 0
    n_total = 0
    loss_avg_ = utils.averager()

    max_iter = min(max_iter, len(data_loader))
    for _ in range(max_iter):
        cpu_images, cpu_texts, _extra = next(val_iter)
        batch_size = cpu_images.size(0)
        n_total += batch_size
        utils.loadData(image, cpu_images)
        t, length_ = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, length_)

        with torch.no_grad():
            preds = net(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost_ = criterion(preds, text, preds_size, length) / batch_size
        loss_avg_.add(cost_)

        # Greedy CTC decode.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred == target.lower():
                n_correct += 1

    # Show a few raw predictions from the last batch.
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:opt.test_display_number]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    accuracy = n_correct / float(max(n_total, 1))
    print('Test loss: %f, accuray: %f' % (loss_avg_.val(), accuracy))
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_epoch = epoch
        best_i = i
    print('Best accuracy: ', best_accuracy, ' from ecpoch ', best_epoch,
          ', iteration ', best_i)
    return best_accuracy, best_epoch, best_i
Esempio n. 19
0
def train(loader,
          model,
          crit,
          optimizer,
          lr_scheduler,
          opt,
          rl_crit=None,
          converter=None):
    """Train a CTC-based sign-language recognition model.

    Args:
        loader: training DataLoader; its dataset must expose ``ix_to_word``.
        model: network mapping fc features to per-frame class scores.
        crit: CTC loss, called as crit(log_probs, targets, input_lengths,
            target_lengths).
        optimizer: optimizer over ``model`` parameters.
        lr_scheduler: StepLR or ReduceLROnPlateau, chosen by opt['lr_schluder'].
        opt: option dict (batch_size, max_len, epochs, eval_every,
            save_checkpoint_every, root_model_path, ...).
        rl_crit: unused placeholder (reinforcement-learning criterion).
        converter: ignored — unconditionally rebuilt from loader.dataset below.
    """

    model.cuda()
    # crit.cuda()
    # optimizer.cuda()
    # lr_scheduler.cuda()
    # video = torch.FloatTensor(params.batchSize, 3, params.imgH, params.imgH)
    # TODO: originally 30 for the Chinese sign-language dataset
    text = torch.LongTensor(opt['batch_size'] * opt['max_len'])
    # text = torch.IntTensor(opt['batch_size'] * 30)
    length = torch.LongTensor(opt['batch_size'])
    converter = strLabelConverter(loader.dataset)
    # model = nn.DataParallel(model)
    writer = SummaryWriter("two_lstm_exp_German")
    loss_avg = averager()
    wer_val = 1.0  # worst-case WER until the first validation run
    for epoch in range(opt["epochs"]):
        n_correct = 0
        model.train()
        # NOTE(review): scheduler.step() before optimizer.step() is the
        # pre-1.1 PyTorch ordering — confirm the installed version expects it.
        if opt['lr_schluder'] == 'StepLR':
            lr_scheduler.step()
        elif opt['lr_schluder'] == 'ReduceLROnPlateau':
            lr_scheduler.step(wer_val)
        iteration = 0
        f_wer = 0.0
        for data in loader:
            torch.cuda.synchronize()
            # re-enable gradients (validation below turns them off)
            for p in model.parameters():
                p.requires_grad = True
            fc_feats = data['fc_feats'].cuda()  # (batch_size, 80, 512)

            # 1. slice 10 * (batch_size, 8, 512)
            # 2. send each slice to LSTM 10 * (batch_size, 1024)
            # 3. set another mask M2(batch_size, 10)
            # 4. if a slice is full of Zero, set the corresponding index of M2 zero
            # 5. LSTM2
            # 6. obtain final result bt *

            labels = data['labels'].cuda()
            # masks = data['masks'].cuda()
            # clip_nums = data['clip_num']
            # sorted_clip_nums,indices = torch.sort(clip_nums,descending=True)
            # _, desorted_indices = torch.sort(indices, descending=False)
            # fc_feats=fc_feats[indices]
            # pack = rnn.pack_padded_sequence(fc_feats,sorted_clip_nums,batch_first=True)
            #TODO
            optimizer.zero_grad()
            output = model(fc_feats)
            # desorted_res = output[desorted_indices]

            # NOTE(review): requires_grad_() on this non-leaf looks redundant
            # (grads already flow from model output) — confirm it is needed.
            output = output.log_softmax(2).requires_grad_()
            _, preds = output.max(2)
            output = output.transpose(0, 1).contiguous()
            # Flatten labels (dropping -1 padding) into CTC target form.
            labels_ctc = []
            ys = []
            for i in labels:
                for j in i:
                    if not j == -1:
                        labels_ctc.append(j)
            # Per-sample target length = index of the first -1 pad
            # (or max_len when the row has no padding at all).
            for i in labels:
                non_zero = (i == -1).nonzero()
                if not non_zero.numel():
                    ys.append(opt['max_len'])
                else:
                    ys.append(non_zero[0][0])
            loadData(text, torch.LongTensor(labels_ctc))
            loadData(length, torch.LongTensor(ys))
            preds_size = Variable(
                torch.LongTensor([output.size(0)] * output.size(1)))

            loss = crit(output, text.cuda(), preds_size.cuda(), length.cuda())
            # loss= crit(output,text,preds_size,length)/opt['batch_size']
            preds = preds.contiguous().view(-1)
            sim_preds = converter.decode(preds.data,
                                         preds_size.data,
                                         raw=False)
            list_1 = []  # NOTE(review): never used — dead local

            for pred, target in zip(sim_preds, labels):
                # Rebuild the reference sentence from label ids.
                ts = target.squeeze().cpu().numpy().tolist()
                res = []
                for i in ts:
                    if i == -1:
                        continue
                    res.append(loader.dataset.ix_to_word[str(i)])
                target = ' '.join(res)
                tmp_wer = wer(target, pred)
                f_wer += tmp_wer

                if pred == target:
                    n_correct += 1
            loss_avg.add(loss)
            loss.backward()
            optimizer.step()
            torch.cuda.synchronize()
            iteration += 1
        # NOTE(review): divides by batch count, not sample count, and is
        # never used afterwards.
        acc = n_correct / float(len(loader))
        # print(len(loader)*opt['batch_size'])
        f_wer = f_wer / float(len(loader) * opt['batch_size'])
        print("[epoch %d]->train_loss = %.6f , wer = %.6f" %
              (epoch, loss_avg.val(), f_wer))

        if epoch % opt["eval_every"] == 0:
            for p in model.parameters():
                p.requires_grad = False

            loss_eval, wer_val = val(model, crit, opt, writer, epoch)
            writer.add_scalars('loss_epcho', {
                'train_loss': loss_avg.val(),
                'val_loss': loss_eval
            }, epoch)
            writer.add_scalars('wer_epcho', {
                'train_wer': f_wer,
                'eval_wer': wer_val
            }, epoch)

        if epoch % opt["save_checkpoint_every"] == 0:
            path = opt['root_model_path']
            # if not os.path.exists(path):
            #     os.mkdir(path)
            # else:
            #     shutil.rmtree(path)
            #     os.mkdir(path)
            model_path = os.path.join(path, 'model_%d.pth' % (epoch))
            model_info_path = os.path.join(path, 'model_score.txt')
            torch.save(model.state_dict(), model_path)
            print("model saved to %s" % (model_path))
            with open(model_info_path, 'a') as f:
                f.write(
                    "model_%d, loss: %.6f  train wer: %.6f val wer: %.6f\n" %
                    (epoch, loss_avg.val(), f_wer, wer_val))
        loss_avg.reset()
Esempio n. 20
0
def val(encoder,
        decoder,
        criterion,
        batchsize,
        dataset,
        teach_forcing=False,
        max_iter=100):
    """Greedy-decoding validation for the attention encoder/decoder pair.

    Decodes each batch one character at a time, feeding every prediction back
    as the next decoder input until EOS_TOKEN, and prints average loss and
    per-character accuracy (the EOS position counts toward the total).

    Args:
        encoder: CNN+BiLSTM feature extractor.
        decoder: attention decoder emitting one character per step.
        criterion: per-step classification loss.
        batchsize: validation batch size.
        dataset: dataset to evaluate.
        teach_forcing: if True the decoding loop is skipped entirely; only
            the non-forced branch is implemented here.
        max_iter: maximum number of batches to evaluate.

    Relies on module-level ``opt``, ``utils``, ``converter``, ``image`` and
    ``EOS_TOKEN``.
    """
    print('Start val')

    # Freeze ALL parameters of both networks.  The previous
    # zip(encoder.parameters(), decoder.parameters()) stopped at the shorter
    # parameter list and left some weights trainable.
    for e in encoder.parameters():
        e.requires_grad = False
    for d in decoder.parameters():
        d.requires_grad = False

    encoder.eval()
    decoder.eval()
    data_loader = torch.utils.data.DataLoader(dataset,
                                              shuffle=False,
                                              batch_size=batchsize,
                                              num_workers=int(opt.workers))
    val_iter = iter(data_loader)

    n_correct = 0
    n_total = 0
    loss_avg = utils.averager()

    max_iter = min(max_iter, len(data_loader))
    # 1-based counter preserves the original behavior of the former
    # "i += 1" inside the loop (used by the periodic print below).
    for i in range(1, max_iter + 1):
        data = next(val_iter)  # next() is py2/py3 compatible; .next() is not
        cpu_images, cpu_texts = data
        b = cpu_images.size(0)
        utils.loadData(image, cpu_images)

        target_variable = converter.encode(cpu_texts)
        # +1: the EOS terminator must also be predicted correctly
        n_total += len(cpu_texts[0]) + 1

        decoded_words = []
        decoded_label = []
        decoder_attentions = torch.zeros(len(cpu_texts[0]) + 1, opt.max_width)
        encoder_outputs = encoder(image)  # CNN + BiLSTM feature extraction
        target_variable = target_variable.cuda()
        # Start symbol (row 0 of the encoded target) kicks off decoding.
        decoder_input = target_variable[0].cuda()
        decoder_hidden = decoder.initHidden(b).cuda()
        loss = 0.0
        if not teach_forcing:
            # Non-forced decoding: each step's output becomes the next
            # step's input, stopping at EOS_TOKEN.
            for di in range(1, target_variable.shape[0]):  # max string length
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                loss += criterion(decoder_output,
                                  target_variable[di])  # one char per step
                # NOTE(review): adding the *running* loss every step inflates
                # the average — confirm whether only the final sum was meant.
                loss_avg.add(loss)
                decoder_attentions[di - 1] = decoder_attention.data
                topv, topi = decoder_output.data.topk(1)
                ni = topi.squeeze(1)
                decoder_input = ni
                if ni == EOS_TOKEN:
                    decoded_words.append('<EOS>')
                    decoded_label.append(EOS_TOKEN)
                    break
                else:
                    decoded_words.append(converter.decode(ni))
                    decoded_label.append(ni)

        # Count per-character hits against the target (minus start symbol).
        for pred, target in zip(decoded_label, target_variable[1:, :]):
            if pred == target:
                n_correct += 1

        if i % 100 == 0:  # progress sample every 100 batches
            texts = cpu_texts[0]
            print('pred:%-20s, gt: %-20s' % (decoded_words, texts))

    accuracy = n_correct / float(n_total)
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
Esempio n. 21
0
def val(encoder, decoder, criterion, val_loader, device):
    """Greedy-decoding validation of the attention encoder/decoder.

    Decodes every batch character by character (feeding each prediction back
    as the next input) until EOS, then reports average loss and
    per-character accuracy including the EOS position.

    Relies on module-level ``converter``, ``opt.max_width``, ``EOS`` and
    ``utils.averager``.

    Returns:
        (average loss, accuracy)
    """
    print('Start val')

    with torch.no_grad():
        encoder.eval()
        decoder.eval()

        i = 0
        n_correct = 0
        n_total = 0
        loss_avg = utils.averager()

        for data in val_loader:
            i += 1
            cpu_images, cpu_texts = data
            b = cpu_images.size(0)
            image = cpu_images.to(device)
            target_variable = converter.encode(cpu_texts).to(device)
            # +1: the EOS terminator must be predicted correctly as well
            n_total += len(cpu_texts[0]) + 1
            #print(cpu_images.size(), target_variable.size())

            decoded_words = []
            decoded_label = []
            decoder_attentions = torch.zeros(
                len(cpu_texts[0]) + 1, opt.max_width)
            #print(decoder_attentions.size())
            encoder_outputs = encoder(image)
            # start symbol (row 0 of the encoded target) kicks off decoding
            decoder_input = target_variable[0].to(device)
            decoder_hidden = decoder.initHidden(b).to(device)
            #print(encoder_outputs.size(), decoder_input.size(), decoder_hidden.size())
            loss = 0.0

            for di in range(1, target_variable.shape[0]):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                loss += criterion(decoder_output, target_variable[di])
                # NOTE(review): this adds the *running* sum each step, which
                # inflates the average — confirm intent.
                loss_avg.add(loss)
                #print(decoder_attention.data.size())
                decoder_attentions[di - 1] = decoder_attention.data
                topv, topi = decoder_output.data.topk(1)
                ni = topi.squeeze(1)
                decoder_input = ni  # feed prediction back (no teacher forcing)

                if ni == EOS:
                    decoded_words.append('<EOS>')
                    decoded_label.append(EOS)
                    break
                else:
                    decoded_words.append(converter.decode(ni))
                    decoded_label.append(ni)

            # per-character hits against the target (start symbol excluded)
            for pred, target in zip(decoded_label, target_variable[1:, :]):
                if pred == target:
                    n_correct += 1

            if i % 1000 == 0:
                print(i)
                texts = cpu_texts[0]
                print('pred:%-20s, gt: %-20s' % (decoded_words, texts))

    accuracy = n_correct / float(n_total)
    print('Test loss: %f, accuracy: %f' % (loss_avg.val(), accuracy))
    return loss_avg.val(), accuracy
Esempio n. 22
0
 def checkAverager(self):
     """averager.val() must equal the mean over every element ever added."""
     avg = utils.averager()
     avg.add(Variable(torch.Tensor([1, 2])))
     avg.add(Variable(torch.Tensor([[5, 6]])))
     # (1 + 2 + 5 + 6) / 4 == 3.5
     assert avg.val() == 3.5
Esempio n. 23
0
def val(net, valdataset, criterionAttention,criterionCTC, max_iter=100):
    """Validate the hybrid CTC + attention recognizer.

    Averages both branch losses and counts exact matches of the attention
    branch's decoded text against ground truth.

    Relies on module-level ``crnn``, ``converterAttention``, ``converterCTC``,
    ``image``, ``textAttention``/``lengthAttention``, ``textCTC``/``lengthCTC``,
    ``opt`` and the ``dataset``/``utils`` helper modules.  The
    .decode('utf-8') call on labels indicates Python-2 byte-string labels.
    """
    print('Start val')

    for p in crnn.parameters():
        p.requires_grad = False

    net.eval()
    val_sampler = dataset.randomSequentialSampler(valdataset, opt.batchSize)
    data_loader = torch.utils.data.DataLoader(
        valdataset, batch_size=opt.batchSize,
        shuffle=False, sampler=val_sampler,
        num_workers=int(opt.workers),
        collate_fn=dataset.alignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio=opt.keep_ratio))
   # data_loader = torch.utils.data.DataLoader(
   #     dataset, shuffle=True, batch_size=opt.batchSize, num_workers=int(opt.workers))
    val_iter = iter(data_loader)

    i = 0
    n_correct = 0
    loss_avg = utils.averager()

    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = val_iter.next()
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        tAttention, lAttention = converterAttention.encode(cpu_texts)
        utils.loadData(textAttention, tAttention)
        utils.loadData(lengthAttention, lAttention)
        tCTC, lCTC = converterCTC.encode(cpu_texts)
        utils.loadData(textCTC, tCTC)
        utils.loadData(lengthCTC, lCTC)
       # print (image)

        if opt.lang:
            predsCTC, predsAttention = crnn(image, lengthAttention, textAttention)
        else:
            # NOTE(review): ``imageAttention`` is not defined anywhere in this
            # function — this branch raises NameError; presumably ``image``
            # was intended.  Confirm before relying on opt.lang == False.
            predsCTC, predsAttention = crnn(imageAttention, lengthAttention)
        costAttention = criterionAttention(predsAttention, textAttention)
        preds_size = Variable(torch.IntTensor([predsCTC.size(0)] * batch_size))
        costCTC = criterionCTC(predsCTC, textCTC, preds_size, lengthCTC) / batch_size
        loss_avg.add(costAttention)
        loss_avg.add(costCTC.cuda())

        _, predsAttention = predsAttention.max(1)
        predsAttention = predsAttention.view(-1)
        sim_predsAttention = converterAttention.decode(predsAttention.data, lengthAttention.data)
        for pred, target in zip(sim_predsAttention, cpu_texts):
            #regText = pred.decode('utf-8')
            regText = pred  # pred is already unicode, no conversion needed
            gtText = target.decode('utf-8')  # convert str label to unicode
            print (regText,gtText)
            if regText == gtText:
                print("correct")
                print (regText,gtText)
                n_correct += 1

   # for pred, gt in zip(sim_preds, cpu_texts):
       # gt = ''.join(gt.split(opt.sep))
       # print('%-20s, gt: %-20s' % (pred, gt))

    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
Esempio n. 24
0
def val(net, dataset, criterion, max_iter=100):
    """Validate the CRNN with CTC loss.

    Computes average loss, exact-match accuracy and the summed Levenshtein
    distance over at most ``max_iter`` batches.  When the alphabet has 36
    characters (lowercase digits+letters benchmark) comparison is
    case-insensitive.

    Relies on module-level ``crnn``, ``converter``, ``image``, ``text``,
    ``length``, ``opt``, ``utils`` and ``Lev``.

    Returns:
        (accuracy, edit_distance)
    """
    print('Start val')

    for p in crnn.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(dataset,
                                              shuffle=True,
                                              batch_size=opt.batchSize,
                                              num_workers=int(opt.workers))
    val_iter = iter(data_loader)
    n_correct = 0
    loss_avg = utils.averager()
    edit_distance = 0
    max_iter = min(max_iter, len(data_loader))
    for epoch_iter in range(max_iter):
        data = next(val_iter)  # next() is py2/py3 compatible; .next() is not
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)

        preds = crnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)

        # 36-class alphabet => case-insensitive benchmark convention
        case_insensitive = len(opt.alphabet) == 36
        for pred, target in zip(sim_preds, cpu_texts):
            gt = target.lower() if case_insensitive else target
            if pred == gt:
                n_correct += 1
            # character-level edit distance as a softer secondary metric
            edit_distance += Lev.distance(pred, gt)

    # Show raw vs. collapsed predictions for the *last* batch only.
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:opt.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
    print('Total distance: ', edit_distance)
    return accuracy, edit_distance
Esempio n. 25
0
def val(model, val_loader, criterion, iteration, max_i=1000):
    """Validate a CTC model whose loader yields flattened label tensors.

    Each batch provides (image, label, length) where ``label`` concatenates
    every target sequence and ``length`` holds each sequence's length, so
    per-sample targets are recovered by slicing.  Returns exact-match
    accuracy.

    Relies on module-level ``device``, ``converter`` and ``params``.
    """
    print('Start val')
    # for p in model.parameters():
    #     p.requires_grad = False
    model.eval()
    i = 0
    n_correct = 0
    total_images_count = 0

    loss_avg = utils.averager()

    for i_batch, (image, label, length) in enumerate(val_loader):
        image = image.to(device)
        label = label.to(device)
        length = length.to(device)

        preds = model(image)  # preds lives on the same device as the model
        batch_size = preds.size(1)
        max_seq = torch.IntTensor([preds.size(0)] * batch_size)  # T, B, C
        max_seq = max_seq.to(device)
        cost = criterion(preds, label, max_seq,
                         length) / batch_size  # per-sample cost
        loss_avg.add(cost)
        _, paths = preds.max(2)
        # paths, scores = beam_decode(paths)
        paths = paths.transpose(1, 0).contiguous().view(-1)
        pred_labels = converter.decode(paths.data, max_seq.data, raw=False)
        label_split_start_idx = 0
        for pred, target_len in zip(pred_labels, length):
            # slice this sample's target out of the flattened label tensor
            target = label[label_split_start_idx:label_split_start_idx +
                           target_len]
            label_split_start_idx += target_len.tolist(
            )  # .tolist() on a 0-dim tensor actually yields a plain int
            target = "".join(
                [converter.alphabet[t - 1] for t in target.cpu().numpy()])
            # target = converter.decode(target, torch.IntTensor([len(target)]))
            if pred == target:
                n_correct += 1
            total_images_count += 1

        if (i_batch + 1) % params.displayInterval == 0:
            print('[%d/%d][%d/%d]' %
                  (iteration, params.epoch, i_batch, len(val_loader)))

        if i_batch == max_i:
            break

    # Show a few validation samples so training progress can be eyeballed.
    raw_preds = converter.decode(paths.data, max_seq.data,
                                 raw=True)[:params.n_test_disp]
    label_split_start_idx = 0
    for raw_pred, pred, target_len in zip(raw_preds, pred_labels, length):
        target = label[label_split_start_idx:label_split_start_idx +
                       target_len]
        label_split_start_idx += target_len.tolist(
        )  # .tolist() on a 0-dim tensor actually yields a plain int
        target = "".join(
            [converter.alphabet[t - 1] for t in target.cpu().numpy()])
        # decode() here collapses a CTC path, not label int->char: a label of
        # 1111 would collapse to a single 'a' — don't confuse the two uses.
        print('%-20s => %-20s, tg: %-20s' % (raw_pred, pred, target))

    # print(n_correct)
    # print(max_i * params.val_batchSize)
    accuracy = n_correct / float(total_images_count)
    print('Val loss: %f, accuray: %d/%d=%f' %
          (loss_avg.val(), n_correct, total_images_count, accuracy))

    return accuracy
Esempio n. 26
0
def val(model, crit, opt, writer=None, epoch=0):
    """Evaluate the CTC sign-language model on the 'test' split.

    Builds a fresh VideoDataset/DataLoader, greedy-decodes the CTC paths and
    reports average loss and word error rate (WER).

    Args:
        model: trained network producing per-frame class scores.
        crit: CTC loss.
        opt: option dict (batch_size, max_len, ...); ``vocab_size`` is
            updated in place from the dataset.
        writer: unused here (TensorBoard writer kept for interface parity).
        epoch: epoch number used only for logging.

    Returns:
        (average loss, WER) for the split.
    """
    dataset = VideoDataset(opt, 'test')
    dataloader = DataLoader(dataset,
                            batch_size=opt['batch_size'],
                            shuffle=True)
    opt["vocab_size"] = dataset.get_vocab_size()
    model.eval()
    # TODO: originally 30 for the Chinese sign-language dataset
    text = torch.LongTensor(opt['batch_size'] * opt['max_len'])
    # text = torch.IntTensor(opt['batch_size'] * 30)
    length = torch.LongTensor(opt['batch_size'])
    loss_avg = averager()
    n_correct = 0
    f_wer = 0.0
    # converter = strLabelConverter(dataset)
    converter = strLabelConverter(dataloader.dataset)
    for data in dataloader:
        fc_feats = data['fc_feats'].cuda()
        labels = data['labels'].cuda()
        # masks = data['masks'].cuda()
        # clip_nums = data['clip_num']
        # sorted_clip_nums, indices = torch.sort(clip_nums, descending=True)
        # _, desorted_indices = torch.sort(indices, descending=False)
        # fc_feats = fc_feats[indices]
        # pack = rnn.pack_padded_sequence(fc_feats, sorted_clip_nums, batch_first=True)
        with torch.no_grad():
            output = model(fc_feats)

        # desorted_res = output[desorted_indices]

        # NOTE(review): requires_grad_() during evaluation looks unnecessary —
        # confirm it isn't required by a downstream hook.
        output = output.log_softmax(2).requires_grad_()
        _, preds = output.max(2)
        output = output.transpose(0, 1).contiguous()
        # Flatten labels (dropping -1 padding) into CTC target form.
        labels_ctc = []
        ys = []
        for i in labels:
            for j in i:
                if not j == -1:
                    labels_ctc.append(j)
        # Per-sample target length = index of first -1 (max_len if no pad).
        for i in labels:
            non_zero = (i == -1).nonzero()
            if not non_zero.numel():
                ys.append(opt['max_len'])
            else:
                ys.append(non_zero[0][0])
        loadData(text, torch.LongTensor(labels_ctc))
        loadData(length, torch.LongTensor(ys))
        preds_size = Variable(
            torch.LongTensor([output.size(0)] * output.size(1)))
        loss = crit(output.cuda(), text.cuda(), preds_size.cuda(),
                    length.cuda())

        preds = preds.contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, labels):
            # rebuild the reference sentence from label ids
            ts = target.squeeze().cpu().numpy().tolist()
            res = []
            for i in ts:
                if i == -1:
                    continue
                res.append(dataloader.dataset.ix_to_word[str(i)])
            target = ' '.join(res)
            tmp_wer = wer(target, pred)
            f_wer += tmp_wer
            if pred == target:
                n_correct += 1
        loss_avg.add(loss)
    # NOTE(review): ``acc`` divides by the number of *batches*, not samples,
    # and is never used afterwards.
    acc = n_correct / float(len(dataloader))
    f_wer = f_wer / float(len(dataloader) * opt['batch_size'])
    print("[epoch %d]->val_loss = %.6f , wer = %.6f" %
          (epoch, loss_avg.val(), f_wer))

    # writer.add_scalar('scalar/val_loss_epcho', loss_avg.val())
    return loss_avg.val(), f_wer
Esempio n. 27
0
def val(net, val_dataset, criterion, max_iter=100):
    """Transcribe val_dataset and stitch per-crop predictions into
    './tes_hori_sub_rst.txt'.

    Image names are expected to look like '<prefix>_<num>...'; consecutive
    crops sharing the same <num> are concatenated (each prediction prepended)
    into a single output line per source image.

    Relies on module-level ``model``, ``converter``, ``image``, ``text``,
    ``length``, ``opt``, ``utils`` and the ``dataset`` module (alignCollate).
    """
    print('Start val')

    for p in model.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(
        val_dataset, shuffle=False, batch_size=opt.batchSize, num_workers=int(opt.workers),collate_fn=dataset.alignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio=opt.keep_ratio))
    val_iter = iter(data_loader)

    n_correct = 0
    loss_avg = utils.averager()

    # BUG FIX: was max(max_iter, len(data_loader)), which walks past the end
    # of the loader and raises StopIteration; min() caps at the loader size.
    max_iter = min(max_iter, len(data_loader))

    img_num = 1
    max_sub = -1
    str_line = ''
    dst_file = open('./tes_hori_sub_rst.txt', 'w+')
    gt = []

    for i in range(max_iter):
        data = next(val_iter)  # next() is py2/py3 compatible; .next() is not
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)

        preds = model(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, gt in zip(sim_preds, cpu_texts):
            # gt looks like '<prefix>_<num>...'
            # NOTE(review): int(gt[1]) assumes the '<num>' segment carries no
            # file extension — confirm the filename format.
            gt = gt.split('_')
            cur_num = int(gt[1])
            if cur_num == img_num:
                # same source image: prepend this crop's prediction
                max_sub += 1
                str_line = pred + str_line
            else:
                # flush the finished image line, then start the next one
                dst_file.write(gt[0] + '_' + str(img_num) + '.jpg' + ' ' + str_line + '\n')
                str_line = pred
                img_num = cur_num

            print('pred:%-20s, gt: %-20s' % (pred, gt))

    # flush the last accumulated line
    dst_file.write(gt[0] + '_' + str(img_num) + '.jpg' + ' ' + str_line + '\n')
    dst_file.close()
def val(net, net2, net3, _dataset, epoch, step, criterion, max_iter=100):
    """Validate an ensemble of three CRNNs by averaging their outputs.

    For samples where the member networks agree on the number of non-blank
    positions, per-position scores are averaged (2- or 3-way) before CTC
    decoding.  Per-sample predictions with softmax confidences are written
    to an epoch/step-stamped text file under ``log_dir``.

    Relies on module-level ``logger``, ``params``, ``dataset``, ``utils``,
    ``converter``, ``image``, ``text``, ``length``, ``log_dir``, ``np`` and
    ``time``.
    """
    logger.info('Start val')
    # for p in crnn.parameters():
    #     p.requires_grad = False
    net.eval()
    net2.eval()
    net3.eval()
    data_loader = torch.utils.data.DataLoader(
        _dataset,
        shuffle=False,
        batch_size=params.batchSize,
        num_workers=int(params.workers),
        collate_fn=dataset.alignCollate(imgH=params.imgH,
                                        imgW=params.imgW,
                                        keep_ratio=params.keep_ratio))
    val_iter = iter(data_loader)
    i = 0
    n_correct = 0
    loss_avg = utils.averager()
    max_iter = len(data_loader)
    record_dir = log_dir + 'epoch_%d_step_%d_data.txt' % (epoch, step)
    record_dir1 = log_dir + 'epoch_%d_step_%d_data1.txt' % (epoch, step)
    record_dir2 = log_dir + 'epoch_%d_step_%d_data2.txt' % (epoch, step)
    r = 1
    f = open(record_dir, "a")
    # NOTE(review): f1 and f2 are opened but never written to or closed.
    f1 = open(record_dir1, "a")
    f2 = open(record_dir2, "a")
    num_label, num_pred = params.total_num, 0

    start = time.time()
    # NOTE(review): the outer loop variable ``i`` is shadowed by several
    # inner loops below; ``for`` reassigns it each outer iteration so the
    # loop still terminates, but the i < 6000 check reads the outer value.
    for i in range(max_iter):
        data = val_iter.next()
        if i < 6000:
            pass  #continue
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)

        with torch.no_grad():
            n1img = net(image)
            n2img = net2(image)
            n3img = net3(image)
        preds_size = Variable(torch.IntTensor([n1img.size(0)] * batch_size))

        _, n1 = n1img.max(2)
        _, n2 = n2img.max(2)
        _, n3 = n3img.max(2)
        ind = torch.arange(batch_size)
        _ind = torch.arange(batch_size)
        n1_index = n1.transpose(1, 0).data
        n2_index = n2.transpose(1, 0).data
        n3_index = n3.transpose(1, 0).data
        # ind: samples where net1 and net2 agree on the non-blank count;
        # _ind: samples where all three networks agree.
        ind = ind[torch.sum(n1_index != 0, 1) == torch.sum(n2_index != 0, 1)]
        _ind = _ind[
            (torch.sum(n1_index != 0, 1) == torch.sum(n2_index != 0, 1)) *
            (torch.sum(n3_index != 0, 1) == torch.sum(n2_index != 0, 1))]
        for i in ind:
            ind1 = np.arange(n1img.shape[0])
            ind2 = np.arange(n1img.shape[0])
            ind1 = ind1[(n1_index[int(i), :].cpu().numpy().astype(bool) != 0)]
            ind2 = ind2[(n2_index[int(i), :].cpu().numpy().astype(bool) != 0)]
            n1img[ind1, int(i), :] = (n1img[ind1, int(i), :] +
                                      n2img[ind2, int(i), :]) / 2

            if torch.sum(int(i) == _ind) > 0:
                # all three agree: 3-way average at the non-blank positions
                ind3 = np.arange(n1img.shape[0])
                ind3 = ind3[(n3_index[int(i), :].cpu().numpy().astype(bool) !=
                             0)]
                n1img[ind1, int(i), :] = (
                    n1img[ind1, int(i), :] + n2img[ind2, int(i), :] +
                    n3img[ind3, int(i), :]) / 3  #+ n3img[ind3, int(i), :]
            else:
                n1img[ind1, int(i), :] = (n1img[ind1, int(i), :] +
                                          n2img[ind2, int(i), :]) / 2

        preds = n1img
        # manual softmax over the 9116-class axis (debug prints below)
        soft_max_preds = torch.exp(n1img.data) / torch.unsqueeze(
            torch.sum(torch.exp(n1img.data), 2), 2).repeat(1, 1, 9116)
        print(n1img.data)
        print('----------------------')
        print(torch.exp(n1img.data))
        print('---------------------------------------')
        print(
            torch.unsqueeze(torch.sum(torch.exp(n1img.data), 2),
                            2).repeat(1, 1, 9116))
        print('--------------')
        print(soft_max_preds)
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)
        _, preds = preds.max(2)
        _preds = preds.transpose(1, 0).contiguous()
        preds = preds.transpose(1, 0).contiguous().view(-1)

        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        if not isinstance(sim_preds, list):
            sim_preds = [sim_preds]

        # record each prediction with its per-character confidence
        for i, pred in enumerate(sim_preds):
            ind = _preds[int(i), :] != 0
            ind2 = torch.arange(_preds.shape[1])
            ind2 = ind2[ind]
            ind = _preds[int(i), :][ind]
            d = []
            j = 0
            for _ind in ind2:
                d.append('%.2f' % (soft_max_preds[int(_ind), i, int(ind[j])]))
                j += 1
            f.write(str(r).zfill(6) + ".jpg " + pred + ' '.join(d) + "\n")
            r += 1
        # py2 byte-string labels -> unicode before comparison
        list_1 = []
        for i in cpu_texts:
            string = i.decode('utf-8', 'strict')
            list_1.append(string)
        for pred, target in zip(sim_preds, list_1):
            if pred == target:
                n_correct += 1
        num_pred += len(sim_preds)

    print("")
    f.close()

    # show raw vs. collapsed predictions for the last batch only
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:params.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, list_1):
        logger.info('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    logger.info('correct_num: %d' % (n_correct))
    logger.info('Total_num: %d' % (max_iter * params.batchSize))
    accuracy = float(n_correct) / num_pred
    recall = float(n_correct) / num_label
    logger.info(
        'Test loss: %f, accuray: %f, recall: %f, F1 score: %f, Cost : %.4fs per img'
        % (loss_avg.val(), accuracy, recall, 2 * accuracy * recall /
           (accuracy + recall + 1e-2), (time.time() - start) / max_iter))
Esempio n. 29
0
def train(field):
    """Train a CRNN with CTC loss on one LMDB field (e.g. 'name', 'address').

    Fields with variable-width images ('address', 'psb') are trained with
    batch size 1.  Checkpoints and a loss line are emitted every 500
    iterations.

    Relies on module-level BATCH_SIZE, IMAGE_HEIGHT, nc, number_hidden,
    db_path, lr, model_path and the project classes LabelConverter, CTCLoss,
    CRNN, Rescale, Normalize, LmdbDataset.
    """
    alphabet = ''.join(json.load(open('./cn-alphabet.json', 'rb')))
    nclass = len(alphabet) + 1  # add the dash -
    batch_size = BATCH_SIZE
    if field == 'address' or field == 'psb':
        batch_size = 1  # image length varies

    converter = LabelConverter(alphabet)
    criterion = CTCLoss(zero_infinity=True)

    crnn = CRNN(IMAGE_HEIGHT, nc, nclass, number_hidden)
    crnn.apply(weights_init)

    image_transform = transforms.Compose([
        Rescale(IMAGE_HEIGHT),
        transforms.ToTensor(),
        Normalize()
    ])

    dataset = LmdbDataset(db_path, field, image_transform)
    dataloader = DataLoader(dataset, batch_size=batch_size,
                            shuffle=True, num_workers=4)

    # Reusable buffers; train_batch() fills them in place via load_data().
    image = torch.FloatTensor(batch_size, 3, IMAGE_HEIGHT, IMAGE_HEIGHT)
    text = torch.IntTensor(batch_size * 5)
    length = torch.IntTensor(batch_size)

    image = Variable(image)
    text = Variable(text)
    length = Variable(length)

    loss_avg = utils.averager()
    optimizer = optim.RMSprop(crnn.parameters(), lr=lr)

    if torch.cuda.is_available():
        crnn.cuda()
        crnn = nn.DataParallel(crnn)
        image = image.cuda()
        criterion = criterion.cuda()

    def train_batch(net, iteration):
        # Run one optimization step on the next batch from ``iteration``.
        # NOTE(review): the ``net`` parameter is unused — the closure always
        # operates on the enclosing ``crnn``.
        data = iteration.next()
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.load_data(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.load_data(text, t)
        utils.load_data(length, l)

        preds = crnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        crnn.zero_grad()
        cost.backward()
        optimizer.step()
        return cost

    nepoch = 25
    for epoch in range(nepoch):
        train_iter = iter(dataloader)
        i = 0
        while i < len(dataloader):
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()

            cost = train_batch(crnn, train_iter)
            loss_avg.add(cost)
            i += 1

            if i % 500 == 0:
                print('%s [%d/%d][%d/%d] Loss: %f' %
                        (datetime.datetime.now(), epoch, nepoch, i, len(dataloader), loss_avg.val()))
                loss_avg.reset()

            # do checkpointing
            if i % 500 == 0:
                torch.save(
                    crnn.state_dict(), f'{model_path}crnn_{field}_{epoch}_{i}.pth')
Esempio n. 30
0
def val(model, converter, data_loader, max_iter=100):
    """Evaluate *model* on up to *max_iter* batches and report accuracy.

    A prediction counts as correct only when the decoded string equals the
    lower-cased ground-truth text exactly.

    Args:
        model: the trained recognition network.
        converter: label codec providing ``decode(preds, sizes, raw)``.
        data_loader: DataLoader yielding ``(images, texts)`` batches.
        max_iter: maximum number of batches to evaluate.

    Returns:
        float accuracy over ``max_iter * opt.batch_size`` samples.
    """
    # Pre-allocated input buffer, reused for every batch (CUDA required).
    image = torch.FloatTensor(opt.batch_size, 3, imgH, imgH)
    image = image.cuda()

    # Freeze parameters and switch to inference mode.
    for p in model.parameters():
        p.requires_grad = False
    model.eval()

    val_iter = iter(data_loader)

    n_correct = 0
    # NOTE(review): loss_avg is never updated here (no criterion is passed),
    # so the "Test loss" printed below reflects an empty averager.
    loss_avg = averager()

    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        # Py3 fix: `iterator.next()` no longer exists — use the builtin.
        # (Also dropped the dead `i = 0` / `i += 1`, which were overwritten
        # by the for-loop index every iteration.)
        data = next(val_iter)
        cpu_images, cpu_texts = data
        print('-------\ninput ', cpu_images.size())
        batch_size = cpu_images.size(0)
        loadData(image, cpu_images)

        preds = model(image)
        print('out ', preds.size())

        # Replicate the output sequence length once per sample for decoding.
        preds_size = Variable(torch.IntTensor([preds.size(1)] * batch_size))
        print("len ", preds_size.data)

        _, preds = preds.max(2)

        preds = preds.contiguous().view(-1)
        print("out preds ", preds.size())
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)

        # decode() returns a bare string for a single sample; normalize.
        if batch_size == 1:
            sim_preds = [sim_preds]
        for pred, target in zip(sim_preds, cpu_texts):
            print("pred ", pred, 'gt ', target)
            if pred == target.lower():
                print('true')
                n_correct += 1
            else:
                print('false')

    # NOTE(review): the denominator assumes every batch is full; a short
    # final batch slightly under-reports accuracy — confirm intent.
    accuracy = n_correct / float(max_iter * opt.batch_size)
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
    return accuracy
Esempio n. 31
0
# Pre-allocated buffers reused for every batch: image tensor, encoded
# label text, and per-sample label lengths.
# NOTE(review): width is opt.imgH as well — square inputs appear assumed.
image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH)
text = torch.IntTensor(opt.batchSize * 5)
length = torch.IntTensor(opt.batchSize)

# Move model, image buffer and loss to the GPU when requested via CLI.
if opt.cuda:
    crnn.cuda()
    image = image.cuda()
    criterion = criterion.cuda()

image = Variable(image)
text = Variable(text)
length = Variable(length)

# loss averager
loss_avg = utils.averager()

# setup optimizer: Adam / Adadelta / RMSprop, selected by CLI flags.
if opt.adam:
    optimizer = optim.Adam(crnn.parameters(), lr=opt.lr,
                           betas=(opt.beta1, 0.999))
elif opt.adadelta:
    optimizer = optim.Adadelta(crnn.parameters(), lr=opt.lr)
else:
    optimizer = optim.RMSprop(crnn.parameters(), lr=opt.lr)


def val(net, dataset, criterion, max_iter=100):
    print('Start val')

    for p in crnn.parameters():
Esempio n. 32
0
# Pre-allocated buffers reused for every batch: image tensor, encoded
# label text, and per-sample label lengths.
# NOTE(review): width is opt.imgH as well — square inputs appear assumed.
image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH)
text = torch.IntTensor(opt.batchSize * 5)
length = torch.IntTensor(opt.batchSize)

# Move model, image buffer and loss onto the selected GPU when available.
if torch.cuda.is_available():
    crnn = crnn.cuda(opt.gpu)
    # crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu))
    image = image.cuda(opt.gpu)
    criterion = criterion.cuda(opt.gpu)

image = Variable(image)
text = Variable(text)
length = Variable(length)

# loss averager
loss_avg = utils.averager()

# setup optimizer: Adam / Adadelta / RMSprop, selected by CLI flags.
# Note Adadelta here runs with its library-default learning rate.
if opt.adam:
    optimizer = optim.Adam(crnn.parameters(),
                           lr=opt.lr,
                           betas=(opt.beta1, 0.999))
elif opt.adadelta:
    optimizer = optim.Adadelta(crnn.parameters())
else:
    optimizer = optim.RMSprop(crnn.parameters(), lr=opt.lr)


def val(net, criterion, max_iter=100):
    print('Start val')
Esempio n. 33
0
def val(net, criterion, max_iter=100):
    """Evaluate *net* on the validation LMDB set defined in ``params``.

    Computes the CTC loss and exact-match accuracy over up to *max_iter*
    batches, printing a few sample predictions at the end.

    Args:
        net: the CRNN model to evaluate.
        criterion: CTC loss module.
        max_iter: maximum number of validation batches.

    Returns:
        float accuracy, or ``None`` if evaluation failed.
    """
    import traceback

    print('Start val')
    # read test set
    test_dataset = dataset.lmdbDataset(root=params.valroot,
                                       transform=dataset.resizeNormalize(
                                           (params.imgW, params.imgH)))

    # Freeze weights and switch to inference mode. Uses `net` (the model
    # passed in) throughout — the original body silently ignored it and
    # operated on the module-level `crnn` instead.
    for p in net.parameters():
        p.requires_grad = False
    net.eval()

    accuracy = None
    try:
        data_loader = torch.utils.data.DataLoader(test_dataset,
                                                  shuffle=True,
                                                  batch_size=params.batchSize,
                                                  num_workers=int(
                                                      params.workers))
        val_iter = iter(data_loader)
        n_correct = 0
        loss_avg = utils.averager()

        max_iter = min(max_iter, len(data_loader))
        for _ in range(max_iter):
            # Py3 fix: `iterator.next()` no longer exists — use the builtin.
            cpu_images, cpu_texts = next(val_iter)
            batch_size = cpu_images.size(0)
            # Copy the batch into the module-level pre-allocated buffers.
            utils.loadData(image, cpu_images)
            t, l = converter.encode(cpu_texts)
            utils.loadData(text, t)
            utils.loadData(length, l)
            preds = net(image)
            preds_size = Variable(torch.IntTensor([preds.size(0)] *
                                                  batch_size))
            cost = criterion(preds, text, preds_size, length) / batch_size
            loss_avg.add(cost)

            _, preds = preds.max(2)
            preds = preds.transpose(1, 0).contiguous().view(-1)
            sim_preds = converter.decode(preds.data,
                                         preds_size.data,
                                         raw=False)
            # Ground-truth labels arrive as bytes; decode before comparing.
            # (Renamed from `i`, which shadowed the outer loop variable.)
            decoded_targets = [raw_text.decode('utf-8', 'strict')
                               for raw_text in cpu_texts]
            for pred, target in zip(sim_preds, decoded_targets):
                if pred == target:
                    n_correct += 1

        # Show a few raw (pre-CTC-collapse) predictions from the last batch.
        raw_preds = converter.decode(preds.data, preds_size.data,
                                     raw=True)[:params.n_test_disp]
        for raw_pred, pred, gt in zip(raw_preds, sim_preds, decoded_targets):
            print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

        # NOTE(review): denominator assumes every batch is full; a short
        # final batch slightly under-reports accuracy.
        accuracy = n_correct / float(max_iter * params.batchSize)
        print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
    except Exception:
        # Best-effort evaluation: the original bare `except: pass` hid all
        # failures. Keep training alive, but log what went wrong.
        traceback.print_exc()
    return accuracy