def evaluate(image, text, encoder, decoder, data_loader, max_eval_iter=100):
    """Greedy-decode up to ``max_eval_iter`` batches and print per-token accuracy.

    Relies on module-level ``utils`` and ``converter`` helpers.
    NOTE(review): the ``ni == utils.EOS_TOKEN`` comparison only makes sense
    for batch_size == 1 (``ni`` is a tensor) -- TODO confirm with callers.
    """
    # Freeze both networks for evaluation.
    for e, d in zip(encoder.parameters(), decoder.parameters()):
        e.requires_grad = False
        d.requires_grad = False

    encoder.eval()
    decoder.eval()
    val_iter = iter(data_loader)

    n_correct = 0
    n_total = 0
    loss_avg = utils.Averager()

    # FIX: evaluation previously ran with autograd enabled.
    with torch.no_grad():
        for i in range(min(len(data_loader), max_eval_iter)):
            # FIX: iterator.next() was removed in Python 3; use next().
            cpu_images, cpu_texts = next(val_iter)
            batch_size = cpu_images.size(0)
            utils.load_data(image, cpu_images)

            target_variable = converter.encode(cpu_texts)
            # +1 accounts for the EOS token appended to each target.
            n_total += len(cpu_texts[0]) + 1

            decoded_words = []
            decoded_label = []
            encoder_outputs = encoder(image)
            if torch.cuda.is_available():
                target_variable = target_variable.cuda()
                decoder_input = target_variable[0].cuda()
                decoder_hidden = decoder.initHidden(batch_size).cuda()
            else:
                decoder_input = target_variable[0]
                decoder_hidden = decoder.initHidden(batch_size)

            # Greedy decoding: feed the argmax prediction back as next input.
            for di in range(1, target_variable.shape[0]):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                topv, topi = decoder_output.data.topk(1)
                ni = topi.squeeze(1)
                decoder_input = ni
                if ni == utils.EOS_TOKEN:
                    decoded_label.append(utils.EOS_TOKEN)
                    break
                else:
                    decoded_words.append(converter.decode(ni))
                    decoded_label.append(ni)

            # Position-by-position comparison against targets (skipping SOS).
            for pred, target in zip(decoded_label, target_variable[1:, :]):
                if pred == target:
                    n_correct += 1

            if i % 10 == 0:
                texts = cpu_texts[0]
                print('pred {}: {}'.format(i, ''.join(decoded_words)))
                print('gt {}: {}'.format(i, texts))

    # FIX: guard against ZeroDivisionError when the loader is empty.
    accuracy = n_correct / float(n_total) if n_total else 0.0
    print('Test loss: {}, accuray: {}'.format(loss_avg.val(), accuracy))
# Exemple #2 (scraper artifact; commented out so the file parses)
def evaluate(image, text, model, criterion, data_loader, max_eval_iter=100):
    """Evaluate ``model`` on up to ``max_eval_iter`` batches and print losses.

    Despite its label, the printed 'accuracy' is really the mean per-batch
    loss -- no token-level accuracy is computed here.
    Relies on module-level ``utils`` and ``converter`` helpers.
    """
    model.eval()
    val_iter = iter(data_loader)

    epoch_loss = 0.0
    # FIX: divide by the number of batches actually evaluated, not by
    # max_eval_iter (the loader may hold fewer batches).
    n_batches = min(len(data_loader), max_eval_iter)

    with torch.no_grad():
        for i in range(n_batches):
            # FIX: iterator.next() was removed in Python 3; use next().
            cpu_images, cpu_texts = next(val_iter)
            utils.load_data(image, cpu_images)

            target_variable = converter.encode(cpu_texts)

            decoded_words = []
            if torch.cuda.is_available():
                target_variable = target_variable.cuda()

            # Teacher-forcing ratio 0: the model decodes from its own output.
            decoded_label = model(image, target_variable, 0)
            label_number, batch, output_dim = decoded_label.size()
            # Flatten (seq, batch, dim) -> (seq*batch, dim) for the criterion.
            decoded_label = decoded_label.view(-1, output_dim)
            target_variable = target_variable.view(-1)

            loss = criterion(decoded_label, target_variable)
            epoch_loss += loss.item()

            if i % 10 == 0:
                texts = cpu_texts[0]
                for idl in range(decoded_label.shape[0]):
                    topv, topi = decoded_label[idl].data.topk(1)
                    ni = topi.squeeze()
                    decoded_words.append(converter.decode(ni))

                print('pred {}: {}'.format(i, ' '.join(decoded_words)))
                print('gt {}: {}\n'.format(i, texts))

    accuracy = epoch_loss / n_batches if n_batches else 0.0
    print('Test epoch loss: {}, accuray: {}'.format(epoch_loss, accuracy))
def valid(net, valid_loader, device, cfg):
    """Run CTC validation over ``valid_loader`` and print loss and accuracy.

    Relies on module-level ``matrix2linear`` and ``decode`` helpers.
    ``cfg`` is unused but kept for signature compatibility with callers.
    """
    print('start valid')
    criterion = nn.CTCLoss()

    loss_avg = utils.Averager()

    net.eval()
    correct_num = 0
    total_num = 0

    # FIX: validation previously ran with autograd enabled, building a
    # computation graph (and wasting memory) for every batch.
    with torch.no_grad():
        for i, (images, labels) in enumerate(valid_loader):
            images = images.to(device)
            labels, labels_len = matrix2linear(labels)
            labels = labels.to(device)
            labels_len = torch.IntTensor(labels_len)

            preds = net(images)

            # CTC needs the (time-major) sequence length for every sample.
            preds_len = torch.IntTensor([preds.size(0)] * int(preds.size(1)))
            # cuDNN CTC can be unstable; force the native implementation.
            with torch.backends.cudnn.flags(enabled=False):
                loss = criterion(preds, labels, preds_len, labels_len)
            loss_avg.add(loss)

            preds = preds.max(2)[1]  # argmax over the class dimension
            preds = preds.transpose(1, 0).contiguous().view(-1)
            preds = decode(preds)  # presumably collapses CTC blanks/repeats -- TODO confirm
            total_num += len(preds)
            for x, y in zip(preds, labels):
                if int(x) == int(y):
                    correct_num += 1

    # FIX: guard against ZeroDivisionError on an empty loader.
    acc = correct_num / float(total_num) * 100 if total_num else 0.0
    valid_loss = loss_avg.val()
    print('Valid Loss: {0:.3f}, Accuracy: {1:.3f}%'.format(valid_loss, acc))
def train(net, train_loader, valid_loader, device, cfg):
    """Train ``net`` with CTC loss, validating and checkpointing per epoch.

    Relies on module-level ``matrix2linear`` and the ``valid`` helper.
    Expects ``cfg`` to provide: learning_rate, weight_decay, num_epochs,
    display_interval, valid_interval, model_path.
    """
    criterion = nn.CTCLoss()
    # optimizer = torch.optim.Adadelta(net.parameters(), lr=cfg.learning_rate)
    optimizer = torch.optim.Adam(
        net.parameters(),
        lr=cfg.learning_rate,
        weight_decay=cfg.weight_decay,
    )
    # scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.8)
    # Decay the learning rate by 10x after every epoch.
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer,
        step_size=1,
        gamma=0.1,
    )

    loss_avg = utils.Averager()

    net = net.to(device)

    for epoch in range(cfg.num_epochs):
        for i, (images, labels) in enumerate(train_loader):
            # Re-enter train mode each batch (valid() below switches to eval).
            net.train()
            images = images.to(device)
            labels, labels_len = matrix2linear(labels)
            labels = labels.to(device)
            labels_len = torch.IntTensor(labels_len)

            preds = net(images)
            # print(preds.size(), preds_len)
            # Abort the epoch if the forward pass produced NaNs.
            # NOTE(review): scheduler.get_lr() is deprecated in newer PyTorch
            # in favor of get_last_lr() -- verify against the pinned version.
            if torch.sum(torch.isnan(preds)) >= 1:
                print('nan: {}, lr: {}'.format(i + 1, scheduler.get_lr()[0]))
                break

            # CTC needs the (time-major) sequence length for every sample.
            preds_len = torch.IntTensor([preds.size(0)] * int(preds.size(1)))
            # cuDNN CTC can be unstable; force the native implementation.
            with torch.backends.cudnn.flags(enabled=False):
                loss = criterion(preds, labels, preds_len, labels_len)

            loss_avg.add(loss)

            if (i + 1) % cfg.display_interval == 0:
                print('[Epoch {0}/{1}] [Batch {2}/{3}] Loss: {4:.3f}'.format(
                    epoch + 1,
                    cfg.num_epochs,
                    i + 1,
                    len(train_loader),
                    loss_avg.val(),
                ))
                loss_avg.reset()

            optimizer.zero_grad()
            loss.backward()
            # Clip gradients to stabilize CTC training.
            nn.utils.clip_grad_norm_(
                net.parameters(),
                max_norm=20,
                norm_type=2,
            )
            optimizer.step()
            if (i + 1) % cfg.valid_interval == 0:
                valid(net, valid_loader, device, cfg)

        scheduler.step()
        # Checkpoint once per epoch.
        torch.save(net.state_dict(),
                   '{0}/crnn_ctc_{1}.pth'.format(cfg.model_path, epoch))
def train(image,
          text,
          encoder,
          decoder,
          criterion,
          train_loader,
          teach_forcing_prob=1):
    """Train the attention encoder/decoder pair, checkpointing per epoch.

    Relies on module-level ``utils``, ``converter``, ``cfg`` and ``Logger``.
    CUDA is assumed available (targets are moved with ``.cuda()``).
    """
    logger = Logger('log/')
    # optimizer
    encoder_optimizer = torch.optim.Adam(encoder.parameters(),
                                         lr=cfg.learning_rate,
                                         betas=(0.5, 0.999))
    decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                         lr=cfg.learning_rate,
                                         betas=(0.5, 0.999))

    # loss averager
    loss_avg = utils.Averager()

    # FIX: unfreezing parameters per batch was redundant; do it once.
    for encoder_param, decoder_param in zip(encoder.parameters(),
                                            decoder.parameters()):
        encoder_param.requires_grad = True
        decoder_param.requires_grad = True

    for epoch in range(cfg.num_epochs):
        train_iter = iter(train_loader)
        encoder.train()
        decoder.train()

        for i in range(len(train_loader)):
            # FIX: iterator.next() was removed in Python 3; use next().
            cpu_images, cpu_texts = next(train_iter)
            batch_size = cpu_images.size(0)

            target_variable = converter.encode(cpu_texts)
            utils.load_data(image, cpu_images)

            # CNN + BiLSTM
            encoder_outputs = encoder(image)
            target_variable = target_variable.cuda()
            # start decoder for SOS_TOKEN
            decoder_input = target_variable[utils.SOS_TOKEN].cuda()
            decoder_hidden = decoder.initHidden(batch_size).cuda()

            loss = 0.0
            teach_forcing = random.random() > teach_forcing_prob
            if teach_forcing:
                # Teacher forcing: feed the ground-truth token at each step.
                for di in range(1, target_variable.shape[0]):
                    decoder_output, decoder_hidden, decoder_attention = decoder(
                        decoder_input, decoder_hidden, encoder_outputs)
                    loss += criterion(decoder_output, target_variable[di])
                    decoder_input = target_variable[di]
            else:
                # Free running: feed the model's own argmax prediction back.
                for di in range(1, target_variable.shape[0]):
                    decoder_output, decoder_hidden, decoder_attention = decoder(
                        decoder_input, decoder_hidden, encoder_outputs)
                    loss += criterion(decoder_output, target_variable[di])
                    topv, topi = decoder_output.data.topk(1)
                    ni = topi.squeeze()
                    decoder_input = ni
            encoder.zero_grad()
            decoder.zero_grad()
            loss.backward()
            encoder_optimizer.step()
            decoder_optimizer.step()

            loss_avg.add(loss)

            if i % 10 == 0:
                print('[Epoch {0}/{1}] [Batch {2}/{3}] Loss: {4}'.format(
                    epoch, cfg.num_epochs, i, len(train_loader),
                    loss_avg.val()))
                logger.scalar_summary(
                    'Loss of Epoch{0}/miniBatch(100)'.format(epoch),
                    loss_avg.val(), i)
                logger.scalar_summary('Loss of Epoch/miniBatch(100)',
                                      loss_avg.val(),
                                      epoch * len(train_loader) + i)
                loss_avg.reset()

        # save checkpoint
        torch.save(encoder.state_dict(),
                   '{0}/encoder_{1}.pth'.format(cfg.model, epoch))
        torch.save(decoder.state_dict(),
                   '{0}/decoder_{1}.pth'.format(cfg.model, epoch))
# Exemple #6 (scraper artifact; commented out so the file parses)
def train(image, text, model, criterion, train_loader, teach_forcing_prob=0.5):
    """Train an end-to-end seq2seq ``model`` with cross-entropy loss.

    Relies on module-level ``utils``, ``converter``, ``cfg`` and ``clip``.
    NOTE(review): ``teach_forcing_prob`` is not forwarded to the model --
    the model call uses its own default teacher-forcing ratio. TODO confirm
    whether ``model(image, target_variable, teach_forcing_prob)`` was meant.
    NOTE(review): ``clip`` must be a module-level constant; it is not
    defined in this function -- verify it exists.
    """
    # Single optimizer over the whole model (encoder + decoder fused).
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=cfg.learning_rate,
                                 betas=(0.5, 0.999))

    # loss averager
    loss_avg = utils.Averager()

    for epoch in range(cfg.num_epochs):
        train_iter = iter(train_loader)

        for i in range(len(train_loader)):
            # FIX: iterator.next() was removed in Python 3; use next().
            cpu_images, cpu_texts = next(train_iter)

            optimizer.zero_grad()
            model.train()

            target_variable = converter.encode(cpu_texts)
            utils.load_data(image, cpu_images)

            if torch.cuda.is_available():
                target_variable = target_variable.cuda()

            # output: (trg_len, batch, output_dim)
            output = model(image, target_variable)

            output_dim = output.shape[-1]

            # Drop the SOS position and flatten for the criterion:
            # output -> ((trg_len - 1) * batch, output_dim)
            # target -> ((trg_len - 1) * batch)
            output = output[1:].view(-1, output_dim)
            target_variable = target_variable[1:].view(-1)

            loss = criterion(output, target_variable)
            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)

            optimizer.step()

            loss_avg.add(loss)

            if i % 1 == 0:
                print('[Epoch {0}/{1}] [Batch {2}/{3}] Loss: {4}'.format(
                    epoch + 1, cfg.num_epochs, i + 1, len(train_loader),
                    loss_avg.val()))
                loss_avg.reset()
def train(image,
          text,
          encoder,
          decoder,
          criterion,
          train_loader,
          teach_forcing_prob=0.5):
    """Train the attention encoder/decoder with scheduled teacher forcing.

    Relies on module-level ``utils``, ``converter`` and ``cfg``.
    """
    # optimizer
    encoder_optimizer = torch.optim.Adam(encoder.parameters(),
                                         lr=cfg.learning_rate,
                                         betas=(0.5, 0.999))
    decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                         lr=cfg.learning_rate,
                                         betas=(0.5, 0.999))

    # loss averager
    loss_avg = utils.Averager()

    for epoch in range(cfg.num_epochs):
        train_iter = iter(train_loader)

        for i in range(len(train_loader)):
            # Clear gradients once per batch (the redundant
            # encoder/decoder.zero_grad() calls before backward were removed;
            # this is the single zeroing point).
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()

            # FIX: iterator.next() was removed in Python 3; use next().
            cpu_images, cpu_texts = next(train_iter)
            batch_size = cpu_images.size(0)

            for encoder_param, decoder_param in zip(encoder.parameters(),
                                                    decoder.parameters()):
                encoder_param.requires_grad = True
                decoder_param.requires_grad = True
            encoder.train()
            decoder.train()

            target_variable = converter.encode(cpu_texts)
            utils.load_data(image, cpu_images)

            # CNN + BiLSTM
            encoder_outputs = encoder(image)
            if torch.cuda.is_available():
                target_variable = target_variable.cuda()
                # start decoder for SOS_TOKEN
                decoder_input = target_variable[utils.SOS_TOKEN].cuda()
                decoder_hidden = decoder.initHidden(batch_size).cuda()
            else:
                decoder_input = target_variable[utils.SOS_TOKEN]
                decoder_hidden = decoder.initHidden(batch_size)

            loss = 0.0
            teach_forcing = random.random() > teach_forcing_prob
            # The extra shape condition forces teacher forcing on the last,
            # possibly smaller, batch of an epoch -- TODO confirm intent.
            if teach_forcing or decoder_input.shape[0] < cfg.batch_size:
                # Teacher forcing: feed the ground-truth token at each step.
                for di in range(1, target_variable.shape[0]):
                    decoder_output, decoder_hidden, decoder_attention = decoder(
                        decoder_input, decoder_hidden, encoder_outputs)
                    loss += criterion(decoder_output, target_variable[di])
                    decoder_input = target_variable[di]
            else:
                # Free running: feed the model's own argmax prediction back.
                for di in range(1, target_variable.shape[0]):
                    decoder_output, decoder_hidden, decoder_attention = decoder(
                        decoder_input, decoder_hidden, encoder_outputs)
                    loss += criterion(decoder_output, target_variable[di])
                    topv, topi = decoder_output.data.topk(1)
                    ni = topi.squeeze()
                    decoder_input = ni

            loss.backward()
            encoder_optimizer.step()
            decoder_optimizer.step()

            loss_avg.add(loss)

            if i % 1 == 0:
                print('[Epoch {0}/{1}] [Batch {2}/{3}] Loss: {4}'.format(
                    epoch + 1, cfg.num_epochs, i + 1, len(train_loader),
                    loss_avg.val()))
                loss_avg.reset()

        # save checkpoint
        torch.save(encoder.state_dict(),
                   '{0}/encoder_{1}.pth'.format(cfg.model, epoch))
        torch.save(decoder.state_dict(),
                   '{0}/decoder_{1}.pth'.format(cfg.model, epoch))