コード例 #1
0
ファイル: main.py プロジェクト: ravindra098/CRNN
def main(epoch_num, lr=0.1, training=True, fix_width=True):
    """
    Train and then test the CRNN model, or only test it.

    If a checkpoint file already exists at the model path it is loaded into
    the freshly built network first. In training mode the trained weights are
    written back to that same path before the test run.

    Args:
        epoch_num: Epoch count, forwarded unchanged to ``train``
        lr (float, optional): Learning rate, forwarded to ``train`` (default: 0.1)
        training (bool, optional): If True, train the model, otherwise test it (default: True)
        fix_width (bool, optional): Scale images to fixed size (default: True)
    """

    model_path = ('fix_width_' if fix_width else '') + 'crnn.pth'
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    root = 'data/IIIT5K/'

    # One output class more than the alphabet (presumably the CTC blank --
    # confirm against the CRNN definition).
    net = CRNN(1, len(letters) + 1)

    # if there is pre-trained model, load it
    if os.path.exists(model_path):
        if training:
            print('Pre-trained model detected.\nLoading model...')
        # map_location='cpu' lets a checkpoint that was saved from a CUDA run
        # be restored on a machine without a GPU; load_state_dict then copies
        # the values into the parameters of the (CPU-resident) new network.
        net.load_state_dict(torch.load(model_path, map_location='cpu'))

    if training:
        if torch.cuda.is_available():
            print('GPU detected.')
        start_epoch = 0
        net = train(root,
                    start_epoch,
                    epoch_num,
                    letters,
                    net=net,
                    lr=lr,
                    fix_width=fix_width)
        # save the trained model for training again
        torch.save(net.state_dict(), model_path)

    # Both modes finish with an evaluation pass.
    test(root, net, letters, fix_width=fix_width)
コード例 #2
0
ファイル: train.py プロジェクト: LiBiying/OCR_XJTU_RPLN

#    elif isinstance(m, nn.Linear):
#        m.weight.data.normal_(0.0, 0.02)
#        m.bias.data.fill_(0)

# Build the network. The positional arguments are passed through as given;
# their meaning is defined by the CRNN constructor, which is not shown here.
net = CRNN(48,
           1,
           len(char2index),
           256,
           opt.nrnn,
           opt.dropout,
           opt.variational_dropout,
           leakyRelu=True)
print(net)

# Report the total number of elements held in the state dict.
# numel() is used instead of reduce(mul, v.numpy().shape): the reduce form
# raises TypeError for 0-dim entries (empty shape, no initial value) and
# .numpy() only works for tensors that live on the CPU.
params = net.state_dict()
params_shape = [v.numel() for v in params.values()]
params_total = sum(params_shape)
print('params_total:', params_total)

# Either resume from a stored checkpoint or start from freshly initialised
# weights.
if opt.finetune:
    print('Loading model from', opt.modeldir + opt.modelname)
    net.load_state_dict(torch.load(opt.modeldir + opt.modelname))
else:
    print('create new model')
    net.apply(weights_init)

if opt.ngpu > 1:
コード例 #3
0
def train(path=None):
    """Train a CRNN on generated text images, checkpointing after each epoch.

    Every epoch draws 100 batches from the generator, and the model and
    optimizer state are written to "checkpoint5.pt" when the epoch ends.

    Args:
        path: Optional checkpoint file to resume from. It must hold the keys
            "model_state_dict", "optimizer_state_dict", "epoch" and "loss",
            i.e. the layout this function writes. When given, training
            continues with the epoch after the stored one instead of
            starting over at epoch 1; the upper bound (epoch 999) is
            unchanged.
    """
    dataset = FakeTextImageGenerator(batch_size=16).iter()

    criterion = CTCLoss(reduction="mean", zero_infinity=True)

    net = CRNN(nclass=100).float()
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    first_epoch = 1
    if path:
        checkpoint = torch.load(path)
        net.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        epoch = checkpoint["epoch"]
        loss = checkpoint["loss"]
        print(f"model current epoch: {epoch} with loss: {loss}")
        # Resume after the last completed epoch so the epoch number stored in
        # later checkpoints keeps increasing instead of restarting at 1.
        first_epoch = epoch + 1

    # loop over the dataset multiple times
    for epoch in range(first_epoch, 1000):
        running_loss = 0.0
        loop = tqdm(range(100))
        for i in loop:
            data = next(dataset)
            images = data["the_inputs"]
            labels = data["the_labels"]
            input_length = data["input_length"]
            label_length = data["label_length"]
            # NOTE(review): the batch also carries data["targets"], which the
            # loss below does not use -- confirm "the_labels" is the intended
            # CTC target.

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(images.float())
            loss = criterion(outputs, labels, input_length, label_length)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # Progress bar shows the mean loss over the batches seen so far.
            loop.set_postfix(epoch=epoch, loss=(running_loss / (i + 1)))

        # "loss" stores the summed (not averaged) loss of this epoch.
        torch.save(
            {
                "epoch": epoch,
                "model_state_dict": net.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "loss": running_loss,
            },
            "checkpoint5.pt",
        )

    print("Finished Training")
コード例 #4
0
    return accuarcy


# Main training loop: one pass over the training loader per epoch, then a
# validation run and a checkpoint named after the epoch and its accuracy.
# `total_loss` is expected to be initialised before this loop (not shown here).
for i in range(option.nepoch):
    for j, (input, label) in enumerate(trainset_dataloader):
        # The final batch of every epoch is skipped -- presumably because it
        # can be smaller than option.batch_size, which predicted_length
        # below assumes. TODO confirm.
        if j == len(trainset_dataloader) - 1:
            continue
        crnn.zero_grad()
        label, length = converter.encode(label)
        input = input.cuda()
        predicted_label = crnn(input)
        # One length entry per sample, all equal to predicted_label.size(0).
        predicted_length = [predicted_label.size(0)] * option.batch_size
        label = torch.tensor(label, dtype=torch.long)
        label = label.cuda()
        predicted_length = torch.tensor(predicted_length, dtype=torch.long)
        length = torch.tensor(length, dtype=torch.long)
        loss = loss_function(predicted_label, label, predicted_length, length)
        loss.backward()
        optimizer.step()

        # Accumulate a plain Python float. Adding the loss tensor itself
        # keeps tensors (and their autograd history) alive across
        # iterations, which grows memory use for no benefit.
        total_loss += loss.item()
        if j % print_every == 0:
            print('[%d / %d] [%d / %d] loss: %.4f' %
                  (i, option.nepoch, j, len(trainset_dataloader),
                   total_loss / print_every))
            total_loss = 0

    accuracy = validation()
    print('save model...')
    torch.save(crnn.state_dict(), 'model/crnn_%d_%.4f.pth' % (i, accuracy))
コード例 #5
0
def train():
    """Train the CRNN in Paddle dygraph mode, saving latest and best models.

    All settings are read from the module-level ``train_parameters`` dict.
    After each epoch the model and optimizer state are saved as
    ``crnn_latest``; the model is then evaluated with ``eval_model`` and,
    when the result is at least as good as the best seen so far, saved again
    as ``crnn_best``.
    """
    epoch_num = train_parameters["num_epochs"]
    batch_size = train_parameters["train_batch_size"]

    place = fluid.CUDAPlace(
        0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
    logger.info('train with {}'.format(place))
    with fluid.dygraph.guard(place):
        # Data loading. The list file is read inside a context manager so
        # the handle is closed right away instead of being leaked.
        with open(train_parameters['train_list']) as list_file:
            file_list = list_file.readlines()
        train_reader = get_loader(
            file_list=file_list,
            input_size=train_parameters['input_size'],
            max_char_per_line=train_parameters['max_char_per_line'],
            mean_color=train_parameters['mean_color'],
            batch_size=train_parameters['train_batch_size'],
            mode='train',
            label_dict=train_parameters['label_dict'],
            place=place)

        batch_num = len(train_reader())

        # class_dim + 1 outputs: index class_dim is used as the CTC blank in
        # the warpctc call below.
        crnn = CRNN(train_parameters["class_dim"] + 1, batch_size=batch_size)
        total_step = batch_num * epoch_num
        LR = train_parameters['learning_rate']
        lr = fluid.layers.polynomial_decay(LR, total_step, 1e-7, power=0.9)
        # lr = fluid.layers.piecewise_decay([total_step // 3, total_step * 2 // 3], [LR, LR * 0.1, LR * 0.01])
        optimizer = fluid.optimizer.Adam(learning_rate=lr,
                                         parameter_list=crnn.parameters())

        if train_parameters["continue_train"]:
            # Load the model from the previous training run and continue.
            params_dict, opt_dict = fluid.load_dygraph('{}/crnn_latest'.format(
                train_parameters['save_model_dir']))
            crnn.set_dict(params_dict)
            optimizer.set_dict(opt_dict)
            logger.info("load model from {}".format(
                train_parameters['save_model_dir']))

        current_best = -1
        start_epoch = 0
        log_interval = 1  # log every step
        for epoch in range(start_epoch, epoch_num):
            crnn.train()
            tic = time.time()
            for batch_id, (img, label, label_len) in enumerate(train_reader()):
                out = crnn(img)

                # warpctc receives the output with its first two axes swapped.
                out_for_loss = fluid.layers.transpose(out, [1, 0, 2])
                # One length per sample (out.shape[0] entries), each equal to
                # out.shape[1].
                input_length = np.array([out.shape[1]] *
                                        out.shape[0]).astype("int64")
                input_length = fluid.dygraph.to_variable(input_length)
                input_length.stop_gradient = True
                loss = fluid.layers.warpctc(
                    input=out_for_loss,
                    label=label.astype(np.int32),
                    input_length=input_length,
                    label_length=label_len,
                    blank=train_parameters["class_dim"],
                    norm_by_times=True)
                avg_loss = fluid.layers.reduce_mean(loss)

                cur_acc_num, cur_all_num = acc_batch(out.numpy(),
                                                     label.numpy())
                if batch_id % log_interval == 0:
                    logger.info(
                        "epoch [{}/{}], step [{}/{}], loss: {:.6f}, acc: {:.4f}, lr: {}, time: {:.4f}"
                        .format(epoch, epoch_num, batch_id, batch_num,
                                avg_loss.numpy()[0], cur_acc_num / cur_all_num,
                                optimizer.current_step_lr(),
                                time.time() - tic))
                    tic = time.time()
                avg_loss.backward()
                optimizer.minimize(avg_loss)
                crnn.clear_gradients()

            # Model and optimizer state are saved under the same path prefix
            # (presumably save_dygraph picks distinct file suffixes for the
            # two kinds of state -- confirm against the Paddle docs).
            fluid.save_dygraph(
                crnn.state_dict(),
                '{}/crnn_latest'.format(train_parameters['save_model_dir']))
            fluid.save_dygraph(
                optimizer.state_dict(),
                '{}/crnn_latest'.format(train_parameters['save_model_dir']))
            crnn.eval()
            ratio = eval_model(crnn, place=place)
            # ">=" means a tie with the current best also overwrites crnn_best.
            if ratio >= current_best:
                fluid.save_dygraph(
                    crnn.state_dict(),
                    '{}/crnn_best'.format(train_parameters['save_model_dir']))
                fluid.save_dygraph(
                    optimizer.state_dict(),
                    '{}/crnn_best'.format(train_parameters['save_model_dir']))
                current_best = ratio
                logger.info("save model to {}, current best acc:{:.2f}".format(
                    train_parameters['save_model_dir'], ratio))
    logger.info("train end")
コード例 #6
0
ファイル: train.py プロジェクト: maiduchoang2498/OCR-CRNN
 # One training step plus running-accuracy bookkeeping. The enclosing loop
 # and the definitions of encode_text, batch, length, raw_text, number,
 # correct, step and epoch are outside this excerpt.
 encode_text = Variable(encode_text).to(device)
 image = Variable(batch["image"]).to(device)
 model.train()
 output = model(image)
 optimizer.zero_grad()
 # One length entry per sample, all equal to output.size(0).
 # NOTE(review): the count comes from opt.batchsize, not from the actual
 # batch -- confirm that every batch (including the last) is full-sized.
 output_size = Variable(
     torch.IntTensor([output.size(0)] * opt.batchsize)).to(device)
 loss = lossfunction(output, encode_text, output_size, length)
 loss.backward()
 optimizer.step()
 # Take the arg-max over dim 2 and swap the first two axes so that the loop
 # below iterates over output[i] one row at a time (presumably one row per
 # sample -- confirm the model's output layout).
 _, output = output.max(2)
 output = output.transpose(1, 0)
 outputtext = []
 for i in range(0, output.size(0)):
     decode_text = process.decodetext(output[i])
     outputtext += [decode_text]
     number += 1
     # Exact string match against the ground-truth text counts as correct.
     if decode_text == raw_text[i]:
         correct += 1
 # Accuracy over everything counted in `correct` / `number` so far; where
 # those counters are reset is not visible here.
 accuracy = float(correct / number)
 print('epoch:%d-----step:%d/%d-----loss value:%f-----accuracy:%f\n' %
       (epoch, step, len(traindata), loss, accuracy))
 print(raw_text, '\n')
 print(outputtext, '\n')
 writer.add_scalar("Training Loss", loss, step)
 step += 1
 # Every 100 steps: run test() and write a checkpoint named after the
 # current epoch and step.
 if step % 100 == 0:
     test()
     torch.save(model.state_dict(),
                "savedmodel/epoch%d-step%d.pth" % (epoch, step))
 # NOTE(review): accuracy is logged against `epoch`, while the loss above is
 # logged against `step` -- confirm this is intended.
 writer.add_scalar("Training Accuracy", accuracy, epoch)
コード例 #7
0
            # model.out returns four values; only `predictions` is used in
            # this excerpt.
            # NOTE(review): `output_lstm1` appears twice in the unpacking
            # target, so the second of the four values is overwritten by the
            # third -- the training loop further down names the third value
            # `output_lstm2`. Confirm which name is intended.
            output_conv, output_lstm1, output_lstm1, predictions = model.out(
                data)

            # Running totals: predictions.max(1)[1] is the arg-max index
            # along dim 1, compared element-wise with the true labels.
            total += true_labels.size(0)
            correct += (predictions.max(1)[1] == true_labels).sum().item()
            accuracy = 100 * correct / total

        # Report the accuracy (in percent) accumulated in `correct` / `total`.
        print('Accuracy of the network on the evaluation dataset: %d %%' %
              (100 * correct / total))

        # Keep a checkpoint only when the evaluation accuracy improves.
        if accuracy > best_acc:
            # Must be an assignment: the previous `best_acc == accuracy` was
            # a comparison whose result was discarded, so best_acc never
            # changed and later, worse models could overwrite the best one.
            best_acc = accuracy
            # SAVE MODEL
            print("SAVING MODEL")
            torch.save(model.state_dict(), "trained_models/best_model.pt")

    # Record the most recent evaluation accuracy.
    test_accuracy.append(accuracy)

    # Training pass over the training loader; the rest of this loop body
    # lies beyond the end of this excerpt.
    running_loss = 0.0
    for i, (data, true_labels) in enumerate(training_dataloader):

        # Cast inputs to FloatTensor and labels to LongTensor before the
        # forward pass.
        data = data.type(torch.FloatTensor)
        true_labels = true_labels.type(torch.LongTensor)

        # set all gradients to zero
        model.zero_grad()

        # Here we get the data from all layers, and the corresponding timesteps
        output_conv, output_lstm1, output_lstm2, predictions = model.out(data)
        loss = loss_function(predictions, true_labels)