# Example #1
# NOTE(review): truncated fragment — the line below closes a constructor call
# (presumably net = CRNN(..., leakyRelu=True)) whose start is outside this chunk.
           leakyRelu=True)
print(net)
# Count total parameters: for each tensor in the state dict, multiply its
# dimensions together (reduce(mul, shape)) and sum over all tensors.
params = net.state_dict()
params_shape = []
for k, v in params.items():
    #    print(k, v.numpy().shape, reduce(mul, v.numpy().shape))
    params_shape.append(reduce(mul, v.numpy().shape))
params_total = sum(params_shape)
print('params_total:', params_total)

# Either resume from a saved checkpoint or initialize fresh weights.
if opt.finetune:
    print('Loading model from', opt.modeldir + opt.modelname)
    net.load_state_dict(torch.load(opt.modeldir + opt.modelname))
else:
    print('create new model')
    net.apply(weights_init)

# Wrap in DataParallel when more than one GPU is requested, then move the
# model and the CTC loss to the GPU.
if opt.ngpu > 1:
    # print("Let's use", torch.cuda.device_count(), "GPUs!")
    net = nn.DataParallel(net, device_ids=range(opt.ngpu))
net.cuda()
criterion = CTCLoss().cuda()

# Optimizer choice is driven by command-line flags: Adadelta, RMSprop,
# or (default) Adam.
if opt.adadelta:
    optimizer = optim.Adadelta(net.parameters(),
                               lr=opt.lr)  # , weight_decay=1e-8)
elif opt.rms:
    optimizer = optim.RMSprop(net.parameters(), lr=opt.lr)
else:
    optimizer = optim.Adam(net.parameters(),
                           lr=opt.lr,
# NOTE(review): truncated — the optim.Adam(...) call is cut off at this point.
# Example #2
# Build a single-channel (grayscale) CRNN; the class count is the alphabet
# size plus one extra slot for the CTC "blank" label. Move it to the GPU.
nc = 1
nclass = len(option.alphabet) + 1
crnn = CRNN(nc, nclass, option.nh)
crnn = crnn.cuda()


def weight_init(module):
    """DCGAN-style initializer, meant for `model.apply(weight_init)`.

    Conv* layers get weights ~ N(0, 0.02); BatchNorm* layers get weights
    ~ N(1, 0.02) and zeroed biases. All other module types are untouched.
    """
    kind = type(module).__name__
    if 'Conv' in kind:
        module.weight.data.normal_(0.0, 0.02)
    if 'BatchNorm' in kind:
        module.weight.data.normal_(1.0, 0.02)
        module.bias.data.fill_(0)


# Apply the DCGAN-style initializer to every submodule, then set up the CTC
# loss (zero_infinity avoids inf losses when a target is longer than its
# input) and the optimizer on the GPU.
crnn.apply(weight_init)

loss_function = CTCLoss(zero_infinity=True)
loss_function = loss_function.cuda()
optimizer = Adadelta(crnn.parameters())
# Converter: project helper — presumably encodes/decodes text <-> label
# indices over option.alphabet; defined elsewhere (TODO confirm).
converter = Converter(option.alphabet)
print_every = 100  # log frequency, in iterations
total_loss = 0.0  # running loss accumulator


def validation():
    """Evaluate the CRNN on the validation set.

    NOTE(review): truncated fragment — the loop below has no body in this
    chunk; the rest of the function lies outside the visible source.
    Relies on module-level `crnn` and `validationset_dataloader`.
    """
    print('start validation...')
    crnn.eval()
    total_loss = 0.0
    n_correct = 0
    for i, (input, label) in enumerate(validationset_dataloader):
def main():
    """Train a CRNN with CTC loss, driven by conf/train.yml.

    Loads hyper-parameters from the YAML config, builds and initializes the
    model on the GPU, then runs `n_iter` passes over the training set,
    validating every `val_interval` steps and checkpointing the whole model
    every `save_interval` steps.
    """
    conf_file = "conf/train.yml"
    with open(conf_file, 'r') as f:
        # safe_load never executes arbitrary YAML tags; bare yaml.load(f)
        # without a Loader is unsafe on untrusted files and raises a
        # TypeError in PyYAML >= 6.
        args = edict(yaml.safe_load(f))

    # Unpack the config up front — attribute access on the edict doubles as
    # a presence check for every required key.
    train_root = args.train_root
    test_root = args.test_root
    batch_size = args.batch_size
    max_len = args.max_len
    img_h = args.img_h
    img_w = args.img_w
    n_hidden = args.n_hidden
    n_iter = args.n_iter
    lr = args.lr
    cuda = args.cuda
    val_interval = args.val_interval
    save_interval = args.save_interval
    model_dir = args.model_dir
    debug_level = args.debug_level
    experiment = args.experiment
    n_channel = args.n_channel
    n_class = args.n_class
    beta = args.beta  # Adam beta1; only used by the commented-out optimizer

    # Pre-allocated staging buffers filled by loadData() each batch.
    # Fixed: the width slot previously passed img_h twice; img_w is what the
    # dataset construction below clearly intends (loadData presumably
    # resizes the buffer anyway — TODO confirm).
    image = torch.FloatTensor(batch_size, n_channel, img_h, img_w)
    text = torch.IntTensor(batch_size * max_len)
    length = torch.IntTensor(batch_size)

    logging.getLogger().setLevel(debug_level)
    '''
        50 - critical
        40 - error
        30 - warning
        20 - info
        10 - debug
    '''
    # Model, loss, and optimizer.
    crnn = CRNN(img_h, n_channel, n_class, n_hidden).cuda()
    crnn.apply(weights_init)

    criterion = CTCLoss().cuda()

    optimizer = optim.RMSprop(crnn.parameters(), lr=lr)
    # optimizer = optim.Adam(crnn.parameters(), lr=lr,
    #                    betas=(beta, 0.999))

    trainset = train_set(train_root, batch_size, img_h, img_w, n_class)
    valset = train_set(test_root, batch_size, img_h, img_w, n_class)

    cur_iter = 0
    for ITER in range(n_iter):
        for train_img, train_label, train_lengths, batch_label \
                in iter(trainset):
            # Validation may have frozen the parameters; re-enable grads
            # and put the model back in training mode.
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()

            if train_img is None:
                break
            cur_iter += 1
            loadData(image, train_img)
            loadData(text, train_label)
            loadData(length, train_lengths)
            preds = crnn(train_img.cuda())
            # CTC needs the (identical) time dimension for every sample.
            preds_size = Variable(torch.IntTensor([preds.size(0)] *
                                                  batch_size))
            cost = criterion(preds, text, preds_size, length)\
                    / batch_size
            crnn.zero_grad()
            cost.backward()
            optimizer.step()
            # NOTE(review): numpy()[0] assumes the loss is a 1-element
            # tensor (old warp-ctc behavior); on a 0-dim tensor this would
            # raise — .item() would be safer. TODO confirm torch version.
            print("training-iter {} cost {}".format(
                ITER,
                cost.cpu().detach().numpy()[0]))
            if cur_iter % val_interval == 0:
                val(crnn, valset, criterion, n_class)
            if cur_iter % save_interval == 0:
                model_file = os.path.join(model_dir,
                                          "crnn_iter{}.pth".format(ITER))
                print("saving in file {}".format(model_file))
                with open(model_file, 'wb') as f:
                    torch.save(crnn, f)
    def test_train(self):
        '''
        Integration-style test: run a full CRNN training loop with
        hard-coded hyper-parameters. Mirrors main() above (kept in
        sync manually); requires a GPU and the datasets on disk.
        '''
        # test_root = "data/ocr_dataset_val"
        # train_root = "data/ocr_dataset"
        train_root = "data/ocr_dataset_train_400_10/"
        test_root = "data/ocr_dataset_train_50_10_val/"
        batch_size = 20
        max_len = 15
        img_h, img_w = 32, 150
        n_hidden = 512
        n_iter = 400
        lr = 0.00005
        cuda = True
        val_interval = 250
        save_interval = 1000
        model_dir = "models"
        debug_level = 20  # logging.INFO
        experiment = "experiment"
        n_channel = 3
        n_class = 11
        beta = 0.5  # Adam beta1; only used by the commented-out optimizer

        # Pre-allocated staging buffers filled by loadData() each batch.
        # NOTE(review): the width slot passes img_h twice instead of img_w —
        # presumably harmless if loadData resizes the buffer, but looks like
        # a typo. TODO confirm.
        image = torch.FloatTensor(batch_size, n_channel, img_h, img_h)
        text = torch.IntTensor(batch_size * max_len)
        length = torch.IntTensor(batch_size)

        logging.getLogger().setLevel(debug_level)
        '''
            50 - critical
            40 - error
            30 - warining
            20 - info
            10 - debug
        '''
        # Model, loss, and optimizer on the GPU.
        crnn = CRNN(img_h, n_channel, n_class, n_hidden).cuda()
        crnn.apply(weights_init)

        criterion = CTCLoss().cuda()

        optimizer = optim.RMSprop(crnn.parameters(), lr=lr)
        # optimizer = optim.Adam(crnn.parameters(), lr=lr,
        #                    betas=(beta, 0.999))

        trainset = train_set(train_root, batch_size, img_h, img_w, n_class)
        valset = train_set(test_root, batch_size, img_h, img_w, n_class)

        cur_iter = 0
        for ITER in range(n_iter):
            for train_img, train_label, train_lengths, batch_label in iter(
                    trainset):
                # Validation may have frozen the parameters; re-enable
                # grads and put the model back in training mode.
                for p in crnn.parameters():
                    p.requires_grad = True
                crnn.train()

                if train_img is None:
                    break
                cur_iter += 1
                loadData(image, train_img)
                loadData(text, train_label)
                loadData(length, train_lengths)
                preds = crnn(train_img.cuda())
                # preds = F.softmax(preds, dim=2)
                # print(preds.shape)
                # CTC needs the (identical) time dimension per sample.
                preds_size = Variable(
                    torch.IntTensor([preds.size(0)] * batch_size))
                # print(batch_label, text, length, len(text), len(length), length.sum(),
                #     preds.shape, preds_size.shape)
                cost = criterion(preds, text, preds_size, length) / batch_size
                crnn.zero_grad()
                cost.backward()
                optimizer.step()
                # NOTE(review): numpy()[0] assumes a 1-element loss tensor
                # (old warp-ctc behavior); .item() would be safer. TODO.
                print("training-iter {} cost {}".format(
                    ITER,
                    cost.cpu().detach().numpy()[0]))
                if cur_iter % val_interval == 0:
                    val(crnn, valset, criterion, n_class)
                if cur_iter % save_interval == 0:
                    model_file = os.path.join(model_dir,
                                              "crnn_iter{}.pth".format(ITER))
                    print("saving in file {}".format(model_file))
                    with open(model_file, 'wb') as f:
                        torch.save(crnn, f)