Example #1
def main(opt):
    dataset = VideoDataset(opt, "test")
    opt["vocab_size"] = dataset.get_vocab_size()
    opt["seq_length"] = dataset.max_len
    if opt["model"] == 'S2VTModel':
        model = S2VTModel(opt["vocab_size"],
                          opt["max_len"],
                          opt["dim_hidden"],
                          opt["dim_word"],
                          rnn_dropout_p=opt["rnn_dropout_p"]).cuda()
    elif opt["model"] == "S2VTAttModel":
        encoder = EncoderRNN(opt["dim_vid"],
                             opt["dim_hidden"],
                             bidirectional=opt["bidirectional"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"])
        decoder = DecoderRNN(opt["vocab_size"],
                             opt["max_len"],
                             opt["dim_hidden"],
                             opt["dim_word"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"],
                             bidirectional=opt["bidirectional"])
        model = S2VTAttModel(encoder, decoder).cuda()
    elif opt["model"] == "CTCmodel":
        model = CTCmodel(opt['vocab_size'] + 1, opt['dim_hidden'])
    # model = nn.DataParallel(model)
    # Setup the model
    model.load_state_dict(torch.load(opt["saved_model"]))
    model.cuda()
    crit = CTCLoss()
    crit = crit.cuda()
    # crit = utils.LanguageModelCriterion()

    test(model, crit, dataset, dataset.get_vocab(), opt)
Example #2
def train(root,
          start_epoch,
          epoch_num,
          letters,
          net=None,
          lr=0.1,
          fix_width=True):
    trainloader = load_data(root, training=True, fix_width=fix_width)
    use_cuda = torch.cuda.is_available()
    if not net:
        net = CRNN(1, len(letters) + 1)
    criterion = CTCLoss()
    optimizer = optim.Adadelta(net.parameters(), lr=lr)
    if use_cuda:
        net = net.cuda()
        criterion = criterion.cuda()
    labeltransformer = LabelTransformer(letters)

    print('====   Training..   ====')
    net.train()
    for epoch in range(start_epoch, start_epoch + epoch_num):
        print('----    epoch: %d    ----' % (epoch, ))
        loss_sum = 0
        for i, (img, label) in enumerate(trainloader):
            label, label_length = labeltransformer.encode(label)
            if use_cuda:
                img = img.cuda()
            img, label = Variable(img), Variable(label)
            label_length = Variable(label_length)
            optimizer.zero_grad()

            outputs = net(img)
            output_length = Variable(
                torch.IntTensor([outputs.size(0)] * outputs.size(1)))
            loss = criterion(outputs, label, output_length, label_length)
            loss.backward()
            optimizer.step()
            loss_sum += loss.data[0]
        print('loss = %f' % (loss_sum, ))
    print('Finished Training')
    return net
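For reference, the warpctc_pytorch CTCLoss used throughout these examples expects unnormalized activations of shape (T, B, C), a flat IntTensor of concatenated targets, and per-sample activation/label lengths. A minimal sketch of that call convention (the shapes and label values below are illustrative assumptions, not taken from the example above):

# Minimal, self-contained sketch of the warp-ctc call convention (assumed sizes).
import torch
from torch.autograd import Variable
from warpctc_pytorch import CTCLoss

T, B, C = 50, 4, 27                       # time steps, batch size, classes incl. blank (index 0)
criterion = CTCLoss()
acts = Variable(torch.randn(T, B, C), requires_grad=True)     # raw activations; softmax is applied internally
labels = Variable(torch.IntTensor([1, 2, 3, 4, 5, 6, 7, 8]))  # all targets concatenated, 1-indexed
act_lens = Variable(torch.IntTensor([T] * B))                 # activation length per sample
label_lens = Variable(torch.IntTensor([2, 2, 2, 2]))          # target length per sample
loss = criterion(acts, labels, act_lens, label_lens)
loss.backward()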
Example #3
    crnn.load_state_dict(torch.load(opt.crnn))
print(crnn)

image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH)
textAttention = torch.LongTensor(opt.batchSize * 5)
lengthAttention = torch.IntTensor(opt.batchSize)
textCTC = torch.IntTensor(opt.batchSize * 5)
lengthCTC = torch.IntTensor(opt.batchSize)

if opt.cuda:
    crnn.cuda()
    crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu))
    image = image.cuda()
    textAttention = textAttention.cuda()
    criterionAttention = criterionAttention.cuda()
    criterionCTC = criterionCTC.cuda()

image = Variable(image)
textAttention = Variable(textAttention)
lengthAttention = Variable(lengthAttention)
textCTC = Variable(textCTC)
lengthCTC = Variable(lengthCTC)


# loss averager
loss_avg = utils.averager()
loss_CTC = utils.averager()
loss_Attention = utils.averager()

# setup optimizer
if opt.adam:
Example #4
def main(config_yaml):
    '''
    Training/Finetune CNN_RNN_Attention Model.
    '''
    #### Load config settings. ####
    with open(config_yaml, encoding='utf-8') as f:
        opt = yaml.safe_load(f)
    if not os.path.isdir(opt['LOGGER_PATH']):
        os.mkdir(opt['LOGGER_PATH'])
    logger = Logger(opt['LOGGER_PATH'])
    if not os.path.isdir(opt['SAVE_PATH']):
        os.system('mkdir -p {0}'.format(opt['SAVE_PATH']))
    manualSeed = random.randint(1, 10000)
    random.seed(manualSeed)
    np.random.seed(manualSeed)
    torch.manual_seed(manualSeed)
    cudnn.benchmark = True

    #### Set up DataLoader. ####
    train_cfg = opt['TRAIN']
    ds_cfg = train_cfg['DATA_SOURCE']
    print('Building up dataset:{}'.format(ds_cfg['TYPE']))
    if ds_cfg['TYPE'] == 'SYN_DATA':
        text_gen = util.TextGenerator(ds_cfg['GEN_SET'], ds_cfg['GEN_LEN'])
        ds_train = dataset.synthDataset(ds_cfg['FONT_ROOT'],
                                        ds_cfg['FONT_SIZE'], text_gen)
    elif ds_cfg['TYPE'] == 'IMG_DATA':
        ds_train = dataset.trainDataset(
            ds_cfg['IMG_ROOT'], ds_cfg['TRAIN_SET'],
            transform=None)  #dataset.graybackNormalize()
    assert ds_train
    train_loader = torch.utils.data.DataLoader(
        ds_train,
        batch_size=train_cfg['BATCH_SIZE'],
        shuffle=True,
        sampler=None,
        num_workers=opt['WORKERS'],
        collate_fn=dataset.alignCollate(imgH=train_cfg['IMG_H'],
                                        imgW=train_cfg['MAX_W']))

    val_cfg = opt['VALIDATION']
    ds_val = dataset.testDataset(val_cfg['IMG_ROOT'],
                                 val_cfg['VAL_SET'],
                                 transform=None)  #dataset.graybackNormalize()
    assert ds_val
    val_loader = torch.utils.data.DataLoader(ds_val,
                                             batch_size=32,
                                             shuffle=False,
                                             num_workers=opt['WORKERS'],
                                             collate_fn=dataset.alignCollate(
                                                 imgH=train_cfg['IMG_H'],
                                                 imgW=train_cfg['MAX_W']))

    #### Model construction and Initialization. ####
    alphabet = keys.alphabet
    nClass = len(alphabet) + 1

    if opt['N_GPU'] > 1:
        opt['RNN']['multi_gpu'] = True
    else:
        opt['RNN']['multi_gpu'] = False
    model = crann.CRANN(opt, nClass)
    #print(model)

    #### Train/Val the model. ####
    converter = util.strLabelConverter(alphabet)
    criterion = CTCLoss()
    if opt['CUDA']:
        model.cuda()
        criterion.cuda()

    if opt['OPTIMIZER'] == 'RMSprop':
        optimizer = optim.RMSprop(model.parameters(), lr=opt['TRAIN']['LR'])
    elif opt['OPTIMIZER'] == 'Adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=opt['TRAIN']['LR'],
                               betas=(opt['TRAIN']['BETA1'], 0.999))
    elif opt['OPTIMIZER'] == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=opt['TRAIN']['LR'])
    else:
        optimizer = optim.Adadelta(model.parameters(), lr=opt['TRAIN']['LR'])

    start_epoch = 0
    if opt['VAL_ONLY']:
        print('=>loading pretrained model from %s for val only.' %
              opt['CRANN'])
        checkpoint = torch.load(opt['CRANN'])
        model.load_state_dict(checkpoint['state_dict'])
        val(model, val_loader, criterion, converter, 0, 0, logger, True)
    elif opt['FINETUNE']:
        print('=>loading pretrained model from %s for finetune.' %
              opt['CRANN'])
        checkpoint = torch.load(opt['CRANN'])
        #model.load_state_dict(checkpoint['state_dict'])
        model_dict = model.state_dict()
        #print(model_dict.keys())
        cnn_dict = {
            "cnn." + k: v
            for k, v in checkpoint.items() if "cnn." + k in model_dict
        }
        model_dict.update(cnn_dict)
        model.load_state_dict(model_dict)
        for epoch in range(start_epoch, opt['EPOCHS']):
            adjust_lr(optimizer, opt['TRAIN']['LR'], epoch, opt['STEP'])
            train(model, train_loader, val_loader, criterion, optimizer, opt,
                  converter, epoch, logger)
    elif opt['RESUME']:
        print('=>loading checkpoint from %s for resume training.' %
              opt['CRANN'])
        checkpoint = torch.load(opt['CRANN'])
        start_epoch = checkpoint['epoch'] + 1
        print('resume from epoch:{}'.format(start_epoch))
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        for epoch in range(start_epoch, opt['EPOCHS']):
            adjust_lr(optimizer, opt['TRAIN']['LR'], epoch, opt['STEP'])
            train(model, train_loader, val_loader, criterion, optimizer, opt,
                  converter, epoch, logger)
    else:
        print('train from scratch.')
        for epoch in range(start_epoch, opt['EPOCHS']):
            adjust_lr(optimizer, opt['TRAIN']['LR'], epoch, opt['STEP'])
            train(model, train_loader, val_loader, criterion, optimizer, opt,
                  converter, epoch, logger)
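The adjust_lr helper is not shown in this example; a plausible step-decay implementation over optimizer.param_groups might look like the sketch below (an assumption, the repository's actual helper may differ):

def adjust_lr(optimizer, base_lr, epoch, step):
    # Assumed behaviour: scale the learning rate by 0.1 every `step` epochs.
    lr = base_lr * (0.1 ** (epoch // step))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr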
Example #5
# for name, module in crnn.named_children():
#     if name == 'cnn':
#         module_dict = module.state_dict()
#         new_state_dict = {k: v for k, v in new_state_dict.items() if k in module_dict}

#        module_dict.update(new_state_dict)
#         module.load_state_dict(module_dict)

image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH)
text = torch.IntTensor(opt.batchSize * 5)
length = torch.IntTensor(opt.batchSize)

if opt.cuda:
    crnn = crnn.cuda(id_gpu)
    image = image.cuda(id_gpu)
    criterion = criterion.cuda(id_gpu)

image = Variable(image)
text = Variable(text)
length = Variable(length)

# loss averager
loss_avg = util.averager()

# setup optimizer
if opt.adam:
    optimizer = optim.Adam(crnn.parameters(),
                           lr=opt.lr,
                           betas=(opt.beta1, 0.999))
elif opt.adadelta:
    optimizer = optim.Adadelta(crnn.parameters(), lr=opt.lr)
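Several of these examples track the running loss with utils.averager() / util.averager(). An illustrative stand-in with the same add/val/reset interface (not the repositories' actual implementation):

class Averager(object):
    # Hypothetical running-mean helper; the real utils.averager() may differ in detail.
    def __init__(self):
        self.reset()

    def add(self, v):
        # accept either a 1-element tensor/Variable or a plain float
        self.sum += v.item() if hasattr(v, 'item') else float(v)
        self.count += 1

    def val(self):
        return self.sum / self.count if self.count else 0.0

    def reset(self):
        self.sum = 0.0
        self.count = 0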
Example #6
        m.bias.data.fill_(0)


crnn = CRNN(nchannels, nclass, opt.nhidden)
crnn.apply(weights_init)

image = torch.FloatTensor(opt.batch_size, 1, 1, 1)
text = torch.IntTensor(opt.batch_size * 5)
length = torch.IntTensor(opt.batch_size)


if opt.cuda:
    torch.cuda.set_device(opt.gpu_choice)
    crnn = crnn.cuda(opt.gpu_choice)
    image = image.cuda(opt.gpu_choice)
    criterion = criterion.cuda(opt.gpu_choice)

if opt.crnn != '':
    print('loading pretrained model from %s' % opt.crnn)
    crnn.load_state_dict(torch.load(opt.crnn))

image = Variable(image)
text = Variable(text)
length = Variable(length)

# loss averager
loss_avg = utils.averager()

# setup optimizer
if opt.rms:
    optimizer = optim.RMSprop(crnn.parameters(), lr=opt.lr)
Example #7
def main(opt):
    print(opt)

    if opt.experiment is None:
        opt.experiment = 'expr'

    os.system('mkdir {0}'.format(opt.experiment))

    # Why is this?
    opt.manualSeed = random.randint(1, 10000)  # fix seed

    print("Random Seed: ", opt.manualSeed)
    random.seed(opt.manualSeed)
    np.random.seed(opt.manualSeed)
    torch.manual_seed(opt.manualSeed)

    cudnn.benchmark = True

    if torch.cuda.is_available() and not opt.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    train_dataset = dataset.hwrDataset(mode="train")
    assert train_dataset
    # if not opt.random_sample:
    #     sampler = dataset.randomSequentialSampler(train_dataset, opt.batchSize)
    # else:
    #     sampler = None
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=opt.batchSize,
                                               shuffle=True,
                                               num_workers=int(opt.workers),
                                               collate_fn=dataset.alignCollate(
                                                   imgH=opt.imgH,
                                                   imgW=opt.imgW,
                                                   keep_ratio=True))
    # test_dataset = dataset.lmdbDataset(
    #     root=opt.valroot, transform=dataset.resizeNormalize((100, 32)))

    test_dataset = dataset.hwrDataset(mode="test",
                                      transform=dataset.resizeNormalize(
                                          (100, 32)))

    nclass = len(opt.alphabet) + 1
    nc = 1

    criterion = CTCLoss()

    # custom weights initialization called on crnn
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            m.weight.data.normal_(0.0, 0.02)
        elif classname.find('BatchNorm') != -1:
            m.weight.data.normal_(1.0, 0.02)
            m.bias.data.fill_(0)

    crnn = crnn_model.CRNN(opt.imgH, nc, nclass, opt.nh)
    crnn.apply(weights_init)
    if opt.crnn != '':
        print('loading pretrained model from %s' % opt.crnn)
        crnn.load_state_dict(torch.load(opt.crnn))
    print(crnn)

    # TODO make this central

    image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH)
    text = torch.IntTensor(opt.batchSize * 5)
    length = torch.IntTensor(opt.batchSize)

    if opt.cuda:
        crnn.cuda()
        crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu))
        image = image.cuda()
        criterion = criterion.cuda()

    image = Variable(image)
    text = Variable(text)
    length = Variable(length)

    # TODO what is this, read this.
    # loss averager
    loss_avg = utils.averager()

    # Todo default is RMS Prop. I wonder why?
    # setup optimizer

    #Following the paper's recommendation

    opt.adadelta = True
    if opt.adam:
        optimizer = optim.Adam(crnn.parameters(),
                               lr=opt.lr,
                               betas=(opt.beta1, 0.999))
    elif opt.adadelta:
        optimizer = optim.Adadelta(crnn.parameters(), lr=opt.lr)
    else:
        optimizer = optim.RMSprop(crnn.parameters(), lr=opt.lr)

    converter = utils.strLabelConverter(opt.alphabet)

    def val(net, dataset, criterion, max_iter=100):
        print('Start val')

        for p in crnn.parameters():
            p.requires_grad = False

        net.eval()
        data_loader = torch.utils.data.DataLoader(dataset,
                                                  shuffle=True,
                                                  batch_size=opt.batchSize,
                                                  num_workers=int(opt.workers))
        val_iter = iter(data_loader)

        n_correct = 0
        loss_avg = utils.averager()

        max_iter = min(max_iter, len(data_loader))
        for i in range(max_iter):
            print("Is 'i' jumping two values? i == " + str(i))
            data = val_iter.next()
            i += 1
            cpu_images, cpu_texts = data
            batch_size = cpu_images.size(0)
            utils.loadData(image, cpu_images)
            t, l = converter.encode(cpu_texts)
            utils.loadData(text, t)
            utils.loadData(length, l)

            preds = crnn(image)
            preds_size = Variable(torch.IntTensor([preds.size(0)] *
                                                  batch_size))
            cost = criterion(preds, text, preds_size, length) / batch_size
            loss_avg.add(cost)

            _, preds = preds.max(
                2
            )  # todo where is the output size set to 26? Empirically it is.
            # preds = preds.squeeze(2)
            preds = preds.transpose(1, 0).contiguous().view(-1)
            sim_preds = converter.decode(preds.data,
                                         preds_size.data,
                                         raw=False)  # Todo read this.
            for pred, target in zip(sim_preds, cpu_texts):
                if pred == target.lower():
                    n_correct += 1

        raw_preds = converter.decode(preds.data, preds_size.data,
                                     raw=True)[:opt.n_test_disp]
        for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
            print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

        accuracy = n_correct / float(max_iter * opt.batchSize)
        print('Test loss: %f, accuracy: %f' % (loss_avg.val(), accuracy))

    for epoch in range(opt.niter):
        train_iter = iter(train_loader)
        i = 0
        while i < len(train_loader):
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()

            cost = train_batch(crnn, criterion, optimizer, train_iter, opt,
                               converter)
            loss_avg.add(cost)
            i += 1

            if i % opt.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (epoch, opt.niter, i, len(train_loader), loss_avg.val()))
                loss_avg.reset()

            if i % opt.valInterval == 0:
                try:
                    val(crnn, test_dataset, criterion)
                except Exception as e:
                    print(e)

            # do checkpointing
            if i % opt.saveInterval == 0:
                torch.save(
                    crnn.state_dict(),
                    '{0}/netCRNN_{1}_{2}.pth'.format(opt.experiment, epoch, i))
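The preds.max(2) / converter.decode(..., raw=False) pattern above is greedy CTC decoding: take the argmax class per time step, collapse repeated indices, and drop the blank. A hedged stand-alone sketch of that step (an illustrative stand-in, not the actual utils.strLabelConverter code):

def greedy_ctc_decode(indices, alphabet, blank=0):
    # indices: per-timestep argmax class ids for one sample; labels are 1-indexed, 0 is the blank.
    out = []
    prev = blank
    for idx in indices:
        if idx != blank and idx != prev:
            out.append(alphabet[idx - 1])
        prev = idx
    return ''.join(out)

# e.g. greedy_ctc_decode([0, 3, 3, 0, 1, 1, 2], 'abc') returns 'cab'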
Example #8
class LanguageModelTrainer:
    def __init__(self, model, loader, val_loader, test_loader, max_epochs=1, run_id='exp'):
        """
            Use this class to train your model
        """
        # feel free to add any other parameters here
        self.model = model.cuda() if torch.cuda.is_available() else model
        self.loader = loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.train_losses = []
        self.val_losses = []
        self.predictions = []
        self.predictions_test = []
        self.generated_logits = []
        self.generated = []
        self.generated_logits_test = []
        self.generated_test = []
        self.epochs = 0
        self.max_epochs = max_epochs
        self.run_id = run_id

        self.optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-6)
        # self.optimizer = torch.optim.SGD(model.parameters(), lr=0.0001, weight_decay=1e-6, momentum=0.9)
        self.criterion = CTCLoss()  # size_average=True, length_average=False
        self.criterion = self.criterion.cuda() if torch.cuda.is_available() else self.criterion
        self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, factor=0.1, patience=2)
        self.LD = Levenshtein(phoneme_list.PHONEME_MAP)
        self.best_rate = 1e10
        self.decoder = CTCBeamDecoder(labels=[' '] + phoneme_list.PHONEME_MAP, blank_id=0, beam_width=150)

    def train(self):
        self.model.train()  # set to training mode
        for epoch in range(self.max_epochs):
            epoch_loss = 0
            training_epoch_loss = 0
            for batch_num, (inputs, targets) in enumerate(self.loader):
                # # debug
                # # Save init values
                # old_state_dict = {}
                # for key in model.state_dict():
                #     old_state_dict[key] = model.state_dict()[key].clone()
                #
                # # Your training procedure
                # loss = self.train_batch(inputs, targets)
                #
                # # Save new params
                # new_state_dict = {}
                # for key in model.state_dict():
                #     new_state_dict[key] = model.state_dict()[key].clone()
                #
                # # Compare params
                # for key in old_state_dict:
                #     if (old_state_dict[key] == new_state_dict[key]).all():
                #         print('No diff in {}'.format(key))
                # print('Batch loss is ', float(loss))

                loss = self.train_batch(inputs, targets)
                epoch_loss += loss
                training_epoch_loss += loss
                # training print
                batch_print = 40
                if batch_num % batch_print == 0 and batch_num != 0:
                    self.print_training(batch_num, self.loader.batch_size, training_epoch_loss, batch_print)
                    training_epoch_loss = 0

            epoch_loss = epoch_loss / (batch_num + 1)
            self.epochs += 1
            self.scheduler.step(epoch_loss)
            print('[TRAIN]  Epoch [%d/%d]   Loss: %.4f'
                  % (self.epochs, self.max_epochs, epoch_loss))
            self.train_losses.append(epoch_loss)
            # log loss
            tLog.log_scalar('training_loss', epoch_loss, self.epochs)
            # log values and gradients of parameters (histogram summary)
            for tag, value in self.model.named_parameters():
                tag = tag.replace('.', '/')
                tLog.log_histogram(tag, value.data.cpu().numpy(), self.epochs)
                tLog.log_histogram(tag+'/grad', value.grad.data.cpu().numpy(), self.epochs)

            # every 1 epochs, print validation statistics
            epochs_print = 1
            if self.epochs % epochs_print == 0 and not self.epochs == 0:
                with torch.no_grad():
                    t = "#########  Epoch {} #########".format(self.epochs)
                    print(t)
                    logging.info(t)
                    ls = 0
                    lens = 0
                    for j, (val_inputs, val_labels) in (enumerate(self.val_loader)):
                        idx = np.random.randint(0, len(val_inputs))
                        print('Pred', self.gen_batch(val_inputs[idx:idx + 1]))
                        print('Ground', ''.join([phoneme_list.PHONEME_MAP[o - 1] for o in val_labels[idx]]))
                        val_output, _, feature_lengths = self.model(val_inputs)
                        ls += self.LD.forward(val_output, val_labels, feature_lengths)
                        lens += len(val_inputs)
                    ls /= lens
                    t = "Validation LD {}:".format(ls)
                    print(t)
                    logging.info(t)
                    t = '--------------------------------------------'
                    print(t)
                    logging.info(t)
                    # log loss
                    vLog.log_scalar('LD', ls, self.epochs)
                    if self.best_rate > ls:
                        torch.save(self.model.state_dict(), "models/checkpoint.pt")
                        self.best_rate = ls

    def print_training(self, batch_num, batch_size, loss, batch_print):
        t = 'At {:.0f}% of epoch {}'.format(
            batch_num * batch_size / self.loader.dataset.num_entries * 100, self.epochs)
        print(t)
        logging.info(t)
        t = "Training loss : {}".format(loss / batch_print)
        print(t)
        logging.info(t)
        t = '--------------------------------------------'
        print(t)
        logging.info(t)

    def train_batch(self, inputs, targets):
        lens_tar = torch.Tensor([len(target) for target in targets])  # lens of all targets (sorted by loader)
        targets = torch.cat(targets)
        targets = targets.cuda() if torch.cuda.is_available() else targets
        outputs, _, lens_in = self.model(inputs)  # T x B x num_phonema, ignore hidden
        lens_in = torch.Tensor(lens_in)
        loss = self.criterion(outputs, targets.int().cpu(), lens_in.int().cpu(), lens_tar.int().cpu())
        loss.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()
        # print([i for i in model.cnn.modules()][1].__dict__['_parameters']['weight'][0])
        return float(loss)  # avoid autograd retention

    def test(self):
        preds = []
        for i, inputs in enumerate(self.test_loader):
            pred = self.gen_batch(inputs)
            preds += pred
        return preds

    def gen_batch(self, data_batch):
        scores, _, out_lengths = self.model(data_batch)
        out_lengths = torch.Tensor(out_lengths)
        scores = torch.transpose(scores, 0, 1)
        probs = F.softmax(scores, dim=2).data.cpu()
        output, scores, timesteps, out_seq_len = self.decoder.decode(probs=probs, seq_lens=out_lengths)
        out_seq = []
        for i in range(output.size(0)):
            chrs = [phoneme_list.PHONEME_MAP[o.item() - 1] for o in output[i, 0, :out_seq_len[i, 0]]]
            out_seq.append("".join(chrs))
        return out_seq
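The validation metric here (self.LD.forward) is based on the Levenshtein distance between the decoded and reference phoneme strings. The core edit-distance computation for two sequences, as a minimal sketch (an illustrative stand-in, not the Levenshtein class used above):

def edit_distance(a, b):
    # Classic dynamic-programming Levenshtein distance with a rolling 1-D table.
    dp = list(range(len(b) + 1))
    for i in range(1, len(a) + 1):
        prev, dp[0] = dp[0], i
        for j in range(1, len(b) + 1):
            cur = dp[j]
            dp[j] = min(dp[j] + 1,                          # deletion
                        dp[j - 1] + 1,                      # insertion
                        prev + (a[i - 1] != b[j - 1]))      # substitution (0 if equal)
            prev = cur
    return dp[-1]

# e.g. edit_distance('kitten', 'sitting') returns 3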
Example #9
    else:
        for k, v in model_dict.items():
            if (k != weig1 and k != bias1):
                model_dict[k] = pre_trainmodel[k]
        crnn.load_state_dict(model_dict)
print(crnn)

image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH)
text = torch.IntTensor(opt.batchSize * 5)
length = torch.IntTensor(opt.batchSize)

if opt.cuda:
    crnn.cuda()
    crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu))
    image = image.cuda()
    criterion = criterion.cuda()

image = Variable(image)
text = Variable(text)
length = Variable(length)

# loss averager
loss_avg = utils.averager()

# setup optimizer
if opt.adam:
    optimizer = optim.Adam(crnn.parameters(),
                           lr=opt.lr,
                           betas=(opt.beta1, 0.999))
elif opt.adadelta:
    optimizer = optim.Adadelta(crnn.parameters(), lr=opt.lr)
Example #10
def main(opt, case):
    print("Arguments are : " + str(opt))

    if opt.experiment is None:
        opt.experiment = 'expr'
    os.system('mkdir {0}'.format(opt.experiment))

    # Why do we use this?
    opt.manualSeed = random.randint(1, 10000)  # fix seed
    print("Random Seed: ", opt.manualSeed)
    random.seed(opt.manualSeed)
    np.random.seed(opt.manualSeed)
    torch.manual_seed(opt.manualSeed)

    cudnn.benchmark = True

    if torch.cuda.is_available() and not opt.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

        opt.cuda = True
        print('Set CUDA to true.')

    train_dataset = dataset.hwrDataset(mode="train")
    assert train_dataset

    # The shuffle needs to be false when the sizing has been done.

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=opt.batchSize,
                                               shuffle=False,
                                               num_workers=int(opt.workers),
                                               collate_fn=dataset.alignCollate(
                                                   imgH=opt.imgH,
                                                   imgW=opt.imgW,
                                                   keep_ratio=True))

    test_dataset = dataset.hwrDataset(mode="test",
                                      transform=dataset.resizeNormalize(
                                          (100, 32)))

    nclass = len(opt.alphabet) + 1
    nc = 1

    criterion = CTCLoss()

    # custom weights initialization called on crnn
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            m.weight.data.normal_(0.0, 0.02)
        elif classname.find('BatchNorm') != -1:
            m.weight.data.normal_(1.0, 0.02)
            m.bias.data.fill_(0)

    crnn = crnn_model.CRNN(opt.imgH, nc, nclass, opt.nh)
    crnn.apply(weights_init)

    if opt.cuda and not opt.uses_old_saving:
        crnn.cuda()
        crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu))
        criterion = criterion.cuda()

    start_epoch = 0  # default when no pretrained model is loaded
    if opt.crnn != '':

        print('Loading pre-trained model from %s' % opt.crnn)
        loaded_model = torch.load(opt.crnn)

        if opt.uses_old_saving:
            print("Assuming model was saved in rudementary fashion")
            crnn.load_state_dict(loaded_model)
            crnn.cuda()

            crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu))
            criterion = criterion.cuda()
            start_epoch = 0
        else:
            print("Loaded model accuracy: " + str(loaded_model['accuracy']))
            print("Loaded model epoch: " + str(loaded_model['epoch']))
            start_epoch = loaded_model['epoch']
            crnn.load_state_dict(loaded_model['state'])

    # Read this.
    loss_avg = utils.averager()

    # If following the paper's recommendation, using AdaDelta
    if opt.adam:
        optimizer = optim.Adam(crnn.parameters(),
                               lr=opt.lr,
                               betas=(opt.beta1, 0.999))
    elif opt.adadelta:
        optimizer = optim.Adadelta(crnn.parameters(), lr=opt.lr)
    elif opt.adagrad:
        print("Using adagrad")
        optimizer = optim.Adagrad(crnn.parameters(), lr=opt.lr)
    else:
        optimizer = optim.RMSprop(crnn.parameters(), lr=opt.lr)

    converter = utils.strLabelConverter(opt.alphabet)

    best_val_accuracy = 0

    for epoch in range(start_epoch, opt.niter):
        train_iter = iter(train_loader)
        i = 0
        while i < len(train_loader):
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()

            cost = train_batch(crnn, criterion, optimizer, train_iter, opt,
                               converter)
            loss_avg.add(cost)
            i += 1

            if i % opt.displayInterval == 0:
                print(
                    '[%d/%d][%d/%d] Loss: %f' %
                    (epoch, opt.niter, i, len(train_loader), loss_avg.val()) +
                    " " + case)
                loss_avg.reset()

            if i % opt.valInterval == 0:
                try:
                    val_loss_avg, accuracy = val_batch(crnn, opt, test_dataset,
                                                       converter, criterion)

                    model_state = {
                        'epoch': epoch + 1,
                        'iter': i,
                        'state': crnn.state_dict(),
                        'accuracy': accuracy,
                        'val_loss_avg': val_loss_avg,
                    }
                    utils.save_checkpoint(
                        model_state, accuracy > best_val_accuracy,
                        '{0}/netCRNN_{1}_{2}_{3}.pth'.format(
                            opt.experiment, epoch, i,
                            accuracy), opt.experiment)

                    if accuracy > best_val_accuracy:
                        best_val_accuracy = accuracy

                except Exception as e:
                    print(e)
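utils.save_checkpoint is not shown in this example; a common pattern with the same (state, is_best, filename, experiment_dir) signature saves the latest state and keeps a copy of the best one (an assumed helper, not the repository's exact code):

import shutil
import torch

def save_checkpoint(state, is_best, filename, experiment_dir):
    # Save the latest checkpoint; duplicate it as model_best.pth when accuracy improved.
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, '{0}/model_best.pth'.format(experiment_dir))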
Example #11
class ModuleTrain:
    def __init__(self, train_path, test_path, model_file, model, img_h=32, img_w=110, batch_size=64, lr=1e-3,
                 use_unicode=True, best_loss=0.2, use_gpu=True, workers=1):
        self.model = model
        self.model_file = model_file
        self.use_unicode = use_unicode
        self.img_h = img_h
        self.img_w = img_w
        self.batch_size = batch_size
        self.lr = lr
        self.best_loss = best_loss
        self.best_acc = 0.95
        self.use_gpu = use_gpu
        self.workers = workers

        self.converter = utils.strLabelConverter(alphabet)
        self.criterion = CTCLoss()

        if self.use_gpu:
            print("[use gpu] ...")
            self.model = self.model.cuda()
            self.criterion = self.criterion.cuda()
        if torch.cuda.is_available() and not self.use_gpu:
            print("[WARNING] You have a CUDA device, so you should probably run with --cuda")

        # load the model
        if os.path.exists(self.model_file):
            self.load(self.model_file)
        else:
            print('[Load model] error !!!')

        self.transform = T.Compose([
            T.Resize((self.img_h, self.img_w)),
            T.ToTensor(),
            # T.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5])
        ])

        train_label = os.path.join(train_path, 'labels_normal.txt')
        train_dataset = my_dataset.MyDataset(root=train_path, label_file=train_label, transform=self.transform,
                                             is_train=True, img_h=self.img_h, img_w=self.img_w)
        self.train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=self.batch_size,
                                                        shuffle=True, num_workers=int(self.workers))
        test_label = os.path.join(test_path, 'labels_normal.txt')
        test_dataset = my_dataset.MyDataset(root=test_path, label_file=test_label, transform=self.transform,
                                            is_train=False, img_h=self.img_h, img_w=self.img_w)
        self.test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=self.batch_size,
                                                       shuffle=False, num_workers=int(self.workers))

        # setup optimizer
        # if opt.adam:
        #     self.optimizer = optim.Adam(crnn.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
        # elif opt.adadelta:
        #     self.optimizer = optim.Adadelta(crnn.parameters(), lr=opt.lr)
        # else:
        #     self.optimizer = optim.RMSprop(crnn.parameters(), lr=opt.lr)
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr, weight_decay=1e-5)

    def train(self, epoch, decay_epoch=80):
        image = torch.FloatTensor(self.batch_size, 3, self.img_h, self.img_w)
        text = torch.IntTensor(self.batch_size * 5)
        length = torch.IntTensor(self.batch_size)
        image = Variable(image)
        text = Variable(text)
        length = Variable(length)

        print('[train] epoch: %d' % epoch)
        for epoch_i in range(epoch):
            train_loss = 0.0
            correct = 0

            if epoch_i >= decay_epoch and epoch_i % decay_epoch == 0:  # decay the learning rate
                self.lr = self.lr * 0.1
                for param_group in self.optimizer.param_groups:
                    param_group['lr'] = self.lr
                # self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr, weight_decay=1e-5)

            print('================================================')
            self.model.train()
            for batch_idx, (data, target) in enumerate(self.train_loader):  # training loop
                # data, target = Variable(data), Variable(target)

                if self.use_unicode:
                    target = [tx.decode('utf-8') for tx in target]

                batch_size = data.size(0)
                utils.loadData(image, data)
                t, l = self.converter.encode(target)
                utils.loadData(text, t)
                utils.loadData(length, l)

                if self.use_gpu:
                    image = image.cuda()

                # zero the gradients
                self.optimizer.zero_grad()
                for p in self.model.parameters():
                    p.requires_grad = True

                # compute the loss
                preds = self.model(image)
                preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
                # print('preds_size', preds_size)
                loss = self.criterion(preds, text, preds_size, length)
                # self.model.zero_grad()
                # backpropagate to compute gradients
                loss.backward()
                # update the parameters
                self.optimizer.step()
                train_loss += loss.item()

                # print(preds.size())
                # total = 0.0
                # print('len', len(preds.data[0][0]))
                # for i in range(len(preds.data[0][0])):
                #     total += preds.data[0][0][i]
                #     print('total', total)

                _, preds = preds.max(2)
                # print(preds.size())
                # preds = preds.squeeze(2)
                preds = preds.transpose(1, 0).contiguous().view(-1)
                # print(preds.size())
                sim_preds = self.converter.decode(preds.data, preds_size.data, raw=False)
                # print(sim_preds)
                # print(target)
                # total_preds = self.converter.decode(preds.data, preds_size.data, raw=True)
                # print(total_preds)
                for pred, target in zip(sim_preds, target):
                    if pred.strip() == target.strip():
                        correct += 1

            train_loss /= len(self.train_loader.dataset)
            acc = float(correct) / float(len(self.train_loader.dataset))
            print('[Train] Epoch: {} \tLoss: {:.6f}\tAcc: {:.6f}\tlr: {}'.format(epoch_i, train_loss, acc, self.lr))

            # Test
            test_loss, test_acc = self.test()
            if test_loss < self.best_loss:
                self.best_loss = test_loss
                str_list = self.model_file.split('.')
                best_model_file = ""
                for str_index in range(len(str_list)):
                    best_model_file = best_model_file + str_list[str_index]
                    if str_index == (len(str_list) - 2):
                        best_model_file += '_best'
                    if str_index != (len(str_list) - 1):
                        best_model_file += '.'
                self.save(best_model_file)  # save the best (lowest-loss) model

            if test_acc > self.best_acc:
                self.best_acc = test_acc
                str_list = self.model_file.split('.')
                best_model_file = ""
                for str_index in range(len(str_list)):
                    best_model_file = best_model_file + str_list[str_index]
                    if str_index == (len(str_list) - 2):
                        best_model_file += '_best_acc'
                    if str_index != (len(str_list) - 1):
                        best_model_file += '.'
                self.save(best_model_file)  # save the best (highest-accuracy) model

        self.save(self.model_file)

    def test(self):
        image = torch.FloatTensor(self.batch_size, 3, self.img_h, self.img_w)
        text = torch.IntTensor(self.batch_size * 5)
        length = torch.IntTensor(self.batch_size)
        image = Variable(image)
        text = Variable(text)
        length = Variable(length)

        for p in self.model.parameters():
            p.requires_grad = False

        test_loss = 0.0
        correct = 0
        # loss_avg = utils.averager()

        time_start = time.time()
        self.model.eval()
        for data, target in self.test_loader:
            cpu_images = data
            cpu_texts = target
            batch_size = cpu_images.size(0)
            utils.loadData(image, cpu_images)
            if self.use_unicode:
                cpu_texts = [tx.decode('utf-8') for tx in cpu_texts]

            t, l = self.converter.encode(cpu_texts)
            utils.loadData(text, t)
            utils.loadData(length, l)

            if self.use_gpu:
                image = image.cuda()

            preds = self.model(image)
            preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
            loss = self.criterion(preds, text, preds_size, length)
            test_loss += loss.item()

            _, preds = preds.max(2)
            # preds = preds.squeeze(2)
            preds = preds.transpose(1, 0).contiguous().view(-1)
            sim_preds = self.converter.decode(preds.data, preds_size.data, raw=False)
            for pred, target in zip(sim_preds, cpu_texts):
                if pred.strip() == target.strip():
                    correct += 1

        time_end = time.time()
        time_avg = float(time_end - time_start) / float(len(self.test_loader.dataset))
        accuracy = correct / float(len(self.test_loader.dataset))
        test_loss /= len(self.test_loader.dataset)
        print('[Test] loss: %f, accuracy: %f, time: %f' % (test_loss, accuracy, time_avg))
        return test_loss, accuracy

    def load(self, name):
        print('[Load model] %s ...' % name)
        self.model.load_state_dict(torch.load(name))
        # self.model.load(name)

    def save(self, name):
        print('[Save model] %s ...' % name)
        torch.save(self.model.state_dict(), name)
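Throughout these examples converter.encode(texts) returns a flat IntTensor of 1-indexed character ids plus a per-sample length tensor, which is exactly the target format CTCLoss expects. A hedged sketch of that encoding (an illustrative stand-in for utils.strLabelConverter.encode, not the repositories' code):

import torch

def encode_labels(texts, alphabet):
    # texts: list of strings. 0 is reserved for the CTC blank, so characters start at index 1.
    ids = [alphabet.index(ch) + 1 for text in texts for ch in text]
    lengths = [len(text) for text in texts]
    return torch.IntTensor(ids), torch.IntTensor(lengths)

# e.g. encode_labels(['ab', 'b'], 'ab') -> (IntTensor([1, 2, 2]), IntTensor([2, 1]))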
Example #12
from modules.CTCDecoder import Decoder
from warpctc_pytorch import CTCLoss

if __name__ == '__main__':
    print('Loading options...')
    opt = toml.loads(open('options.toml', 'r').read())

    # construct model
    exp = Exp(opt)
    model = exp.model
    decoder = Decoder(exp.trainset.vocab, lm_path=opt['general']['lm_path'])
    crit = CTCLoss()
    if opt['general']['cuda']:
        model = model.cuda()
        #model = nn.DataParallel(model).cuda()
        crit = crit.cuda()

    if opt['general']['use_keras_weights']:
        from nn_transfer import transfer
        transfer.convert_lipnet(model, 'nn_transfer/unseen-weights178.h5')
    if opt['general']['freeze_conv']:

        def freeze(m):
            m.requires_grad = False

        model.conv.apply(freeze)

    # load model
    try:
        niters = opt['general']['start_iter']
    except:
Example #13
crnn = crnn.CRNN(opt.imgH, nc, nclass, nh, ngpu)
crnn.apply(weights_init)
if opt.crnn != '':
    print('loading pretrained model from %s' % opt.crnn)
    crnn.load_state_dict(torch.load(opt.crnn))
print(crnn)

image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH)
text = torch.IntTensor(opt.batchSize * 5)
length = torch.IntTensor(opt.batchSize)

if opt.cuda:
    crnn.cuda()
    image = image.cuda()
    criterion = criterion.cuda()

image = Variable(image)
text = Variable(text)
length = Variable(length)

# loss averager
loss_avg = utils.averager()

# setup optimizer
if opt.adam:
    optimizer = optim.Adam(crnn.parameters(), lr=opt.lr,
                           betas=(opt.beta1, 0.999))
elif opt.adadelta:
    optimizer = optim.Adadelta(crnn.parameters(), lr=opt.lr)
else:
Example #14
class Trainer(object):
    def __init__(self):
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus
        if args.chars_file == '':
            self.alphabet = alphabetChinese
        else:
            self.alphabet = utils.load_chars(args.chars_file)
        nclass = len(self.alphabet) + 1
        nc = 1
        self.net = CRNN(args.imgH, nc, args.nh, nclass)
        self.train_dataloader, self.val_dataloader = self.dataloader(
            self.alphabet)
        self.criterion = CTCLoss()
        self.optimizer = self.get_optimizer()
        self.converter = utils.strLabelConverter(self.alphabet,
                                                 ignore_case=False)
        self.best_acc = 0.00001

        model_name = '%s' % (args.dataset_name)
        if not os.path.exists(args.save_prefix):
            os.mkdir(args.save_prefix)
        args.save_prefix += model_name

        if args.pretrained != '':
            print('loading pretrained model from %s' % args.pretrained)
            checkpoint = torch.load(args.pretrained)

            if 'model_state_dict' in checkpoint.keys():
                # self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
                args.start_epoch = checkpoint['epoch']
                self.best_acc = checkpoint['best_acc']
                checkpoint = checkpoint['model_state_dict']

            from collections import OrderedDict
            model_dict = OrderedDict()
            for k, v in checkpoint.items():
                if 'module' in k:
                    model_dict[k[7:]] = v
                else:
                    model_dict[k] = v
            self.net.load_state_dict(model_dict)

        if not args.cuda and torch.cuda.is_available():
            print(
                "WARNING: You have a CUDA device, so you should probably run with --cuda"
            )

        elif args.cuda and torch.cuda.is_available():
            print('available gpus is ', torch.cuda.device_count())
            # nn.DataParallel has no output_dim argument; use the defaults here
            self.net = torch.nn.DataParallel(self.net).cuda()
            self.criterion = self.criterion.cuda()

    def dataloader(self, alphabet):
        # train_transform = transforms.Compose(
        #     [transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
        #     resizeNormalize(args.imgH)])
        # train_dataset = BaseDataset(args.train_dir, alphabet, transform=train_transform)
        train_dataset = NumDataset(args.train_dir,
                                   alphabet,
                                   transform=resizeNormalize(args.imgH))
        train_dataloader = DataLoader(dataset=train_dataset,
                                      batch_size=args.batch_size,
                                      shuffle=True,
                                      num_workers=args.num_workers,
                                      pin_memory=True)

        if os.path.exists(args.val_dir):
            # val_dataset = BaseDataset(args.val_dir, alphabet, transform=resizeNormalize(args.imgH))
            val_dataset = NumDataset(args.val_dir,
                                     alphabet,
                                     mode='test',
                                     transform=resizeNormalize(args.imgH))
            val_dataloader = DataLoader(dataset=val_dataset,
                                        batch_size=args.batch_size,
                                        shuffle=False,
                                        num_workers=args.num_workers,
                                        pin_memory=True)
        else:
            val_dataloader = None

        return train_dataloader, val_dataloader

    def get_optimizer(self):
        if args.optimizer == 'sgd':
            optimizer = optim.SGD(
                self.net.parameters(),
                lr=args.lr,
                momentum=args.momentum,
                weight_decay=args.wd,
            )
        elif args.optimizer == 'adam':
            optimizer = optim.Adam(
                self.net.parameters(),
                lr=args.lr,
                betas=(args.beta1, 0.999),
            )
        else:
            optimizer = optim.RMSprop(
                self.net.parameters(),
                lr=args.lr,
                momentum=args.momentum,
                weight_decay=args.wd,
            )
        return optimizer

    def train(self):
        logging.basicConfig()
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        log_file_path = args.save_prefix + '_train.log'
        log_dir = os.path.dirname(log_file_path)
        if log_dir and not os.path.exists(log_dir):
            os.mkdir(log_dir)
        fh = logging.FileHandler(log_file_path)
        logger.addHandler(fh)
        logger.info(args)
        logger.info('Start training from [Epoch {}]'.format(args.start_epoch +
                                                            1))

        losses = utils.Averager()
        train_accuracy = utils.Averager()

        for epoch in range(args.start_epoch, args.nepoch):
            self.net.train()
            btic = time.time()
            for i, (imgs, labels) in enumerate(self.train_dataloader):
                batch_size = imgs.size()[0]
                imgs = imgs.cuda()
                preds = self.net(imgs).cpu()
                text, length = self.converter.encode(
                    labels
                )  # length: label length of each sample in the batch; text: class indices of all characters in the batch
                preds_size = torch.IntTensor([preds.size(0)] * batch_size)
                loss_avg = self.criterion(preds, text, preds_size,
                                          length) / batch_size

                self.optimizer.zero_grad()
                loss_avg.backward()
                self.optimizer.step()

                losses.update(loss_avg.item(), batch_size)

                _, preds_m = preds.max(2)
                preds_m = preds_m.transpose(1, 0).contiguous().view(-1)
                sim_preds = self.converter.decode(preds_m.data,
                                                  preds_size.data,
                                                  raw=False)
                n_correct = 0
                for pred, target in zip(sim_preds, labels):
                    if pred == target:
                        n_correct += 1
                train_accuracy.update(n_correct, batch_size, MUL_n=False)

                if args.log_interval and not (i + 1) % args.log_interval:
                    logger.info(
                        '[Epoch {}/{}][Batch {}/{}], Speed: {:.3f} samples/sec, Loss:{:.3f}'
                        .format(epoch + 1, args.nepoch, i + 1,
                                len(self.train_dataloader),
                                batch_size / (time.time() - btic),
                                losses.val()))
                    losses.reset()

            logger.info(
                'Training accuracy: {:.3f}, [#correct:{} / #total:{}]'.format(
                    train_accuracy.val(), train_accuracy.sum,
                    train_accuracy.count))
            train_accuracy.reset()

            if args.val_interval and not (epoch + 1) % args.val_interval:
                acc = self.validate(logger)
                if acc > self.best_acc:
                    self.best_acc = acc
                    save_path = '{:s}_best.pth'.format(args.save_prefix)
                    torch.save(
                        {
                            'epoch': epoch,
                            'model_state_dict': self.net.state_dict(),
                            # 'optimizer_state_dict': self.optimizer.state_dict(),
                            'best_acc': self.best_acc,
                        },
                        save_path)
                logging.info("best acc is:{:.3f}".format(self.best_acc))
                if args.save_interval and not (epoch + 1) % args.save_interval:
                    save_path = '{:s}_{:04d}_{:.3f}.pth'.format(
                        args.save_prefix, epoch + 1, acc)
                    torch.save(
                        {
                            'epoch': epoch,
                            'model_state_dict': self.net.state_dict(),
                            # 'optimizer_state_dict': self.optimizer.state_dict(),
                            'best_acc': self.best_acc,
                        },
                        save_path)

    def validate(self, logger):
        if self.val_dataloader is None:
            return 0
        logger.info('Start validate.')
        losses = utils.Averager()
        self.net.eval()
        n_correct = 0
        with torch.no_grad():
            for i, (imgs, labels) in enumerate(self.val_dataloader):
                batch_size = imgs.size()[0]
                imgs = imgs.cuda()
                preds = self.net(imgs).cpu()
                text, length = self.converter.encode(
                    labels
                )  # length: label length of each sample in the batch; text: class indices of all characters in the batch
                preds_size = torch.IntTensor(
                    [preds.size(0)] * batch_size)  # timestep * batchsize
                loss_avg = self.criterion(preds, text, preds_size,
                                          length) / batch_size

                losses.update(loss_avg.item(), batch_size)

                _, preds = preds.max(2)
                preds = preds.transpose(1, 0).contiguous().view(-1)
                sim_preds = self.converter.decode(preds.data,
                                                  preds_size.data,
                                                  raw=False)
                for pred, target in zip(sim_preds, labels):
                    if pred == target:
                        n_correct += 1

        accuracy = n_correct / float(losses.count)

        logger.info(
            'Validation loss: {:.3f}, accuracy: {:.3f}, [#correct:{} / #total:{}]'
            .format(losses.val(), accuracy, n_correct, losses.count))

        return accuracy
Example #15
        model.load_state_dict(torch.load(modelpath))
        print('Done!')
    k = 0
    losstotal = 0.0
    printinterval = opt.printinterval
    valinterval = opt.valinterval
    numinprint = 0
    # train
    for epoch in range(max_epoch):

        for i, (data, label) in enumerate(train_loader):
            k = k + 1
            numinprint = numinprint + 1
            if torch.cuda.is_available() and use_gpu:
                data = data.cuda()
                loss_func = loss_func.cuda()
            model.train()
            labels = torch.IntTensor([])
            for j in range(label.size(0)):
                labels = torch.cat((labels, label[j]), 0)

            output = model(data)
            output_size = torch.IntTensor([output.size(0)] *
                                          int(output.size(1)))
            label_size = torch.IntTensor([label.size(1)] * int(label.size(0)))

            loss = loss_func(output, labels, output_size,
                             label_size) / label.size(0)
            losstotal += float(loss)
            if k % printinterval == 0:
                # display
Example #16
class TrainModel(object):
    def __init__(self, crnn_model):
        self.crnn_model = crnn_model

        # network hyper-parameter settings
        self.batchSize = 2
        workers = 1
        imgH = 32
        imgW = 280
        keep_ratio = True
        self.nepochs = 10
        self.acc = 0
        lr = 0.1

        self.image = torch.FloatTensor(self.batchSize, 3, imgH, imgH)
        self.text = torch.IntTensor(self.batchSize * 5)
        self.length = torch.IntTensor(self.batchSize)
        self.converter = strLabelConverter(''.join(alphabetChinese))
        self.optimizer = optim.Adadelta(crnn_model.parameters(), lr=lr)

        roots = glob('../data/ocr/*/*.jpg')
        # note: the split does not balance character frequencies
        trainP, testP = train_test_split(roots, test_size=0.1)
        traindataset = PathDataset(trainP, alphabetChinese)
        self.testdataset = PathDataset(testP, alphabetChinese)
        self.criterion = CTCLoss()

        self.train_loader = torch.utils.data.DataLoader(
            traindataset,
            batch_size=self.batchSize,
            shuffle=False,
            sampler=None,
            num_workers=int(workers),
            collate_fn=alignCollate(imgH=imgH,
                                    imgW=imgW,
                                    keep_ratio=keep_ratio))
        self.interval = len(self.train_loader) // 2  # model evaluation interval

    def trainBatch(self, net, criterion, optimizer, cpu_images, cpu_texts):
        batch_size = cpu_images.size(0)
        loadData(self.image, cpu_images)
        t, l = self.converter.encode(cpu_texts)

        loadData(self.text, t)
        loadData(self.length, l)
        preds = net(self.image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, self.text, preds_size,
                         self.length) / batch_size
        net.zero_grad()
        cost.backward()
        optimizer.step()
        return cost

    def val(self, net, dataset, max_iter=100):
        for p in net.parameters():
            p.requires_grad = False
        net.eval()
        n_correct = 0
        N = len(dataset)
        max_iter = min(max_iter, N)
        for i in range(max_iter):
            im, label = dataset[np.random.randint(0, N)]
            if im.size[0] > 1024:
                continue
            pred = crnn_model.predict(im)
            if pred.strip() == label:
                n_correct += 1
            # print(pred.strip(), label)
        accuracy = n_correct / float(max_iter)
        return accuracy

    def run_train(self):
        if torch.cuda.is_available():
            crnn_model.cuda()
            # model = torch.nn.DataParallel(model, device_ids=[0])  # wrap for multi-GPU training
            self.image = self.image.cuda()
            self.criterion = self.criterion.cuda()

        for i in range(1, self.nepochs + 1):
            print('epoch:{}/{}'.format(i, self.nepochs))
            n = len(self.train_loader)
            pbar = Progbar(target=n)
            train_iter = iter(self.train_loader)
            loss = 0

            for j in range(n):
                for name, params in crnn_model.named_parameters():
                    params.requires_grad = True
                crnn_model.train()
                cpu_images, cpu_texts = next(train_iter)
                cost = self.trainBatch(crnn_model, self.criterion,
                                       self.optimizer, cpu_images, cpu_texts)
                loss += cost.data.numpy()

                if (j + 1) % self.interval == 0:
                    # curAcc = self.val(crnn_model, self.testdataset, max_iter=1024)
                    # if curAcc > self.acc:
                    #     self.acc = curAcc
                    torch.save(crnn_model.state_dict(), 'new_modellstm.pth')
                pbar.update(j + 1,
                            values=[('loss',
                                     loss / ((j + 1) * self.batchSize)),
                                    ('acc', self.acc)])
Ejemplo n.º 17
0
class CRNN(nn.Module):
    def __init__(self, imgH, nc, nclass, nh, ngpu, n_rnn=2, leakyRelu=False):
        super(CRNN, self).__init__()
        self.ngpu = ngpu
        assert imgH % 16 == 0, 'imgH has to be a multiple of 16'

        ks = [3, 3, 3, 3, 3, 3, 2]
        ps = [1, 1, 1, 1, 1, 1, 0]
        ss = [1, 1, 1, 1, 1, 1, 1]
        nm = [64, 128, 256, 256, 512, 512, 512]
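        # ks/ps/ss/nm: kernel size, padding, stride and output channels for each of the 7 conv layers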

        cnn = nn.Sequential()
        self.criterion = CTCLoss()
        self.criterion = self.criterion.cuda()

        def convRelu(i, batchNormalization=False):
            nIn = nc if i == 0 else nm[i - 1]
            nOut = nm[i]
            cnn.add_module('conv{0}'.format(i),
                           nn.Conv2d(nIn, nOut, ks[i], ss[i], ps[i]))
            if batchNormalization:
                cnn.add_module('batchnorm{0}'.format(i), nn.BatchNorm2d(nOut))
            if leakyRelu:
                cnn.add_module('relu{0}'.format(i),
                               nn.LeakyReLU(0.2, inplace=True))
            else:
                cnn.add_module('relu{0}'.format(i), nn.ReLU(True))

        convRelu(0)
        cnn.add_module('pooling{0}'.format(0), nn.MaxPool2d(2, 2))  # 64x16x64
        convRelu(1)
        cnn.add_module('pooling{0}'.format(1), nn.MaxPool2d(2, 2))  # 128x8x32
        convRelu(2, True)
        convRelu(3)
        cnn.add_module('pooling{0}'.format(2),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 256x4x16
        convRelu(4, True)
        convRelu(5)
        cnn.add_module('pooling{0}'.format(3),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 512x2x16
        convRelu(6, True)  # 512x1x16

        self.cnn = cnn
        self.rnn = nn.Sequential(BidirectionalLSTM(512, nh, nh, ngpu),
                                 BidirectionalLSTM(nh, nh, nclass, ngpu))

    def forward(self, input, cpu_texts):
        # conv features
        image = network.np_to_variable(input)
        # conv = self.cnn(image)
        conv = utility.data_parallel(self.cnn, image, self.ngpu)

        b, c, h, w = conv.size()
        assert h == 1, "the height of conv must be 1"
        conv = conv.squeeze(2)
        conv = conv.permute(2, 0, 1)  # [w, b, c]

        # rnn features
        output = utility.data_parallel(self.rnn, conv, self.ngpu)

        # return output
        # cpu_texts = tuple(cpu_texts.reshape(1, -1)[0])

        cpu_texts = tuple(cpu_texts)
        # assert False

        #
        # utility.loadData(self.image, self.inputs)
        # print (image, "yoyo")
        alphabet = '0123456789abcdefghijklmnopqrstuvwxyz:-#\&\'!"$%&()*+-.:;<=>? ,/'
        # cpu_texts = self.frcnn.ocr

        print(cpu_texts)  # debug: ground-truth texts for this batch
        converter = utility.strLabelConverter(alphabet)
        t, l = converter.encode(cpu_texts)
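        # t: all labels concatenated into a single 1-D IntTensor, l: per-sample label lengths (CTC format)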

        text = torch.IntTensor(image.size(0) * 5)
        text = Variable(text)
        length = torch.IntTensor(image.size(0))
        length = Variable(length)
        utility.loadData(text, t)
        utility.loadData(length, l)
        # print(output)
        preds_size = Variable(torch.IntTensor([output.size(0)] *
                                              image.size(0)))
        # print text,length, preds_size,output

        cost = self.criterion(output, text, preds_size, length) / image.size(0)
        cost = cost.cuda()
        # cost.zero_grad()

        # self.prevcost=cost
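        # greedy (best-path) decoding: argmax per timestep, then collapse repeats/blanks in converter.decode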
        _, preds = output.max(2)
        preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        # print sim_preds.requires_grad
        for pred, target in zip(sim_preds, cpu_texts):
            print(pred, target)
        return cost
Ejemplo n.º 18
0
crnn = crnn.CRNN(opt.imgH, num_channels, nclass, opt.hidden_size)
crnn.apply(weights_init)
if opt.pretrained != '':
    print('loading pretrained model from %s' % opt.pretrained)
    crnn.load_state_dict(torch.load(opt.pretrained))
print(crnn)

image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH)
text = torch.IntTensor(opt.batchSize * 5)
length = torch.IntTensor(opt.batchSize)

if torch.cuda.is_available():
    crnn = crnn.cuda(opt.gpu)
    # crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu))
    image = image.cuda(opt.gpu)
    criterion = criterion.cuda(opt.gpu)

image = Variable(image)
text = Variable(text)
length = Variable(length)

# loss averager
loss_avg = utils.averager()

# setup optimizer
if opt.adam:
    optimizer = optim.Adam(crnn.parameters(),
                           lr=opt.lr,
                           betas=(opt.beta1, 0.999))
elif opt.adadelta:
    optimizer = optim.Adadelta(crnn.parameters())
Ejemplo n.º 19
0
class TlstmSeqRecognizer(kraken.lib.lstm.SeqRecognizer):
    """
    Something like ClstmSeqRecognizer, using pytorch instead of clstm.
    The serialization format is the same as the clstm/master branch.
    """
    def __init__(self,
                 fname='',
                 normalize=kraken.lib.lstm.normalize_nfkc,
                 cuda=torch.cuda.is_available()):
        self.fname = fname
        self.rnn = None
        self.normalize = normalize
        self.cuda_available = cuda
        if fname:
            self._load_model()

    @classmethod
    def init_model(cls,
                   ninput,
                   nhidden,
                   noutput,
                   codec,
                   normalize=kraken.lib.lstm.normalize_nfkc,
                   cuda=torch.cuda.is_available()):
        self = cls()
        self.codec = codec
        self.normalize = normalize
        self.rnn = TBIDILSTM(ninput, nhidden, noutput)
        self.setLearningRate()
        self.trial = 0
        self.mode = 'clstm'
        self.criterion = CTCLoss()
        self.cuda_available = cuda
        if self.cuda_available:
            self.cuda()
        return self

    def cuda(self):
        if not self.cuda_available:
            return 'CUDA not available!'

        self.rnn = self.rnn.cuda()
        self.criterion = self.criterion.cuda()

    def save_model(self, path):
        network = clstm_pb2.NetworkProto(kind='Stacked',
                                         ninput=self.rnn.ninput,
                                         noutput=self.rnn.noutput)

        network.codec.extend([0] +
                             [ord(c)
                              for c in self.codec.code2char.values()][1:])

        network.attribute.extend([
            clstm_pb2.KeyValue(key='kind', value='bidi'),
            clstm_pb2.KeyValue(key='learning_rate',
                               value='{:4f}'.format(self.rnn.learning_rate)),
            clstm_pb2.KeyValue(key='momentum',
                               value='{:4f}'.format(self.rnn.momentum)),
            clstm_pb2.KeyValue(key='trial', value=repr(self.trial))
        ])

        hiddenattr = clstm_pb2.KeyValue(key='nhidden',
                                        value=repr(self.rnn.nhidden))
        networks = {}
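        # clstm serializes a bidirectional net as Stacked(Parallel(lstm1, Reversed(lstm2)), SoftmaxLayer)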
        networks['paral'] = clstm_pb2.NetworkProto(kind='Parallel',
                                                   ninput=self.rnn.ninput,
                                                   noutput=self.rnn.nhidden *
                                                   2)

        networks['lstm1'] = clstm_pb2.NetworkProto(kind='NPLSTM',
                                                   ninput=self.rnn.ninput,
                                                   noutput=self.rnn.nhidden)
        networks['lstm1'].attribute.extend([hiddenattr])

        networks['rev'] = clstm_pb2.NetworkProto(kind='Reversed',
                                                 ninput=self.rnn.ninput,
                                                 noutput=self.rnn.nhidden)
        networks['lstm2'] = clstm_pb2.NetworkProto(kind='NPLSTM',
                                                   ninput=self.rnn.ninput,
                                                   noutput=self.rnn.nhidden)
        networks['lstm2'].attribute.extend([hiddenattr])

        networks['softm'] = clstm_pb2.NetworkProto(kind='SoftmaxLayer',
                                                   ninput=self.rnn.nhidden * 2,
                                                   noutput=self.rnn.noutput)
        networks['softm'].attribute.extend([hiddenattr])

        # weights
        weights = {}
        weights['lstm1'] = {}
        weights['lstm2'] = {}
        weights['softm'] = {}
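        # split PyTorch's stacked LSTM gate weights (input, forget, cell, output order)
        # back into clstm's separate WGI/WGF/WCI/WGO matrices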
        weights['lstm1']['WGI'], weights['lstm1']['WGF'], weights['lstm1']['WCI'], weights['lstm1']['WGO'] = \
            torch.cat([self.rnn.rnn.weight_ih_l0, self.rnn.rnn.weight_hh_l0], 1).split(self.rnn.nhidden, 0)
        weights['lstm2']['WGI'], weights['lstm2']['WGF'], weights['lstm2']['WCI'], weights['lstm2']['WGO'] = \
            torch.cat([self.rnn.rnn.weight_ih_l0_reverse, self.rnn.rnn.weight_hh_l0_reverse], 1).split(self.rnn.nhidden, 0)
        weights['softm']['W1'] = self.rnn.decoder.weight

        for n in weights.keys():
            for w in sorted(weights[n].keys()):
                warray = clstm_pb2.Array(name=w,
                                         dim=list(weights[n][w].size()))
                for v in weights[n][w].data.cpu().numpy().tolist():
                    warray.value.extend(v)
                networks[n].weights.extend([warray])

        networks['rev'].sub.extend([networks['lstm2']])
        networks['paral'].sub.extend([networks['lstm1'], networks['rev']])
        network.sub.extend([networks['paral'], networks['softm']])

        with open(path, 'wb') as fp:
            fp.write(network.SerializeToString())

    def _load_model(self):
        network = clstm_pb2.NetworkProto()
        with open(self.fname, 'rb') as f:
            network.ParseFromString(f.read())

        ninput = network.ninput
        noutput = network.noutput
        attributes = {a.key: a.value for a in network.attribute[:]}
        self.kind = attributes['kind']
        if len(attributes) > 1:
            lrate = float(attributes['learning_rate'])
            momentum = float(attributes['momentum'])
            self.trial = int(attributes['trial'])
            self.mode = "clstm"
        else:
            lrate = 1e-4
            momentum = 0.9
            self.trial = 0
            self.mode = 'clstm_compatibility'

        # Codec
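        # rebuild the codec: label 0 (presumably the CTC blank) maps to '~' (chr(126)), the rest to their code points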
        self.codec = kraken.lib.lstm.Codec()
        code2char, char2code = {}, {}
        for code, char in enumerate([126] + network.codec[1:]):
            code2char[code] = chr(char)
            char2code[chr(char)] = code
        self.codec.code2char = code2char
        self.codec.char2code = char2code

        # Networks
        networks = {}
        networks['softm'] = [
            n for n in network.sub[:] if n.kind == 'SoftmaxLayer'
        ][0]
        parallel = [n for n in network.sub[:] if n.kind == 'Parallel'][0]
        networks['lstm1'] = [
            n for n in parallel.sub[:] if n.kind.startswith('NPLSTM')
        ][0]
        rev = [n for n in parallel.sub[:] if n.kind == 'Reversed'][0]
        networks['lstm2'] = rev.sub[0]

        nhidden = int(networks['lstm1'].attribute[0].value)

        weights = {}
        for n in networks:
            weights[n] = {}
            for w in networks[n].weights[:]:
                weights[n][w.name] = np.array(w.value).reshape(w.dim[:])
        self.weights = weights

        weightnames = ('WGI', 'WGF', 'WCI', 'WGO')
        weightname_softm = 'W1'
        if self.mode == 'clstm_compatibility':
            weightnames = ('.WGI', '.WGF', '.WCI', '.WGO')
            weightname_softm = '.W'
        # lstm
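        # clstm stores [input | recurrent] weights concatenated per gate; the first ninput+1
        # columns become weight_ih (the extra column presumably carries clstm's bias), the rest weight_hh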
        ih_hh_splits = torch.cat([torch.from_numpy(w.astype('float32')) \
                                  for w in [weights['lstm1'][wn] \
                                        for wn in weightnames]],0).split(ninput+1,1)
        weight_ih_l0 = ih_hh_splits[0]
        weight_hh_l0 = torch.cat(ih_hh_splits[1:], 1)

        # lstm_reversed
        ih_hh_splits = torch.cat([torch.from_numpy(w.astype('float32')) \
                                  for w in [weights['lstm2'][wn] \
                                        for wn in weightnames]],0).split(ninput+1,1)
        weight_ih_l0_rev = ih_hh_splits[0]
        weight_hh_l0_rev = torch.cat(ih_hh_splits[1:], 1)

        # softmax
        weight_softm = torch.from_numpy(
            weights['softm'][weightname_softm].astype('float32'))
        if self.mode == "clstm_compatibility":
            weight_softm = torch.cat(
                [torch.zeros(len(weight_softm), 1), weight_softm], 1)

        # attach weights
        self.rnn = TBIDILSTM(ninput, nhidden, noutput)
        self.rnn.rnn.weight_ih_l0 = nn.Parameter(weight_ih_l0)
        self.rnn.rnn.weight_hh_l0 = nn.Parameter(weight_hh_l0)
        self.rnn.rnn.weight_ih_l0_reverse = nn.Parameter(weight_ih_l0_rev)
        self.rnn.rnn.weight_hh_l0_reverse = nn.Parameter(weight_hh_l0_rev)
        self.rnn.decoder.weight = nn.Parameter(weight_softm)

        self.setLearningRate(lrate, momentum)
        self.rnn.zero_grad()

        self.criterion = CTCLoss()

        if self.cuda_available:
            self.cuda()

    def load_codec(self, newcodec, initrange=0.1):
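        # swap in a new alphabet: build a fresh decoder and copy over the weight rows of
        # characters shared with the old codec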
        newdecoder = nn.Linear(2 * self.rnn.nhidden + 1,
                               newcodec.size(),
                               bias=False)
        if self.rnn.decoder.weight.is_cuda:
            newdecoder = newdecoder.cuda()
        newdecoder.weight.data.uniform_(-initrange, initrange)
        for c in newcodec.char2code:
            if c in self.codec.char2code:
                newdecoder.weight.data[newcodec.char2code[
                    c]] = self.rnn.decoder.weight.data[self.codec.char2code[c]]
        self.rnn.decoder = newdecoder
        self.codec = newcodec
        self.rnn.noutput = newcodec.size()

    def translate_back(self, output):
        if self.mode == 'clstm_compatibility':
            return kraken.lib.lstm.translate_back(
                output.exp().cpu().squeeze().data.numpy())

        _, preds = output.cpu().max(
            2)  # max() outputs values +1 when on gpu. why?
        dec = preds.transpose(1, 0).contiguous().view(-1).data
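        # CTC best-path decoding: drop blanks (label 0) and collapse consecutive repeated labels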
        char_list = []
        for i in range(len(dec)):
            if dec[i] != 0 and (not (i > 0 and dec[i - 1] == dec[i])):
                char_list.append(dec[i])
        return char_list

    def translate_back_locations(self, output):
        if self.mode == 'clstm_compatibility':
            return kraken.lib.lstm.translate_back_locations(
                output.exp().cpu().squeeze().data.numpy())

        val, preds = output.cpu().max(
            2)  # max() outputs values +1 when on gpu. why?
        dec = preds.transpose(1, 0).contiguous().view(-1).data
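        # same best-path decoding, additionally recording (label, start, end, confidence) per character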
        char_list = []
        start = None
        for i in range(len(dec)):
            if start is None and dec[i] != 0 and (
                    not (i > 0 and dec[i - 1] == dec[i])):
                start = i
                code = dec[i]
            if start is not None and (dec[i - 1] != dec[i]):
                char_list.append(
                    (code, start, i, val[start:i + 1].max().exp().data[-1]))
                start = None
        return char_list

    def predictString(self, line):
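        # feed one text-line image (one timestep per image column) through the BiLSTM and decode greedily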
        line = Variable(
            torch.from_numpy(
                line.reshape(-1, 1, self.rnn.ninput).astype('float32')))

        if self.cuda_available:
            line = line.cuda()

        out, _ = self.rnn.forward(line, self.rnn.init_hidden())
        self.outputs = out
        codes = [x[0] for x in self.translate_back_locations(out)]
        #codes = lstm.translate_back(out.exp().cpu().squeeze().data.numpy())
        res = ''.join(self.codec.decode(codes))
        return res

    def trainSequence(self, line, labels, update=1):
        line = Variable(
            torch.from_numpy(
                line.reshape(-1, 1, self.rnn.ninput).astype('float32')))

        if self.cuda_available:
            line = line.cuda()

        if not hasattr(self, 'hidden'):
            self.hidden = self.rnn.init_hidden()

        # repackage hidden
        self.hidden = tuple(Variable(h.data) for h in self.hidden)

        out, self.hidden = self.rnn.forward(line, self.hidden)

        tlabels = Variable(torch.IntTensor(labels))
        probs_sizes = Variable(torch.IntTensor([len(out)]))  # why Variable?
        label_sizes = Variable(torch.IntTensor([len(labels)]))
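        # CTC loss over the whole line: (network output, label sequence, output length, label length)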
        loss = self.criterion(out, tlabels, probs_sizes, label_sizes)

        self.rnn.zero_grad()

        loss.backward()

        if update:
            self.optim.step()
            self.trial += 1
            if self.mode == 'clstm_compatibility':
                self.mode = 'clstm'

        cls = self.translate_back(out)
        return cls

    def trainString(self, line, s, update=1):
        labels = self.codec.encode(s)
        cls = self.trainSequence(line, labels)
        return ''.join(self.codec.decode(cls))

    def setLearningRate(self, rate=1e-4, momentum=0.9):
        self.rnn.learning_rate = rate
        self.rnn.momentum = momentum
        self.optim = torch.optim.RMSprop(self.rnn.parameters(),
                                         lr=self.rnn.learning_rate,
                                         momentum=self.rnn.momentum)