def main(opt):
    """Evaluate a saved model on the test split.

    Builds the test dataset, constructs the model named by opt["model"],
    loads its checkpoint from opt["saved_model"], and runs test().

    Args:
        opt: dict of run options; "vocab_size" and "seq_length" are filled
            in here from the dataset before model construction.
    """
    dataset = VideoDataset(opt, "test")
    opt["vocab_size"] = dataset.get_vocab_size()
    opt["seq_length"] = dataset.max_len

    if opt["model"] == 'S2VTModel':
        model = S2VTModel(opt["vocab_size"], opt["max_len"],
                          opt["dim_hidden"], opt["dim_word"],
                          rnn_dropout_p=opt["rnn_dropout_p"]).cuda()
    elif opt["model"] == "S2VTAttModel":
        encoder = EncoderRNN(opt["dim_vid"], opt["dim_hidden"],
                             bidirectional=opt["bidirectional"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"])
        decoder = DecoderRNN(opt["vocab_size"], opt["max_len"],
                             opt["dim_hidden"], opt["dim_word"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"],
                             bidirectional=opt["bidirectional"])
        model = S2VTAttModel(encoder, decoder).cuda()
    elif opt["model"] == "CTCmodel":
        # +1 reserves an extra output class for the CTC blank label.
        model = CTCmodel(opt['vocab_size'] + 1, opt['dim_hidden'])
    else:
        # BUG FIX: previously an unknown model name fell through and hit a
        # NameError on `model` below; fail fast with a clear message.
        raise ValueError("Unknown model type: {}".format(opt["model"]))
    # model = nn.DataParallel(model)

    # Setup the model
    model.load_state_dict(torch.load(opt["saved_model"]))
    model.cuda()
    crit = CTCLoss()
    crit = crit.cuda()
    # crit = utils.LanguageModelCriterion()
    test(model, crit, dataset, dataset.get_vocab(), opt)
def train(root, start_epoch, epoch_num, letters, net=None, lr=0.1, fix_width=True):
    """Train (or continue training) a CRNN with CTC loss.

    Args:
        root: dataset root directory forwarded to load_data().
        start_epoch: index of the first epoch (used for logging only).
        epoch_num: number of epochs to run.
        letters: the recognition alphabet; the network is built with
            len(letters) + 1 outputs (the extra one is the CTC blank).
        net: optional pre-built CRNN to continue training; a fresh network
            is created when None.
        lr: Adadelta learning rate.
        fix_width: forwarded to load_data() to control image resizing.

    Returns:
        The trained network.
    """
    trainloader = load_data(root, training=True, fix_width=fix_width)
    use_cuda = torch.cuda.is_available()
    if not net:
        net = CRNN(1, len(letters) + 1)
    criterion = CTCLoss()
    optimizer = optim.Adadelta(net.parameters(), lr=lr)
    if use_cuda:
        net = net.cuda()
        criterion = criterion.cuda()
    labeltransformer = LabelTransformer(letters)

    print('==== Training.. ====')
    net.train()
    for epoch in range(start_epoch, start_epoch + epoch_num):
        print('---- epoch: %d ----' % (epoch, ))
        loss_sum = 0
        for i, (img, label) in enumerate(trainloader):
            label, label_length = labeltransformer.encode(label)
            if use_cuda:
                img = img.cuda()
            img, label = Variable(img), Variable(label)
            label_length = Variable(label_length)
            optimizer.zero_grad()
            outputs = net(img)
            # Every sample in the batch shares the same output length T
            # (outputs is T x B x C).
            output_length = Variable(
                torch.IntTensor([outputs.size(0)] * outputs.size(1)))
            loss = criterion(outputs, label, output_length, label_length)
            loss.backward()
            optimizer.step()
            # BUG FIX: `loss.data[0]` raises on 0-dim tensors in PyTorch
            # >= 0.5; `.item()` is the supported scalar accessor and is
            # what the rest of this file already uses.
            loss_sum += loss.item()
        print('loss = %f' % (loss_sum, ))
    print('Finished Training')
    return net
crnn.load_state_dict(torch.load(opt.crnn)) print(crnn) image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH) textAttention = torch.LongTensor(opt.batchSize * 5) lengthAttention = torch.IntTensor(opt.batchSize) textCTC = torch.IntTensor(opt.batchSize * 5) lengthCTC = torch.IntTensor(opt.batchSize) if opt.cuda: crnn.cuda() crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu)) image = image.cuda() textAttention = textAttention.cuda() criterionAttention = criterionAttention.cuda() criterionCTC = criterionCTC.cuda() image = Variable(image) textAttention = Variable(textAttention) lengthAttention = Variable(lengthAttention) textCTC = Variable(textCTC) lengthCTC = Variable(lengthCTC) # loss averager loss_avg = utils.averager() loss_CTC = utils.averager() loss_Attention = utils.averager() # setup optimizer if opt.adam:
def main(config_yaml):
    '''
    Training/Finetune CNN_RNN_Attention Model.

    The YAML config selects the data source, optimizer, and one of four
    modes: VAL_ONLY, FINETUNE, RESUME, or training from scratch.
    '''
    #### Load config settings. ####
    # `with` ensures the config file handle is closed (it previously leaked).
    with open(config_yaml, encoding='utf-8') as f:
        # NOTE(review): yaml.load without an explicit Loader executes
        # arbitrary tags; prefer yaml.safe_load if the config is untrusted.
        opt = yaml.load(f)
    if not os.path.isdir(opt['LOGGER_PATH']):
        os.mkdir(opt['LOGGER_PATH'])
    logger = Logger(opt['LOGGER_PATH'])
    if not os.path.isdir(opt['SAVE_PATH']):
        # os.makedirs replaces the shelled-out `mkdir -p` (portable, no shell).
        os.makedirs(opt['SAVE_PATH'], exist_ok=True)
    manualSeed = random.randint(1, 10000)
    random.seed(manualSeed)
    np.random.seed(manualSeed)
    torch.manual_seed(manualSeed)
    cudnn.benchmark = True

    #### Set up DataLoader. ####
    train_cfg = opt['TRAIN']
    ds_cfg = train_cfg['DATA_SOURCE']
    print('Building up dataset:{}'.format(ds_cfg['TYPE']))
    if ds_cfg['TYPE'] == 'SYN_DATA':
        text_gen = util.TextGenerator(ds_cfg['GEN_SET'], ds_cfg['GEN_LEN'])
        ds_train = dataset.synthDataset(ds_cfg['FONT_ROOT'],
                                        ds_cfg['FONT_SIZE'], text_gen)
    elif ds_cfg['TYPE'] == 'IMG_DATA':
        ds_train = dataset.trainDataset(
            ds_cfg['IMG_ROOT'], ds_cfg['TRAIN_SET'],
            transform=None)  #dataset.graybackNormalize()
    assert ds_train
    train_loader = torch.utils.data.DataLoader(
        ds_train,
        batch_size=train_cfg['BATCH_SIZE'],
        shuffle=True,
        sampler=None,
        num_workers=opt['WORKERS'],
        collate_fn=dataset.alignCollate(imgH=train_cfg['IMG_H'],
                                        imgW=train_cfg['MAX_W']))

    val_cfg = opt['VALIDATION']
    ds_val = dataset.testDataset(val_cfg['IMG_ROOT'], val_cfg['VAL_SET'],
                                 transform=None)  #dataset.graybackNormalize()
    assert ds_val
    val_loader = torch.utils.data.DataLoader(
        ds_val,
        batch_size=32,
        shuffle=False,
        num_workers=opt['WORKERS'],
        collate_fn=dataset.alignCollate(imgH=train_cfg['IMG_H'],
                                        imgW=train_cfg['MAX_W']))

    #### Model construction and Initialization. ####
    alphabet = keys.alphabet
    nClass = len(alphabet) + 1  # +1 for the CTC blank label
    opt['RNN']['multi_gpu'] = opt['N_GPU'] > 1
    model = crann.CRANN(opt, nClass)
    #print(model)

    #### Train/Val the model. ####
    converter = util.strLabelConverter(alphabet)
    criterion = CTCLoss()
    if opt['CUDA']:
        model.cuda()
        criterion.cuda()

    if opt['OPTIMIZER'] == 'RMSprop':
        optimizer = optim.RMSprop(model.parameters(), lr=opt['TRAIN']['LR'])
    elif opt['OPTIMIZER'] == 'Adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=opt['TRAIN']['LR'],
                               betas=(opt['TRAIN']['BETA1'], 0.999))
    elif opt['OPTIMIZER'] == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=opt['TRAIN']['LR'])
    else:
        optimizer = optim.Adadelta(model.parameters(), lr=opt['TRAIN']['LR'])

    start_epoch = 0
    if opt['VAL_ONLY']:
        print('=>loading pretrained model from %s for val only.' %
              opt['CRANN'])
        checkpoint = torch.load(opt['CRANN'])
        model.load_state_dict(checkpoint['state_dict'])
        val(model, val_loader, criterion, converter, 0, 0, logger, True)
    elif opt['FINETUNE']:
        print('=>loading pretrained model from %s for finetune.' %
              opt['CRANN'])
        checkpoint = torch.load(opt['CRANN'])
        # Only transplant the CNN weights from the checkpoint; the RNN/attention
        # parts keep their fresh initialization.
        model_dict = model.state_dict()
        cnn_dict = {
            "cnn." + k: v
            for k, v in checkpoint.items() if "cnn." + k in model_dict
        }
        model_dict.update(cnn_dict)
        model.load_state_dict(model_dict)
        for epoch in range(start_epoch, opt['EPOCHS']):
            adjust_lr(optimizer, opt['TRAIN']['LR'], epoch, opt['STEP'])
            train(model, train_loader, val_loader, criterion, optimizer, opt,
                  converter, epoch, logger)
    elif opt['RESUME']:
        print('=>loading checkpoint from %s for resume training.' %
              opt['CRANN'])
        checkpoint = torch.load(opt['CRANN'])
        start_epoch = checkpoint['epoch'] + 1
        print('resume from epoch:{}'.format(start_epoch))
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        for epoch in range(start_epoch, opt['EPOCHS']):
            adjust_lr(optimizer, opt['TRAIN']['LR'], epoch, opt['STEP'])
            train(model, train_loader, val_loader, criterion, optimizer, opt,
                  converter, epoch, logger)
    else:
        print('train from scratch.')
        for epoch in range(start_epoch, opt['EPOCHS']):
            adjust_lr(optimizer, opt['TRAIN']['LR'], epoch, opt['STEP'])
            train(model, train_loader, val_loader, criterion, optimizer, opt,
                  converter, epoch, logger)
# Historical variant that copied only the CNN weights into the model:
# for name, module in crnn.named_children():
#     if name == 'cnn':
#         module_dict = module.state_dict()
#         new_state_dict = {k: v for k, v in new_state_dict.items() if k in module_dict}
#         module_dict.update(new_state_dict)
#         module.load_state_dict(module_dict)

# Reusable buffers for one batch of images and their encoded labels.
image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH)
text = torch.IntTensor(opt.batchSize * 5)
length = torch.IntTensor(opt.batchSize)

# Pin the model, loss, and image buffer to the selected GPU.
if opt.cuda:
    crnn = crnn.cuda(id_gpu)
    image = image.cuda(id_gpu)
    criterion = criterion.cuda(id_gpu)

image = Variable(image)
text = Variable(text)
length = Variable(length)

# loss averager
loss_avg = util.averager()

# setup optimizer
if opt.adam:
    optimizer = optim.Adam(crnn.parameters(), lr=opt.lr,
                           betas=(opt.beta1, 0.999))
elif opt.adadelta:
    optimizer = optim.Adadelta(crnn.parameters(), lr=opt.lr)
m.bias.data.fill_(0) crnn = CRNN(nchannels, nclass, opt.nhidden) crnn.apply(weights_init) image = torch.FloatTensor(opt.batch_size, 1, 1, 1) text = torch.IntTensor(opt.batch_size * 5) length = torch.IntTensor(opt.batch_size) if opt.cuda: torch.cuda.set_device(opt.gpu_choice) crnn = crnn.cuda(opt.gpu_choice) image = image.cuda(opt.gpu_choice) criterion = criterion.cuda(opt.gpu_choice) if opt.crnn != '': print('loading pretrained model from %s' % opt.crnn) crnn.load_state_dict(torch.load(opt.crnn)) image = Variable(image) text = Variable(text) length = Variable(length) # loss averager loss_avg = utils.averager() # setup optimizer if opt.rms: optimizer = optim.RMSprop(crnn.parameters(), lr=opt.lr)
def main(opt):
    """Train a CRNN with CTC loss on the handwriting dataset.

    Seeds all RNGs, builds the train/test datasets, constructs and
    optionally restores the model, then alternates training batches with
    periodic validation and checkpointing.
    """
    print(opt)
    if opt.experiment is None:
        opt.experiment = 'expr'
    os.system('mkdir {0}'.format(opt.experiment))  # Why is this?

    opt.manualSeed = random.randint(1, 10000)  # fix seed
    print("Random Seed: ", opt.manualSeed)
    random.seed(opt.manualSeed)
    np.random.seed(opt.manualSeed)
    torch.manual_seed(opt.manualSeed)

    cudnn.benchmark = True

    if torch.cuda.is_available() and not opt.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    train_dataset = dataset.hwrDataset(mode="train")
    assert train_dataset
    # if not opt.random_sample:
    #     sampler = dataset.randomSequentialSampler(train_dataset, opt.batchSize)
    # else:
    #     sampler = None
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=opt.batchSize,
                                               shuffle=True,
                                               num_workers=int(opt.workers),
                                               collate_fn=dataset.alignCollate(
                                                   imgH=opt.imgH,
                                                   imgW=opt.imgW,
                                                   keep_ratio=True))

    test_dataset = dataset.hwrDataset(mode="test",
                                      transform=dataset.resizeNormalize(
                                          (100, 32)))

    nclass = len(opt.alphabet) + 1  # +1 for the CTC blank label
    nc = 1

    criterion = CTCLoss()

    # custom weights initialization called on crnn
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            m.weight.data.normal_(0.0, 0.02)
        elif classname.find('BatchNorm') != -1:
            m.weight.data.normal_(1.0, 0.02)
            m.bias.data.fill_(0)

    crnn = crnn_model.CRNN(opt.imgH, nc, nclass, opt.nh)
    crnn.apply(weights_init)
    if opt.crnn != '':
        print('loading pretrained model from %s' % opt.crnn)
        crnn.load_state_dict(torch.load(opt.crnn))
    print(crnn)

    # TODO make this central
    image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH)
    text = torch.IntTensor(opt.batchSize * 5)
    length = torch.IntTensor(opt.batchSize)

    if opt.cuda:
        crnn.cuda()
        crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu))
        image = image.cuda()
        criterion = criterion.cuda()

    image = Variable(image)
    text = Variable(text)
    length = Variable(length)

    # loss averager
    loss_avg = utils.averager()

    # setup optimizer — following the paper's recommendation (Adadelta).
    opt.adadelta = True
    if opt.adam:
        optimizer = optim.Adam(crnn.parameters(),
                               lr=opt.lr,
                               betas=(opt.beta1, 0.999))
    elif opt.adadelta:
        optimizer = optim.Adadelta(crnn.parameters(), lr=opt.lr)
    else:
        optimizer = optim.RMSprop(crnn.parameters(), lr=opt.lr)

    converter = utils.strLabelConverter(opt.alphabet)

    def val(net, dataset, criterion, max_iter=100):
        """Run up to max_iter validation batches; print loss and accuracy."""
        print('Start val')
        for p in crnn.parameters():
            p.requires_grad = False
        net.eval()
        data_loader = torch.utils.data.DataLoader(dataset,
                                                  shuffle=True,
                                                  batch_size=opt.batchSize,
                                                  num_workers=int(opt.workers))
        val_iter = iter(data_loader)

        n_correct = 0
        loss_avg = utils.averager()

        max_iter = min(max_iter, len(data_loader))
        for i in range(max_iter):
            # BUG FIX: `.next()` is Python-2 only — use next(); the stray
            # `i += 1` (and its debug print) that made `i` appear to jump
            # has been removed: the for-loop rebinds i each iteration.
            data = next(val_iter)
            cpu_images, cpu_texts = data
            batch_size = cpu_images.size(0)
            utils.loadData(image, cpu_images)
            t, l = converter.encode(cpu_texts)
            utils.loadData(text, t)
            utils.loadData(length, l)

            preds = crnn(image)
            preds_size = Variable(
                torch.IntTensor([preds.size(0)] * batch_size))
            cost = criterion(preds, text, preds_size, length) / batch_size
            loss_avg.add(cost)

            _, preds = preds.max(2)
            # preds = preds.squeeze(2)
            preds = preds.transpose(1, 0).contiguous().view(-1)
            sim_preds = converter.decode(preds.data, preds_size.data,
                                         raw=False)
            for pred, target in zip(sim_preds, cpu_texts):
                if pred == target.lower():
                    n_correct += 1

        raw_preds = converter.decode(preds.data, preds_size.data,
                                     raw=True)[:opt.n_test_disp]
        for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
            print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

        # NOTE(review): assumes every batch is full; a smaller final batch
        # slightly underestimates accuracy.
        accuracy = n_correct / float(max_iter * opt.batchSize)
        print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))

    for epoch in range(opt.niter):
        train_iter = iter(train_loader)
        i = 0
        while i < len(train_loader):
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()

            cost = train_batch(crnn, criterion, optimizer, train_iter, opt,
                               converter)
            loss_avg.add(cost)
            i += 1

            if i % opt.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (epoch, opt.niter, i, len(train_loader),
                       loss_avg.val()))
                loss_avg.reset()

            if i % opt.valInterval == 0:
                try:
                    val(crnn, test_dataset, criterion)
                except Exception as e:
                    print(e)

            # do checkpointing
            if i % opt.saveInterval == 0:
                torch.save(
                    crnn.state_dict(),
                    '{0}/netCRNN_{1}_{2}.pth'.format(opt.experiment, epoch,
                                                     i))
class LanguageModelTrainer:
    """Trainer for a CTC phoneme-recognition model.

    Wraps the train/validation/test loops, LR scheduling, logging via the
    module-level tLog/vLog loggers, and CTC beam-search decoding.
    """

    def __init__(self, model, loader, val_loader, test_loader, max_epochs=1,
                 run_id='exp'):
        """
        Use this class to train your model
        """
        # feel free to add any other parameters here
        self.model = model.cuda() if torch.cuda.is_available() else model
        self.loader = loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.train_losses = []
        self.val_losses = []
        self.predictions = []
        self.predictions_test = []
        self.generated_logits = []
        self.generated = []
        self.generated_logits_test = []
        self.generated_test = []
        self.epochs = 0
        self.max_epochs = max_epochs
        self.run_id = run_id

        self.optimizer = torch.optim.Adam(model.parameters(), lr=1e-3,
                                          weight_decay=1e-6)
        self.criterion = CTCLoss()  # size_average=True, length_average=False)
        self.criterion = (self.criterion.cuda()
                          if torch.cuda.is_available() else self.criterion)
        self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, factor=0.1, patience=2)
        self.LD = Levenshtein(phoneme_list.PHONEME_MAP)
        self.best_rate = 1e10  # best (lowest) validation distance so far
        # blank_id=0 matches the blank index used by the CTC criterion.
        self.decoder = CTCBeamDecoder(labels=[' '] + phoneme_list.PHONEME_MAP,
                                      blank_id=0, beam_width=150)

    def train(self):
        """Run the full training loop with per-epoch validation."""
        self.model.train()  # set to training mode
        for epoch in range(self.max_epochs):
            epoch_loss = 0
            training_epoch_loss = 0
            for batch_num, (inputs, targets) in enumerate(self.loader):
                loss = self.train_batch(inputs, targets)
                epoch_loss += loss
                training_epoch_loss += loss

                # training print
                batch_print = 40
                if batch_num % batch_print == 0 and batch_num != 0:
                    self.print_training(batch_num, self.loader.batch_size,
                                        training_epoch_loss, batch_print)
                    training_epoch_loss = 0

            epoch_loss = epoch_loss / (batch_num + 1)
            self.epochs += 1
            self.scheduler.step(epoch_loss)
            print('[TRAIN] Epoch [%d/%d] Loss: %.4f' %
                  (self.epochs, self.max_epochs, epoch_loss))
            self.train_losses.append(epoch_loss)

            # log loss
            tLog.log_scalar('training_loss', epoch_loss, self.epochs)
            # log values and gradients of the parameters (histogram summary).
            # BUG FIX: this used the *global* `model`; use the trainer's own.
            for tag, value in self.model.named_parameters():
                tag = tag.replace('.', '/')
                tLog.log_histogram(tag, value.data.cpu().numpy(), self.epochs)
                tLog.log_histogram(tag + '/grad',
                                   value.grad.data.cpu().numpy(), self.epochs)

            # every 1 epochs, print validation statistics
            epochs_print = 1
            if self.epochs % epochs_print == 0 and not self.epochs == 0:
                with torch.no_grad():
                    t = "######### Epoch {} #########".format(self.epochs)
                    print(t)
                    logging.info(t)
                    ls = 0
                    lens = 0
                    for j, (val_inputs, val_labels) in (enumerate(self.val_loader)):
                        # Show one random sample's decoded prediction vs truth.
                        idx = np.random.randint(0, len(val_inputs))
                        print('Pred', self.gen_batch(val_inputs[idx:idx + 1]))
                        print('Ground', ''.join(
                            [phoneme_list.PHONEME_MAP[o - 1]
                             for o in val_labels[idx]]))
                        val_output, _, feature_lengths = self.model(val_inputs)
                        ls += self.LD.forward(val_output, val_labels,
                                              feature_lengths)
                        lens += len(val_inputs)
                    ls /= lens
                    t = "Validation LD {}:".format(ls)
                    print(t)
                    logging.info(t)
                    t = '--------------------------------------------'
                    print(t)
                    logging.info(t)
                    # log loss
                    vLog.log_scalar('LD', ls, self.epochs)
                    if self.best_rate > ls:
                        # BUG FIX: was the global `model`; save our own model.
                        torch.save(self.model.state_dict(),
                                   "models/checkpoint.pt")
                        self.best_rate = ls

    def print_training(self, batch_num, batch_size, loss, batch_print):
        """Print/log progress within an epoch and the mean recent loss."""
        t = 'At {:.0f}% of epoch {}'.format(
            batch_num * batch_size / self.loader.dataset.num_entries * 100,
            self.epochs)
        print(t)
        logging.info(t)
        t = "Training loss : {}".format(loss / batch_print)
        print(t)
        logging.info(t)
        t = '--------------------------------------------'
        print(t)
        logging.info(t)

    def train_batch(self, inputs, targets):
        """One optimizer step on a single batch; returns the scalar loss."""
        # lens of all targets (sorted by loader)
        lens_tar = torch.Tensor([len(target) for target in targets])
        targets = torch.cat(targets)
        targets = targets.cuda() if torch.cuda.is_available() else targets
        outputs, _, lens_in = self.model(inputs)  # T x B x num_phonema, ignore hidden
        lens_in = torch.Tensor(lens_in)
        # warp-ctc expects int32 CPU tensors for targets and lengths.
        loss = self.criterion(outputs, targets.int().cpu(),
                              lens_in.int().cpu(), lens_tar.int().cpu())
        loss.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()
        return float(loss)  # avoid autograd retention

    def test(self):
        """Decode every test batch; returns the list of predicted strings."""
        preds = []
        for i, inputs in enumerate(self.test_loader):
            pred = self.gen_batch(inputs)
            preds += pred
        return preds

    def gen_batch(self, data_batch):
        """Beam-decode one batch of inputs into phoneme strings."""
        # BUG FIX: was the global `model`; use the trainer's own model.
        scores, _, out_lengths = self.model(data_batch)
        out_lengths = torch.Tensor(out_lengths)
        scores = torch.transpose(scores, 0, 1)
        probs = F.softmax(scores, dim=2).data.cpu()
        output, scores, timesteps, out_seq_len = self.decoder.decode(
            probs=probs, seq_lens=out_lengths)
        out_seq = []
        for i in range(output.size(0)):
            # output indices are 1-based on top of the blank at 0.
            chrs = [phoneme_list.PHONEME_MAP[o.item() - 1]
                    for o in output[i, 0, :out_seq_len[i, 0]]]
            out_seq.append("".join(chrs))
        return out_seq
else: for k, v in model_dict.items(): if (k != weig1 or k != bias1): model_dict[k] = pre_trainmodel[k] crnn.load_state_dict(model_dict) print(crnn) image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH) text = torch.IntTensor(opt.batchSize * 5) length = torch.IntTensor(opt.batchSize) if opt.cuda: crnn.cuda() crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu)) image = image.cuda() criterion = criterion.cuda() image = Variable(image) text = Variable(text) length = Variable(length) # loss averager loss_avg = utils.averager() # setup optimizer if opt.adam: optimizer = optim.Adam(crnn.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) elif opt.adadelta: optimizer = optim.Adadelta(crnn.parameters(), lr=opt.lr)
def main(opt, case):
    """Train a CRNN with CTC loss; `case` is a tag appended to log lines.

    Supports resuming from both old-style (bare state_dict) and new-style
    (dict with epoch/accuracy/state) checkpoints via opt.uses_old_saving.
    """
    print("Arguments are : " + str(opt))
    if opt.experiment is None:
        opt.experiment = 'expr'
    os.system('mkdir {0}'.format(opt.experiment))  # Why do we use this?

    opt.manualSeed = random.randint(1, 10000)  # fix seed
    print("Random Seed: ", opt.manualSeed)
    random.seed(opt.manualSeed)
    np.random.seed(opt.manualSeed)
    torch.manual_seed(opt.manualSeed)

    cudnn.benchmark = True

    if torch.cuda.is_available() and not opt.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )
        opt.cuda = True
        print('Set CUDA to true.')

    train_dataset = dataset.hwrDataset(mode="train")
    assert train_dataset
    # The shuffle needs to be false when the sizing has been done.
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=opt.batchSize,
                                               shuffle=False,
                                               num_workers=int(opt.workers),
                                               collate_fn=dataset.alignCollate(
                                                   imgH=opt.imgH,
                                                   imgW=opt.imgW,
                                                   keep_ratio=True))

    test_dataset = dataset.hwrDataset(mode="test",
                                      transform=dataset.resizeNormalize(
                                          (100, 32)))

    nclass = len(opt.alphabet) + 1  # +1 for the CTC blank label
    nc = 1
    criterion = CTCLoss()

    # custom weights initialization called on crnn
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            m.weight.data.normal_(0.0, 0.02)
        elif classname.find('BatchNorm') != -1:
            m.weight.data.normal_(1.0, 0.02)
            m.bias.data.fill_(0)

    crnn = crnn_model.CRNN(opt.imgH, nc, nclass, opt.nh)
    crnn.apply(weights_init)

    if opt.cuda and not opt.uses_old_saving:
        crnn.cuda()
        crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu))
        criterion = criterion.cuda()

    # BUG FIX: start_epoch was only assigned inside the branches below, so
    # training from scratch (opt.crnn == '') raised NameError at the epoch
    # loop. Default it here.
    start_epoch = 0
    if opt.crnn != '':
        print('Loading pre-trained model from %s' % opt.crnn)
        loaded_model = torch.load(opt.crnn)
        if opt.uses_old_saving:
            print("Assuming model was saved in rudementary fashion")
            crnn.load_state_dict(loaded_model)
            crnn.cuda()
            crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu))
            criterion = criterion.cuda()
        else:
            print("Loaded model accuracy: " + str(loaded_model['accuracy']))
            print("Loaded model epoch: " + str(loaded_model['epoch']))
            start_epoch = loaded_model['epoch']
            crnn.load_state_dict(loaded_model['state'])

    loss_avg = utils.averager()

    # If following the paper's recommendation, using AdaDelta
    if opt.adam:
        optimizer = optim.Adam(crnn.parameters(),
                               lr=opt.lr,
                               betas=(opt.beta1, 0.999))
    elif opt.adadelta:
        optimizer = optim.Adadelta(crnn.parameters(), lr=opt.lr)
    elif opt.adagrad:
        print("Using adagrad")
        optimizer = optim.Adagrad(crnn.parameters(), lr=opt.lr)
    else:
        optimizer = optim.RMSprop(crnn.parameters(), lr=opt.lr)

    converter = utils.strLabelConverter(opt.alphabet)

    best_val_accuracy = 0
    for epoch in range(start_epoch, opt.niter):
        train_iter = iter(train_loader)
        i = 0
        while i < len(train_loader):
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()

            cost = train_batch(crnn, criterion, optimizer, train_iter, opt,
                               converter)
            loss_avg.add(cost)
            i += 1

            if i % opt.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (epoch, opt.niter, i, len(train_loader),
                       loss_avg.val()) + " " + case)
                loss_avg.reset()

            if i % opt.valInterval == 0:
                try:
                    val_loss_avg, accuracy = val_batch(
                        crnn, opt, test_dataset, converter, criterion)
                    model_state = {
                        'epoch': epoch + 1,
                        'iter': i,
                        'state': crnn.state_dict(),
                        'accuracy': accuracy,
                        'val_loss_avg': val_loss_avg,
                    }
                    utils.save_checkpoint(
                        model_state, accuracy > best_val_accuracy,
                        '{0}/netCRNN_{1}_{2}_{3}.pth'.format(
                            opt.experiment, epoch, i, accuracy),
                        opt.experiment)
                    if accuracy > best_val_accuracy:
                        best_val_accuracy = accuracy
                except Exception as e:
                    print(e)
class ModuleTrain:
    """Train and evaluate a CRNN-style model with CTC loss on image/text data.

    Checkpoints the current model every epoch and keeps separate "best loss"
    and "best accuracy" copies of the weights file.
    """

    def __init__(self, train_path, test_path, model_file, model, img_h=32,
                 img_w=110, batch_size=64, lr=1e-3, use_unicode=True,
                 best_loss=0.2, use_gpu=True, workers=1):
        self.model = model
        self.model_file = model_file    # path where checkpoints are written
        self.use_unicode = use_unicode  # labels arrive as bytes and need decoding
        self.img_h = img_h
        self.img_w = img_w
        self.batch_size = batch_size
        self.lr = lr
        self.best_loss = best_loss      # save a "_best" copy below this test loss
        self.best_acc = 0.95            # save a "_best_acc" copy above this accuracy
        self.use_gpu = use_gpu
        self.workers = workers

        self.converter = utils.strLabelConverter(alphabet)
        self.criterion = CTCLoss()

        if self.use_gpu:
            print("[use gpu] ...")
            self.model = self.model.cuda()
            self.criterion = self.criterion.cuda()
        if torch.cuda.is_available() and not self.use_gpu:
            print("[WARNING] You have a CUDA device, so you should probably run with --cuda")

        # Load an existing weights file if present.
        if os.path.exists(self.model_file):
            self.load(self.model_file)
        else:
            print('[Load model] error !!!')

        self.transform = T.Compose([
            T.Resize((self.img_h, self.img_w)),
            T.ToTensor(),
            # T.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5])
        ])

        train_label = os.path.join(train_path, 'labels_normal.txt')
        train_dataset = my_dataset.MyDataset(root=train_path,
                                             label_file=train_label,
                                             transform=self.transform,
                                             is_train=True,
                                             img_h=self.img_h,
                                             img_w=self.img_w)
        self.train_loader = torch.utils.data.DataLoader(
            dataset=train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=int(self.workers))

        test_label = os.path.join(test_path, 'labels_normal.txt')
        test_dataset = my_dataset.MyDataset(root=test_path,
                                            label_file=test_label,
                                            transform=self.transform,
                                            is_train=False,
                                            img_h=self.img_h,
                                            img_w=self.img_w)
        self.test_loader = torch.utils.data.DataLoader(
            dataset=test_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=int(self.workers))

        self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr,
                                    weight_decay=1e-5)

    def _best_model_name(self, suffix):
        """Insert `suffix` before the file extension of self.model_file.

        Extracted helper: this filename construction used to be duplicated
        for the "_best" and "_best_acc" checkpoints.
        """
        parts = self.model_file.split('.')
        name = ""
        for idx in range(len(parts)):
            name += parts[idx]
            if idx == (len(parts) - 2):
                name += suffix
            if idx != (len(parts) - 1):
                name += '.'
        return name

    def train(self, epoch, decay_epoch=80):
        """Train for `epoch` epochs, decaying the LR by 10x every decay_epoch."""
        image = torch.FloatTensor(self.batch_size, 3, self.img_h, self.img_w)
        text = torch.IntTensor(self.batch_size * 5)
        length = torch.IntTensor(self.batch_size)
        image = Variable(image)
        text = Variable(text)
        length = Variable(length)

        print('[train] epoch: %d' % epoch)
        for epoch_i in range(epoch):
            train_loss = 0.0
            correct = 0

            if epoch_i >= decay_epoch and epoch_i % decay_epoch == 0:
                # decay the learning rate
                self.lr = self.lr * 0.1
                for param_group in self.optimizer.param_groups:
                    param_group['lr'] = self.lr

            print('================================================')
            self.model.train()
            for batch_idx, (data, target) in enumerate(self.train_loader):
                if self.use_unicode:
                    target = [tx.decode('utf-8') for tx in target]
                batch_size = data.size(0)
                utils.loadData(image, data)
                t, l = self.converter.encode(target)
                utils.loadData(text, t)
                utils.loadData(length, l)
                if self.use_gpu:
                    image = image.cuda()

                self.optimizer.zero_grad()
                for p in self.model.parameters():
                    p.requires_grad = True

                preds = self.model(image)
                # Every sample shares the same output length T (preds is T x B x C).
                preds_size = Variable(
                    torch.IntTensor([preds.size(0)] * batch_size))
                loss = self.criterion(preds, text, preds_size, length)
                loss.backward()
                self.optimizer.step()
                train_loss += loss.item()

                _, preds = preds.max(2)
                preds = preds.transpose(1, 0).contiguous().view(-1)
                sim_preds = self.converter.decode(preds.data, preds_size.data,
                                                  raw=False)
                # Renamed loop variable: `target` was shadowed here before.
                for pred, gt in zip(sim_preds, target):
                    if pred.strip() == gt.strip():
                        correct += 1

            train_loss /= len(self.train_loader.dataset)
            acc = float(correct) / float(len(self.train_loader.dataset))
            print('[Train] Epoch: {} \tLoss: {:.6f}\tAcc: {:.6f}\tlr: {}'.format(
                epoch_i, train_loss, acc, self.lr))

            # Test
            test_loss, test_acc = self.test()
            if test_loss < self.best_loss:
                self.best_loss = test_loss
                self.save(self._best_model_name('_best'))  # save the best model
            if test_acc > self.best_acc:
                self.best_acc = test_acc
                self.save(self._best_model_name('_best_acc'))  # save the best model
        self.save(self.model_file)

    def test(self):
        """Evaluate on the test loader; returns (mean loss, accuracy)."""
        image = torch.FloatTensor(self.batch_size, 3, self.img_h, self.img_w)
        text = torch.IntTensor(self.batch_size * 5)
        length = torch.IntTensor(self.batch_size)
        image = Variable(image)
        text = Variable(text)
        length = Variable(length)

        for p in self.model.parameters():
            p.requires_grad = False
        test_loss = 0.0
        correct = 0
        time_start = time.time()
        self.model.eval()
        for data, target in self.test_loader:
            cpu_images = data
            cpu_texts = target
            batch_size = cpu_images.size(0)
            utils.loadData(image, cpu_images)
            if self.use_unicode:
                cpu_texts = [tx.decode('utf-8') for tx in cpu_texts]
            t, l = self.converter.encode(cpu_texts)
            utils.loadData(text, t)
            utils.loadData(length, l)
            if self.use_gpu:
                image = image.cuda()

            preds = self.model(image)
            preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
            loss = self.criterion(preds, text, preds_size, length)
            test_loss += loss.item()

            _, preds = preds.max(2)
            preds = preds.transpose(1, 0).contiguous().view(-1)
            sim_preds = self.converter.decode(preds.data, preds_size.data,
                                              raw=False)
            for pred, gt in zip(sim_preds, cpu_texts):
                if pred.strip() == gt.strip():
                    correct += 1

        time_end = time.time()
        time_avg = float(time_end - time_start) / float(len(self.test_loader.dataset))
        accuracy = correct / float(len(self.test_loader.dataset))
        test_loss /= len(self.test_loader.dataset)
        print('[Test] loss: %f, accuray: %f, time: %f' %
              (test_loss, accuracy, time_avg))
        return test_loss, accuracy

    def load(self, name):
        """Load model weights from `name`."""
        print('[Load model] %s ...' % name)
        self.model.load_state_dict(torch.load(name))

    def save(self, name):
        """Save model weights to `name`."""
        print('[Save model] %s ...' % name)
        torch.save(self.model.state_dict(), name)
from modules.CTCDecoder import Decoder from warpctc_pytorch import CTCLoss if __name__ == '__main__': print('Loading options...') opt = toml.loads(open('options.toml', 'r').read()) # construct model exp = Exp(opt) model = exp.model decoder = Decoder(exp.trainset.vocab, lm_path=opt['general']['lm_path']) crit = CTCLoss() if opt['general']['cuda']: model = model.cuda() #model = nn.DataParallel(model).cuda() crit = crit.cuda() if opt['general']['use_keras_weights']: from nn_transfer import transfer transfer.convert_lipnet(model, 'nn_transfer/unseen-weights178.h5') if opt['general']['freeze_conv']: def freeze(m): m.requires_grad = False model.conv.apply(freeze) # load model try: niters = opt['general']['start_iter'] except:
crnn = crnn.CRNN(opt.imgH, nc, nclass, nh, ngpu) crnn.apply(weights_init) if opt.crnn != '': print('loading pretrained model from %s' % opt.crnn) crnn.load_state_dict(torch.load(opt.crnn)) print(crnn) image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH) text = torch.IntTensor(opt.batchSize * 5) length = torch.IntTensor(opt.batchSize) if opt.cuda: crnn.cuda() image = image.cuda() criterion = criterion.cuda() image = Variable(image) text = Variable(text) length = Variable(length) # loss averager loss_avg = utils.averager() # setup optimizer if opt.adam: optimizer = optim.Adam(crnn.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) elif opt.adadelta: optimizer = optim.Adadelta(crnn.parameters(), lr=opt.lr) else:
class Trainer(object):
    """Train and validate a CRNN text recognizer with CTC loss.

    Reads its configuration from the module-level ``args`` namespace,
    builds the network/dataloaders/optimizer in ``__init__``, and exposes
    ``train()`` as the entry point.
    """

    def __init__(self):
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus
        if args.chars_file == '':
            self.alphabet = alphabetChinese
        else:
            self.alphabet = utils.load_chars(args.chars_file)
        nclass = len(self.alphabet) + 1  # +1 for the CTC blank symbol
        nc = 1  # single-channel (grayscale) input
        self.net = CRNN(args.imgH, nc, args.nh, nclass)
        self.train_dataloader, self.val_dataloader = self.dataloader(self.alphabet)
        self.criterion = CTCLoss()
        self.optimizer = self.get_optimizer()
        self.converter = utils.strLabelConverter(self.alphabet, ignore_case=False)
        self.best_acc = 0.00001
        model_name = '%s' % (args.dataset_name)
        if not os.path.exists(args.save_prefix):
            os.mkdir(args.save_prefix)
        args.save_prefix += model_name
        if args.pretrained != '':
            print('loading pretrained model from %s' % args.pretrained)
            checkpoint = torch.load(args.pretrained)
            if 'model_state_dict' in checkpoint.keys():
                # self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
                args.start_epoch = checkpoint['epoch']
                self.best_acc = checkpoint['best_acc']
                checkpoint = checkpoint['model_state_dict']
            # Strip the 'module.' prefix that DataParallel checkpoints carry.
            from collections import OrderedDict
            model_dict = OrderedDict()
            for k, v in checkpoint.items():
                if 'module' in k:
                    model_dict[k[7:]] = v
                else:
                    model_dict[k] = v
            self.net.load_state_dict(model_dict)
        if not args.cuda and torch.cuda.is_available():
            print(
                "WARNING: You have a CUDA device, so you should probably run with --cuda"
            )
        elif args.cuda and torch.cuda.is_available():
            print('available gpus is ', torch.cuda.device_count())
            # BUG FIX: DataParallel has no ``output_dim`` keyword (its kwarg is
            # ``dim``); the original call raised TypeError on the CUDA path.
            self.net = torch.nn.DataParallel(self.net).cuda()
            self.criterion = self.criterion.cuda()

    def dataloader(self, alphabet):
        """Build (train_dataloader, val_dataloader); val is None if
        ``args.val_dir`` does not exist."""
        # train_transform = transforms.Compose(
        #     [transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
        #      resizeNormalize(args.imgH)])
        # train_dataset = BaseDataset(args.train_dir, alphabet, transform=train_transform)
        train_dataset = NumDataset(args.train_dir,
                                   alphabet,
                                   transform=resizeNormalize(args.imgH))
        train_dataloader = DataLoader(dataset=train_dataset,
                                      batch_size=args.batch_size,
                                      shuffle=True,
                                      num_workers=args.num_workers,
                                      pin_memory=True)
        if os.path.exists(args.val_dir):
            # val_dataset = BaseDataset(args.val_dir, alphabet, transform=resizeNormalize(args.imgH))
            val_dataset = NumDataset(args.val_dir,
                                     alphabet,
                                     mode='test',
                                     transform=resizeNormalize(args.imgH))
            val_dataloader = DataLoader(dataset=val_dataset,
                                        batch_size=args.batch_size,
                                        shuffle=False,
                                        num_workers=args.num_workers,
                                        pin_memory=True)
        else:
            val_dataloader = None
        return train_dataloader, val_dataloader

    def get_optimizer(self):
        """Create the optimizer selected by ``args.optimizer``
        (sgd / adam / default rmsprop)."""
        if args.optimizer == 'sgd':
            optimizer = optim.SGD(
                self.net.parameters(),
                lr=args.lr,
                momentum=args.momentum,
                weight_decay=args.wd,
            )
        elif args.optimizer == 'adam':
            optimizer = optim.Adam(
                self.net.parameters(),
                lr=args.lr,
                betas=(args.beta1, 0.999),
            )
        else:
            optimizer = optim.RMSprop(
                self.net.parameters(),
                lr=args.lr,
                momentum=args.momentum,
                weight_decay=args.wd,
            )
        return optimizer

    def train(self):
        """Main training loop: logs progress, periodically validates and
        checkpoints the best / interval models."""
        logging.basicConfig()
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        log_file_path = args.save_prefix + '_train.log'
        log_dir = os.path.dirname(log_file_path)
        if log_dir and not os.path.exists(log_dir):
            os.mkdir(log_dir)
        fh = logging.FileHandler(log_file_path)
        logger.addHandler(fh)
        logger.info(args)
        logger.info('Start training from [Epoch {}]'.format(args.start_epoch + 1))
        losses = utils.Averager()
        train_accuracy = utils.Averager()
        # BUG FIX: ``acc`` was only assigned inside the val_interval branch, so
        # the save_interval branch could hit a NameError on epochs without
        # validation.  Track the most recent validation accuracy instead.
        acc = 0.0
        for epoch in range(args.start_epoch, args.nepoch):
            self.net.train()
            btic = time.time()
            for i, (imgs, labels) in enumerate(self.train_dataloader):
                batch_size = imgs.size()[0]
                imgs = imgs.cuda()
                preds = self.net(imgs).cpu()
                # length: per-sample label length; text: concatenated label
                # indices for the whole batch.
                text, length = self.converter.encode(labels)
                # CTC input lengths: every sample spans the full time axis.
                preds_size = torch.IntTensor([preds.size(0)] * batch_size)
                loss_avg = self.criterion(preds, text, preds_size,
                                          length) / batch_size
                self.optimizer.zero_grad()
                loss_avg.backward()
                self.optimizer.step()
                losses.update(loss_avg.item(), batch_size)
                # Greedy decode for a running training-accuracy estimate.
                _, preds_m = preds.max(2)
                preds_m = preds_m.transpose(1, 0).contiguous().view(-1)
                sim_preds = self.converter.decode(preds_m.data,
                                                  preds_size.data,
                                                  raw=False)
                n_correct = 0
                for pred, target in zip(sim_preds, labels):
                    if pred == target:
                        n_correct += 1
                train_accuracy.update(n_correct, batch_size, MUL_n=False)
                if args.log_interval and not (i + 1) % args.log_interval:
                    logger.info(
                        '[Epoch {}/{}][Batch {}/{}], Speed: {:.3f} samples/sec, Loss:{:.3f}'
                        .format(epoch + 1, args.nepoch, i + 1,
                                len(self.train_dataloader),
                                batch_size / (time.time() - btic),
                                losses.val()))
                    losses.reset()
            logger.info(
                'Training accuracy: {:.3f}, [#correct:{} / #total:{}]'.format(
                    train_accuracy.val(), train_accuracy.sum,
                    train_accuracy.count))
            train_accuracy.reset()
            if args.val_interval and not (epoch + 1) % args.val_interval:
                acc = self.validate(logger)
                if acc > self.best_acc:
                    self.best_acc = acc
                    save_path = '{:s}_best.pth'.format(args.save_prefix)
                    torch.save(
                        {
                            'epoch': epoch,
                            'model_state_dict': self.net.state_dict(),
                            # 'optimizer_state_dict': self.optimizer.state_dict(),
                            'best_acc': self.best_acc,
                        }, save_path)
                # CONSISTENCY FIX: use the configured ``logger`` (file + console)
                # instead of the root ``logging`` module.
                logger.info("best acc is:{:.3f}".format(self.best_acc))
            if args.save_interval and not (epoch + 1) % args.save_interval:
                save_path = '{:s}_{:04d}_{:.3f}.pth'.format(
                    args.save_prefix, epoch + 1, acc)
                torch.save(
                    {
                        'epoch': epoch,
                        'model_state_dict': self.net.state_dict(),
                        # 'optimizer_state_dict': self.optimizer.state_dict(),
                        'best_acc': self.best_acc,
                    }, save_path)

    def validate(self, logger):
        """Evaluate on the validation set; returns exact-match accuracy
        (0 when no validation loader is configured)."""
        if self.val_dataloader is None:
            return 0
        logger.info('Start validate.')
        losses = utils.Averager()
        self.net.eval()
        n_correct = 0
        with torch.no_grad():
            for i, (imgs, labels) in enumerate(self.val_dataloader):
                batch_size = imgs.size()[0]
                imgs = imgs.cuda()
                preds = self.net(imgs).cpu()
                text, length = self.converter.encode(labels)
                preds_size = torch.IntTensor(
                    [preds.size(0)] * batch_size)  # timestep * batchsize
                loss_avg = self.criterion(preds, text, preds_size,
                                          length) / batch_size
                losses.update(loss_avg.item(), batch_size)
                _, preds = preds.max(2)
                preds = preds.transpose(1, 0).contiguous().view(-1)
                sim_preds = self.converter.decode(preds.data,
                                                  preds_size.data,
                                                  raw=False)
                for pred, target in zip(sim_preds, labels):
                    if pred == target:
                        n_correct += 1
        accuracy = n_correct / float(losses.count)
        logger.info(
            'Evaling loss: {:.3f}, accuracy: {:.3f}, [#correct:{} / #total:{}]'
            .format(losses.val(), accuracy, n_correct, losses.count))
        return accuracy
model.load_state_dict(torch.load(modelpath)) print('Done!') k = 0 losstotal = 0.0 printinterval = opt.printinterval valinterval = opt.valinterval numinprint = 0 # train for epoch in range(max_epoch): for i, (data, label) in enumerate(train_loader): k = k + 1 numinprint = numinprint + 1 if torch.cuda.is_available and use_gpu: data = data.cuda() loss_func = loss_func.cuda() model.train() labels = torch.IntTensor([]) for j in range(label.size(0)): labels = torch.cat((labels, label[j]), 0) output = model(data) output_size = torch.IntTensor([output.size(0)] * int(output.size(1))) label_size = torch.IntTensor([label.size(1)] * int(label.size(0))) loss = loss_func(output, labels, output_size, label_size) / label.size(0) losstotal += float(loss) if k % printinterval == 0: # display
class TrainModel(object):
    """Fine-tune a CRNN model on images found under ``../data/ocr`` using
    CTC loss; checkpoints to ``new_modellstm.pth`` every half epoch."""

    def __init__(self, crnn_model):
        self.crnn_model = crnn_model
        # Fixed training constants.
        self.batchSize = 2
        workers = 1
        imgH = 32
        imgW = 280
        keep_ratio = True
        self.nepochs = 10
        self.acc = 0
        lr = 0.1
        # Pre-allocated buffers refilled by loadData each batch.
        self.image = torch.FloatTensor(self.batchSize, 3, imgH, imgH)
        self.text = torch.IntTensor(self.batchSize * 5)
        self.length = torch.IntTensor(self.batchSize)
        self.converter = strLabelConverter(''.join(alphabetChinese))
        self.optimizer = optim.Adadelta(crnn_model.parameters(), lr=lr)
        roots = glob('../data/ocr/*/*.jpg')
        # Character balance is not considered in this split.
        trainP, testP = train_test_split(roots, test_size=0.1)
        traindataset = PathDataset(trainP, alphabetChinese)
        self.testdataset = PathDataset(testP, alphabetChinese)
        self.criterion = CTCLoss()
        self.train_loader = torch.utils.data.DataLoader(
            traindataset,
            batch_size=self.batchSize,
            shuffle=False,
            sampler=None,
            num_workers=int(workers),
            collate_fn=alignCollate(imgH=imgH,
                                    imgW=imgW,
                                    keep_ratio=keep_ratio))
        self.interval = len(self.train_loader) // 2  # evaluate every half epoch

    def trainBatch(self, net, criterion, optimizer, cpu_images, cpu_texts):
        """Run one optimization step on a single batch; returns the CTC cost
        (already normalized by batch size)."""
        batch_size = cpu_images.size(0)
        loadData(self.image, cpu_images)
        t, l = self.converter.encode(cpu_texts)
        loadData(self.text, t)
        loadData(self.length, l)
        preds = net(self.image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, self.text, preds_size, self.length) / batch_size
        net.zero_grad()
        cost.backward()
        optimizer.step()
        return cost

    def val(self, net, dataset, max_iter=100):
        """Estimate exact-match accuracy of ``net`` on random samples from
        ``dataset`` (at most ``max_iter`` draws; very wide images skipped)."""
        for p in net.parameters():
            p.requires_grad = False
        net.eval()
        n_correct = 0
        N = len(dataset)
        max_iter = min(max_iter, N)
        for i in range(max_iter):
            im, label = dataset[np.random.randint(0, N)]
            if im.size[0] > 1024:
                continue
            # BUG FIX: originally called the module-global ``crnn_model``
            # instead of the ``net`` argument, so the parameter was ignored
            # and no other model could ever be validated.
            pred = net.predict(im)
            if pred.strip() == label:
                n_correct += 1
            # print(pred.strip(), label)
        accuracy = n_correct / float(max_iter)
        return accuracy

    def run_train(self):
        """Training entry point: moves state to GPU when available, then
        iterates epochs with a progress bar."""
        # BUG FIX: this method referenced the module-global ``crnn_model``
        # throughout; it now consistently uses ``self.crnn_model``.
        if torch.cuda.is_available():
            self.crnn_model.cuda()
            # model = torch.nn.DataParallel(model, device_ids=[0])
            # convert to a multi-GPU training model
            self.image = self.image.cuda()
            self.criterion = self.criterion.cuda()
        for i in range(1, self.nepochs + 1):
            print('epoch:{}/{}'.format(i, self.nepochs))
            n = len(self.train_loader)
            pbar = Progbar(target=n)
            train_iter = iter(self.train_loader)
            loss = 0
            for j in range(n):
                for name, params in self.crnn_model.named_parameters():
                    params.requires_grad = True
                self.crnn_model.train()
                cpu_images, cpu_texts = next(train_iter)
                cost = self.trainBatch(self.crnn_model, self.criterion,
                                       self.optimizer, cpu_images, cpu_texts)
                loss += cost.data.numpy()
                if (j + 1) % self.interval == 0:
                    # curAcc = self.val(self.crnn_model, self.testdataset, max_iter=1024)
                    # if curAcc > self.acc:
                    #     self.acc = curAcc
                    torch.save(self.crnn_model.state_dict(),
                               'new_modellstm.pth')
                pbar.update(j + 1,
                            values=[('loss',
                                     loss / ((j + 1) * self.batchSize)),
                                    ('acc', self.acc)])
class CRNN(nn.Module):
    """CNN + bidirectional-LSTM text recognizer whose ``forward`` also
    computes and returns its own CTC loss (instead of raw logits)."""

    def __init__(self, imgH, nc, nclass, nh, ngpu, n_rnn=2, leakyRelu=False):
        super(CRNN, self).__init__()
        self.ngpu = ngpu
        assert imgH % 16 == 0, 'imgH has to be a multiple of 16'
        ks = [3, 3, 3, 3, 3, 3, 2]  # conv kernel sizes
        ps = [1, 1, 1, 1, 1, 1, 0]  # paddings
        ss = [1, 1, 1, 1, 1, 1, 1]  # strides
        nm = [64, 128, 256, 256, 512, 512, 512]  # output channels
        cnn = nn.Sequential()
        self.criterion = CTCLoss()
        # NOTE(review): unconditional .cuda() here makes CPU-only use impossible.
        self.criterion = self.criterion.cuda()

        # Helper that appends conv(i) [+ batchnorm] + relu to ``cnn``.
        def convRelu(i, batchNormalization=False):
            nIn = nc if i == 0 else nm[i - 1]
            nOut = nm[i]
            cnn.add_module('conv{0}'.format(i),
                           nn.Conv2d(nIn, nOut, ks[i], ss[i], ps[i]))
            if batchNormalization:
                cnn.add_module('batchnorm{0}'.format(i), nn.BatchNorm2d(nOut))
            if leakyRelu:
                cnn.add_module('relu{0}'.format(i),
                               nn.LeakyReLU(0.2, inplace=True))
            else:
                cnn.add_module('relu{0}'.format(i), nn.ReLU(True))

        convRelu(0)
        cnn.add_module('pooling{0}'.format(0), nn.MaxPool2d(2, 2))  # 64x16x64
        convRelu(1)
        cnn.add_module('pooling{0}'.format(1), nn.MaxPool2d(2, 2))  # 128x8x32
        convRelu(2, True)
        convRelu(3)
        cnn.add_module('pooling{0}'.format(2),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 256x4x16
        convRelu(4, True)
        convRelu(5)
        cnn.add_module('pooling{0}'.format(3),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 512x2x16
        convRelu(6, True)  # 512x1x16
        self.cnn = cnn
        # Two stacked bidirectional LSTMs: features -> hidden -> class scores.
        self.rnn = nn.Sequential(BidirectionalLSTM(512, nh, nh, ngpu),
                                 BidirectionalLSTM(nh, nh, nclass, ngpu))

    def forward(self, input, cpu_texts):
        """Run conv+rnn over ``input``, then encode ``cpu_texts`` and return
        the per-image-normalized CTC cost (not the network output)."""
        # conv features
        image = network.np_to_variable(input)
        # conv = self.cnn(image)
        conv = utility.data_parallel(self.cnn, image, self.ngpu)
        b, c, h, w = conv.size()
        assert h == 1, "the height of conv must be 1"
        conv = conv.squeeze(2)
        conv = conv.permute(2, 0, 1)  # [w, b, c]
        # rnn features
        output = utility.data_parallel(self.rnn, conv, self.ngpu)
        # return output
        # cpu_texts = tuple(cpu_texts.reshape(1, -1)[0])
        cpu_texts = tuple(cpu_texts)
        # assert False
        # # utility.loadData(self.image, self.inputs)
        # print (image, "yoyo")
        alphabet = '0123456789abcdefghijklmnopqrstuvwxyz:-#\&\'!"$%&()*+-.:;<=>? ,/'
        # cpu_texts = self.frcnn.ocr
        # NOTE(review): Python-2-style leftover — the trailing string literal
        # after the call is a no-op expression.
        print(cpu_texts), "cpppppppppu texxxxxxxxxxxts"
        converter = utility.strLabelConverter(alphabet)
        t, l = converter.encode(cpu_texts)
        text = torch.IntTensor(image.size(0) * 5)
        text = Variable(text)
        length = torch.IntTensor(image.size(0))
        length = Variable(length)
        utility.loadData(text, t)
        utility.loadData(length, l)
        #print(output,"tttttttttttttectxt")
        # CTC input lengths: every sample spans the full time axis.
        preds_size = Variable(torch.IntTensor([output.size(0)] * image.size(0)))
        # print text,length, preds_size,output
        cost = self.criterion(output, text, preds_size, length) / image.size(0)
        cost = cost.cuda()
        # cost.zero_grad()
        # self.prevcost=cost
        # Greedy decode just for the debug prints below.
        _, preds = output.max(2)
        # NOTE(review): squeeze(2) is only valid on torch<=0.3 where max()
        # kept the reduced dimension — confirm the targeted torch version.
        preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        # print sim_preds.requires_grad
        for pred, target in zip(sim_preds, cpu_texts):
            print(sim_preds)
        return cost
# Near-duplicate of the earlier CRNN setup script, with single-GPU placement
# (fragment: ``criterion`` / ``weights_init`` are defined outside this view).
crnn = crnn.CRNN(opt.imgH, num_channels, nclass, opt.hidden_size)
crnn.apply(weights_init)
if opt.pretrained != '':
    print('loading pretrained model from %s' % opt.pretrained)
    crnn.load_state_dict(torch.load(opt.pretrained))
print(crnn)

# NOTE(review): width is ``opt.imgH`` again — probably should be ``opt.imgW``;
# confirm against the dataloader's output size.
image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH)
text = torch.IntTensor(opt.batchSize * 5)
length = torch.IntTensor(opt.batchSize)

if torch.cuda.is_available():
    crnn = crnn.cuda(opt.gpu)
    # crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu))
    image = image.cuda(opt.gpu)
    criterion = criterion.cuda(opt.gpu)

image = Variable(image)
text = Variable(text)
length = Variable(length)

# loss averager
loss_avg = utils.averager()

# setup optimizer
if opt.adam:
    optimizer = optim.Adam(crnn.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
elif opt.adadelta:
    # NOTE(review): unlike the twin script above, no ``lr=opt.lr`` is passed,
    # so Adadelta falls back to its library default — confirm intent.
    optimizer = optim.Adadelta(crnn.parameters())
class TlstmSeqRecognizer(kraken.lib.lstm.SeqRecognizer):
    """
    Something like ClstmSeqRecognizer, using pytorch instead of clstm.
    The serialization format is the same as the clstm/master branch.
    """

    def __init__(self, fname='', normalize=kraken.lib.lstm.normalize_nfkc,
                 cuda=torch.cuda.is_available()):
        # NOTE(review): ``cuda=torch.cuda.is_available()`` is evaluated once at
        # import time (mutable-default-style pitfall) — confirm this is intended.
        self.fname = fname
        self.rnn = None
        self.normalize = normalize
        self.cuda_available = cuda
        if fname:
            self._load_model()

    @classmethod
    def init_model(cls, ninput, nhidden, noutput, codec,
                   normalize=kraken.lib.lstm.normalize_nfkc,
                   cuda=torch.cuda.is_available()):
        """Alternate constructor: build a fresh (untrained) recognizer."""
        self = cls()
        self.codec = codec
        self.normalize = normalize
        self.rnn = TBIDILSTM(ninput, nhidden, noutput)
        self.setLearningRate()
        self.trial = 0
        self.mode = 'clstm'
        self.criterion = CTCLoss()
        self.cuda_available = cuda
        if self.cuda_available:
            self.cuda()
        return self

    def cuda(self):
        """Move network and loss to the GPU (no-op message when unavailable)."""
        if not self.cuda_available:
            return 'CUDA not available!'
        self.rnn = self.rnn.cuda()
        self.criterion = self.criterion.cuda()

    def save_model(self, path):
        """Serialize the model to ``path`` in clstm's protobuf format."""
        network = clstm_pb2.NetworkProto(kind='Stacked',
                                         ninput=self.rnn.ninput,
                                         noutput=self.rnn.noutput)
        # Codec: index 0 is reserved; remaining entries are character codes.
        network.codec.extend([0] +
                             [ord(c) for c in self.codec.code2char.values()][1:])
        network.attribute.extend([
            clstm_pb2.KeyValue(key='kind', value='bidi'),
            clstm_pb2.KeyValue(key='learning_rate',
                               value='{:4f}'.format(self.rnn.learning_rate)),
            clstm_pb2.KeyValue(key='momentum',
                               value='{:4f}'.format(self.rnn.momentum)),
            clstm_pb2.KeyValue(key='trial', value=repr(self.trial))
        ])
        hiddenattr = clstm_pb2.KeyValue(key='nhidden',
                                        value=repr(self.rnn.nhidden))
        # Sub-network skeleton mirroring clstm's Stacked(Parallel(lstm, Reversed(lstm)), Softmax).
        networks = {}
        networks['paral'] = clstm_pb2.NetworkProto(kind='Parallel',
                                                   ninput=self.rnn.ninput,
                                                   noutput=self.rnn.nhidden * 2)
        networks['lstm1'] = clstm_pb2.NetworkProto(kind='NPLSTM',
                                                   ninput=self.rnn.ninput,
                                                   noutput=self.rnn.nhidden)
        networks['lstm1'].attribute.extend([hiddenattr])
        networks['rev'] = clstm_pb2.NetworkProto(kind='Reversed',
                                                 ninput=self.rnn.ninput,
                                                 noutput=self.rnn.nhidden)
        networks['lstm2'] = clstm_pb2.NetworkProto(kind='NPLSTM',
                                                   ninput=self.rnn.ninput,
                                                   noutput=self.rnn.nhidden)
        networks['lstm2'].attribute.extend([hiddenattr])
        networks['softm'] = clstm_pb2.NetworkProto(kind='SoftmaxLayer',
                                                   ninput=self.rnn.nhidden * 2,
                                                   noutput=self.rnn.noutput)
        networks['softm'].attribute.extend([hiddenattr])
        # weights
        # Split the fused PyTorch LSTM weights into clstm's four gate matrices.
        weights = {}
        weights['lstm1'] = {}
        weights['lstm2'] = {}
        weights['softm'] = {}
        weights['lstm1']['WGI'], weights['lstm1']['WGF'], weights['lstm1']['WCI'], weights['lstm1']['WGO'] = \
            torch.cat([self.rnn.rnn.weight_ih_l0, self.rnn.rnn.weight_hh_l0], 1).split(self.rnn.nhidden, 0)
        weights['lstm2']['WGI'], weights['lstm2']['WGF'], weights['lstm2']['WCI'], weights['lstm2']['WGO'] = \
            torch.cat([self.rnn.rnn.weight_ih_l0_reverse, self.rnn.rnn.weight_hh_l0_reverse], 1).split(self.rnn.nhidden, 0)
        weights['softm']['W1'] = self.rnn.decoder.weight
        for n in weights.keys():
            # sorted() keeps gate order deterministic (WCI, WGF, WGI, WGO).
            for w in sorted(weights[n].keys()):
                warray = clstm_pb2.Array(name=w,
                                         dim=list(weights[n][w].size()))
                for v in weights[n][w].data.cpu().numpy().tolist():
                    warray.value.extend(v)
                networks[n].weights.extend([warray])
        networks['rev'].sub.extend([networks['lstm2']])
        networks['paral'].sub.extend([networks['lstm1'], networks['rev']])
        network.sub.extend([networks['paral'], networks['softm']])
        with open(path, 'wb') as fp:
            fp.write(network.SerializeToString())

    def _load_model(self):
        """Deserialize a clstm protobuf model from ``self.fname`` and rebuild
        the PyTorch network with its weights."""
        network = clstm_pb2.NetworkProto()
        with open(self.fname, 'rb') as f:
            network.ParseFromString(f.read())
        ninput = network.ninput
        noutput = network.noutput
        attributes = {a.key: a.value for a in network.attribute[:]}
        self.kind = attributes['kind']
        # Full attribute set => regular clstm model; otherwise fall back to a
        # compatibility mode with default hyperparameters.
        if len(attributes) > 1:
            lrate = float(attributes['learning_rate'])
            momentum = float(attributes['momentum'])
            self.trial = int(attributes['trial'])
            self.mode = "clstm"
        else:
            lrate = 1e-4
            momentum = 0.9
            self.trial = 0
            self.mode = 'clstm_compatibility'
        # Codec
        self.codec = kraken.lib.lstm.Codec()
        code2char, char2code = {}, {}
        # Code 0 is mapped to chr(126); the stored codec supplies the rest.
        for code, char in enumerate([126] + network.codec[1:]):
            code2char[code] = chr(char)
            char2code[chr(char)] = code
        self.codec.code2char = code2char
        self.codec.char2code = char2code
        # Networks
        networks = {}
        networks['softm'] = [
            n for n in network.sub[:] if n.kind == 'SoftmaxLayer'
        ][0]
        parallel = [n for n in network.sub[:] if n.kind == 'Parallel'][0]
        networks['lstm1'] = [
            n for n in parallel.sub[:] if n.kind.startswith('NPLSTM')
        ][0]
        rev = [n for n in parallel.sub[:] if n.kind == 'Reversed'][0]
        networks['lstm2'] = rev.sub[0]
        nhidden = int(networks['lstm1'].attribute[0].value)
        weights = {}
        for n in networks:
            weights[n] = {}
            for w in networks[n].weights[:]:
                weights[n][w.name] = np.array(w.value).reshape(w.dim[:])
        self.weights = weights
        weightnames = ('WGI', 'WGF', 'WCI', 'WGO')
        weightname_softm = 'W1'
        if self.mode == 'clstm_compatibility':
            weightnames = ('.WGI', '.WGF', '.WCI', '.WGO')
            weightname_softm = '.W'
        # lstm
        # Re-fuse clstm's four gate matrices into PyTorch's ih/hh layout.
        ih_hh_splits = torch.cat([torch.from_numpy(w.astype('float32')) \
                                  for w in [weights['lstm1'][wn] \
                                            for wn in weightnames]],0).split(ninput+1,1)
        weight_ih_l0 = ih_hh_splits[0]
        weight_hh_l0 = torch.cat(ih_hh_splits[1:], 1)
        # lstm_reversed
        ih_hh_splits = torch.cat([torch.from_numpy(w.astype('float32')) \
                                  for w in [weights['lstm2'][wn] \
                                            for wn in weightnames]],0).split(ninput+1,1)
        weight_ih_l0_rev = ih_hh_splits[0]
        weight_hh_l0_rev = torch.cat(ih_hh_splits[1:], 1)
        # softmax
        weight_softm = torch.from_numpy(
            weights['softm'][weightname_softm].astype('float32'))
        if self.mode == "clstm_compatibility":
            # Prepend a zero column for the blank class missing in old models.
            weight_softm = torch.cat(
                [torch.zeros(len(weight_softm), 1), weight_softm], 1)
        # attach weights
        self.rnn = TBIDILSTM(ninput, nhidden, noutput)
        self.rnn.rnn.weight_ih_l0 = nn.Parameter(weight_ih_l0)
        self.rnn.rnn.weight_hh_l0 = nn.Parameter(weight_hh_l0)
        self.rnn.rnn.weight_ih_l0_reverse = nn.Parameter(weight_ih_l0_rev)
        self.rnn.rnn.weight_hh_l0_reverse = nn.Parameter(weight_hh_l0_rev)
        self.rnn.decoder.weight = nn.Parameter(weight_softm)
        self.setLearningRate(lrate, momentum)
        self.rnn.zero_grad()
        self.criterion = CTCLoss()
        if self.cuda_available:
            self.cuda()

    def load_codec(self, newcodec, initrange=0.1):
        """Swap in a new output codec, copying decoder rows for characters
        shared with the old codec and randomly initializing the rest."""
        newdecoder = nn.Linear(2 * self.rnn.nhidden + 1, newcodec.size(),
                               bias=False)
        if self.rnn.decoder.weight.is_cuda:
            newdecoder = newdecoder.cuda()
        newdecoder.weight.data.uniform_(-initrange, initrange)
        for c in newcodec.char2code:
            if c in self.codec.char2code:
                newdecoder.weight.data[newcodec.char2code[
                    c]] = self.rnn.decoder.weight.data[self.codec.char2code[c]]
        self.rnn.decoder = newdecoder
        self.codec = newcodec
        self.rnn.noutput = newcodec.size()

    def translate_back(self, output):
        """Collapse network output into label codes (CTC best-path: drop
        blanks and repeated labels)."""
        if self.mode == 'clstm_compatibility':
            return kraken.lib.lstm.translate_back(
                output.exp().cpu().squeeze().data.numpy())
        _, preds = output.cpu().max(
            2)  # max() outputs values +1 when on gpu. why?
        dec = preds.transpose(1, 0).contiguous().view(-1).data
        char_list = []
        for i in range(len(dec)):
            if dec[i] != 0 and (not (i > 0 and dec[i - 1] == dec[i])):
                char_list.append(dec[i])
        return char_list

    def translate_back_locations(self, output):
        """Like ``translate_back`` but yields (code, start, end, confidence)
        tuples giving each label's time-step span."""
        if self.mode == 'clstm_compatibility':
            return kraken.lib.lstm.translate_back_locations(
                output.exp().cpu().squeeze().data.numpy())
        val, preds = output.cpu().max(
            2)  # max() outputs values +1 when on gpu. why?
        dec = preds.transpose(1, 0).contiguous().view(-1).data
        char_list = []
        start = None
        for i in range(len(dec)):
            # Open a span at the first frame of a new non-blank label.
            if start is None and dec[i] != 0 and (
                    not (i > 0 and dec[i - 1] == dec[i])):
                start = i
                code = dec[i]
            # Close the span when the label changes.
            if start is not None and (dec[i - 1] != dec[i]):
                char_list.append(
                    (code, start, i, val[start:i + 1].max().exp().data[-1]))
                start = None
        return char_list

    def predictString(self, line):
        """Run the network on one line image and decode it to a string."""
        line = Variable(
            torch.from_numpy(
                line.reshape(-1, 1, self.rnn.ninput).astype('float32')))
        if self.cuda_available:
            line = line.cuda()
        out, _ = self.rnn.forward(line, self.rnn.init_hidden())
        self.outputs = out
        codes = [x[0] for x in self.translate_back_locations(out)]
        #codes = lstm.translate_back(out.exp().cpu().squeeze().data.numpy())
        res = ''.join(self.codec.decode(codes))
        return res

    def trainSequence(self, line, labels, update=1):
        """One CTC training step on a single line; returns the decoded label
        codes of the (pre-update) network output."""
        line = Variable(
            torch.from_numpy(
                line.reshape(-1, 1, self.rnn.ninput).astype('float32')))
        if self.cuda_available:
            line = line.cuda()
        if not hasattr(self, 'hidden'):
            self.hidden = self.rnn.init_hidden()
        # repackage hidden
        # Detach the hidden state so gradients do not flow across lines.
        self.hidden = tuple(Variable(h.data) for h in self.hidden)
        out, self.hidden = self.rnn.forward(line, self.hidden)
        tlabels = Variable(torch.IntTensor(labels))
        probs_sizes = Variable(torch.IntTensor([len(out)]))  # why Variable?
        label_sizes = Variable(torch.IntTensor([len(labels)]))
        loss = self.criterion(out, tlabels, probs_sizes, label_sizes)
        self.rnn.zero_grad()
        loss.backward()
        if update:
            self.optim.step()
        self.trial += 1
        # First successful training step upgrades compatibility-mode models.
        if self.mode == 'clstm_compatibility':
            self.mode = 'clstm'
        cls = self.translate_back(out)
        return cls

    def trainString(self, line, s, update=1):
        """Encode string ``s`` via the codec, train on it, and return the
        decoded prediction as a string."""
        labels = self.codec.encode(s)
        cls = self.trainSequence(line, labels)
        return ''.join(self.codec.decode(cls))

    def setLearningRate(self, rate=1e-4, momentum=0.9):
        """(Re)create the RMSprop optimizer with the given hyperparameters."""
        self.rnn.learning_rate = rate
        self.rnn.momentum = momentum
        self.optim = torch.optim.RMSprop(self.rnn.parameters(),
                                         lr=self.rnn.learning_rate,
                                         momentum=self.rnn.momentum)