def __init__(self):
    """Build the CRNN recogniser and load its pretrained weights.

    Reads the module-level settings ``chinsesModel``, ``GPU``, ``LSTMFLAG``
    and ``ocrModel``. Sets ``self.converter`` (label <-> string converter
    for the selected alphabet) and ``self.model`` (the network with the
    checkpoint weights loaded).
    """
    alphabet = keys.alphabetChinese if chinsesModel else keys.alphabetEnglish
    self.converter = util.strLabelConverter(alphabet)

    # LSTMFLAG=True selects the CRNN variant, otherwise dense OCR.
    # The class count is the alphabet size plus one extra class
    # (presumably the CTC blank -- confirm against the CRNN definition).
    model = CRNN(32, 1, len(alphabet) + 1, 256, 1, lstmFlag=LSTMFLAG)
    if torch.cuda.is_available() and GPU:
        self.model = model.cuda()
    else:
        self.model = model.cpu()

    # map_location keeps every tensor on the CPU while deserialising, so the
    # checkpoint can be read on machines without a GPU.
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    state_dict = torch.load(ocrModel, map_location=lambda storage, loc: storage)

    # Drop only a *leading* `module.` (the prefix added when a model is saved
    # through a wrapper such as torch.nn.DataParallel). A blanket
    # str.replace would also mangle keys that merely contain that substring.
    prefix = 'module.'
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[len(prefix):] if k.startswith(prefix) else k
        new_state_dict[name] = v

    # load params
    self.model.load_state_dict(new_state_dict)
class crnn(object):
    """CRNN text recogniser: loads the network once, then decodes images."""

    def __init__(self):
        """Build the CRNN network and load its pretrained weights.

        Reads the module-level settings ``chinsesModel``, ``GPU``,
        ``LSTMFLAG`` and ``ocrModel``. Sets ``self.converter`` and
        ``self.model``.
        """
        alphabet = keys.alphabetChinese if chinsesModel else keys.alphabetEnglish
        self.converter = util.strLabelConverter(alphabet)

        # LSTMFLAG=True selects the CRNN variant, otherwise dense OCR.
        # The class count is the alphabet size plus one extra class
        # (presumably the CTC blank -- confirm against the CRNN definition).
        model = CRNN(32, 1, len(alphabet) + 1, 256, 1, lstmFlag=LSTMFLAG)
        if torch.cuda.is_available() and GPU:
            self.model = model.cuda()
        else:
            self.model = model.cpu()

        # map_location keeps every tensor on the CPU while deserialising, so
        # the checkpoint can be read on machines without a GPU.
        # NOTE(review): torch.load unpickles the file; only load trusted
        # checkpoints.
        state_dict = torch.load(ocrModel, map_location=lambda storage, loc: storage)

        # Drop only a *leading* `module.` (the prefix added when a model is
        # saved through a wrapper such as torch.nn.DataParallel). A blanket
        # str.replace would also mangle keys that merely contain it.
        prefix = 'module.'
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            name = k[len(prefix):] if k.startswith(prefix) else k
            new_state_dict[name] = v

        # load params
        self.model.load_state_dict(new_state_dict)

    def crnnOcr(self, image):
        """Recognise the text in ``image`` and return the decoded string.

        ``image`` must expose ``size`` as a pair whose second entry is scaled
        to 32 and whose first entry is scaled by the same factor (i.e.
        ``(width, height)`` as for a PIL image -- TODO confirm the type
        expected by ``dataset.resizeNormalize``).
        """
        # Resize so the second dimension becomes 32, keeping the aspect ratio.
        scale = image.size[1] * 1.0 / 32
        w = int(image.size[0] / scale)
        transformer = dataset.resizeNormalize((w, 32))

        tensor = transformer(image)
        if torch.cuda.is_available() and GPU:
            tensor = tensor.cuda()
        else:
            tensor = tensor.cpu()

        # Prepend a batch dimension of size 1.
        tensor = tensor.view(1, *tensor.size())
        tensor = Variable(tensor)

        self.model.eval()
        preds = self.model(tensor)

        # Keep the index of the best-scoring class along dim 2, then flatten
        # to a 1-D sequence of class indices for the decoder.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        sim_pred = self.converter.decode(preds.data, preds_size.data, raw=False)
        return sim_pred
# Loaded CRNN models keyed by checkpoint path, so the weights are read from
# disk once per path instead of on every recognition call.
_CRNN_MODEL_CACHE = {}


def _load_crnn_model(model_path, logger):
    """Return the CRNN loaded from ``model_path``, building it on first use."""
    model = _CRNN_MODEL_CACHE.get(model_path)
    if model is not None:
        return model

    model = CRNN(32, 1, nclass, 256)
    logger.info('loading pretrained model from {0}'.format(model_path))

    # map_location keeps every tensor on the CPU while deserialising.
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    trainWeights = torch.load(model_path,
                              map_location=lambda storage, loc: storage)

    # Drop only a *leading* `module.` (the prefix added when a model is saved
    # through a wrapper such as torch.nn.DataParallel). A blanket str.replace
    # would also mangle keys that merely contain that substring.
    prefix = 'module.'
    modelWeights = OrderedDict()
    for k, v in trainWeights.items():
        name = k[len(prefix):] if k.startswith(prefix) else k
        modelWeights[name] = v
    model.load_state_dict(modelWeights)

    _CRNN_MODEL_CACHE[model_path] = model
    return model


def crnn_recognition(part_image, app):
    """Recognise the text in ``part_image`` and return the decoded string.

    Args:
        part_image: image object providing ``convert('L')`` and ``size``
            (a PIL image, presumably -- confirm against the caller).
        app: object whose ``logger.info`` is used to report model loading.

    Returns:
        Whatever ``strLabelConverter.decode(..., raw=False)`` yields for the
        predicted label sequence.

    The model and the input stay on the CPU; the CUDA transfer was already
    disabled (commented out) in the original implementation.
    """
    model = _load_crnn_model(params.crnn_model_path, app.logger)
    converter = crnn.utils.strLabelConverter(alphabet)

    image = part_image.convert('L')
    # Width is scaled by the fixed ratio 160/280 (not derived from the image's
    # own height); the other dimension is forced to 32.
    w = int(image.size[0] / (280 * 1.0 / 160))
    transformer = crnn.dataset.resizeNormalize((w, 32))
    image = transformer(image)

    # Prepend a batch dimension of size 1.
    image = image.view(1, *image.size())
    image = Variable(image)

    model.eval()
    preds = model(image)

    # Keep the index of the best-scoring class along dim 2, then flatten to a
    # 1-D sequence of class indices for the decoder.
    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    return sim_pred
# Training data: each sample goes through Rescale((32, 100)), Gray(),
# ZeroMean() and ToTensor(), in that order.
train_dataset = TextDataset(
    opt.train_filename,
    opt.root_dir,
    opt.max_label_length,
    transforms.Compose([Rescale((32, 100)), Gray(), ZeroMean(), ToTensor()]),
)
# Assuming this is torch.utils.data.DataLoader, the positional arguments are
# batch_size=64 and shuffle=True -- confirm against the import.
train_loader = DataLoader(train_dataset, 64, True)

# Build the network, apply the custom initialiser to every sub-module, and
# move it to the configured device.
device = opt.device
net = CRNN()
net.apply(weights_init)
net = net.to(device)
net.zero_grad()

# Adam with default learning rate and an L2 weight decay of 1e-5.
params = net.parameters()
ctc_loss = CTCLoss()
optimizer = optim.Adam(params, weight_decay=1e-5)

# Presumably the starting threshold for tracking the best loss seen so far --
# confirm against the loop body.
best_loss = 50
print("gc is enabled", gc.isenabled())

# Outer loop: one iteration per epoch; running_loss is reset at the start of
# each epoch. Inner loop: iterate over the batches with their index.
for epoch in trange(opt.epoch):
    running_loss = 0.0
    for i, train_data in tqdm(enumerate(train_loader, 0)):
## Load the pretrained model weights
def weights_init(m):
    """Initialise the parameters of module ``m`` in place.

    Dispatches on the class name: classes whose name contains ``Conv`` get
    weights drawn from N(0.0, 0.02); classes whose name contains
    ``BatchNorm`` get weights drawn from N(1.0, 0.02) and a zero bias.
    Any other module is left untouched. Intended for ``model.apply``.
    """
    classname = m.__class__.__name__
    if 'Conv' in classname:
        m.weight.data.normal_(0.0, 0.02)
    elif 'BatchNorm' in classname:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


from crnn.models.crnn import CRNN
from config import ocrModel, LSTMFLAG, GPU

# The class count is the alphabet size plus one extra class (presumably the
# CTC blank -- confirm against the CRNN definition).
model = CRNN(32, 1, len(alphabetChinese) + 1, 256, 1, lstmFlag=LSTMFLAG)
model.apply(weights_init)

## Bring in the weights previously trained for this project.
# map_location keeps every tensor on the CPU while deserialising.
# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
preWeightDict = torch.load(ocrModel, map_location=lambda storage, loc: storage)

# Start from the freshly initialised weights and overwrite them with the
# checkpoint values, except for the final layer.
modelWeightDict = model.state_dict()
for k, v in preWeightDict.items():
    # Drop only a *leading* `module.` (the prefix added when a model is saved
    # through a wrapper such as torch.nn.DataParallel). A blanket str.replace
    # would also mangle keys that merely contain that substring.
    name = k[len('module.'):] if k.startswith('module.') else k
    if 'rnn.1.embedding' not in name:  ## do not load the last layer's weights
        modelWeightDict[name] = v
model.load_state_dict(modelWeightDict)

## Optimizer
else: if score > maxScore: maxScore = score topPred = text return topPred if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--images", help="Absolute path to location of tire image for text extraction(folder/file)", required=True) parser.add_argument("--saveFolder", help="Location where prediction should be stored", required=False, default=None) args, _ = parser.parse_known_args() modelPath = os.path.join(CRNN_FOLDER, "crnn.pth") model = CRNN(imgH=32, nc=1, nclass=37, nh=256, ngpu=1).cuda() model.load_state_dict(torch.load(modelPath)) imageList = [] if os.path.isfile(args.images): image = str(Path(args.images).resolve()) imageList = [image] elif os.path.isdir(args.images): imageList.extend(glob.glob(os.path.join(args.images, "*.jpg"))) else: raise AssertionError("Value of --images must be a valid file or folder") if args.saveFolder: saveFolder = args.saveFolder else: saveFolder = os.path.join(os.getcwd(), "result") if not os.path.isdir(saveFolder): os.mkdir(saveFolder)
# logging.debug('') # logging.debug('compare: true: ', label, 'pred:', str(pred)) if pred is not None: log_msg = label + ' |pred: ' + str(pred) logging.debug(log_msg) print('compare: true: ', label, 'pred:', pred) if pred.strip() == label: n_correct += 1 # print('n_correct: ', n_correct) accuracy = n_correct / float(max_iter) print('val_accruracy: ', accuracy) return accuracy

print('lstm: ', LSTMFLAG)
# The class count is the alphabet size plus one extra class (presumably the
# CTC blank -- confirm against the CRNN definition).
model = CRNN(32, 1, len(alphabetChinese) + 1, 256, 1, lstmFlag=LSTMFLAG)

# Only run the next line when training from scratch:
# model.apply(weights_init)

print('load weights: ', ocrModel)
## Bring in the weights previously trained for this project.
# map_location keeps every tensor on the CPU while deserialising.
# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
preWeightDict = torch.load(
    ocrModel, map_location=lambda storage, loc: storage)

# Start from the model's current weights and overwrite them with the
# checkpoint values, except for the final layer.
modelWeightDict = model.state_dict()
for k, v in preWeightDict.items():
    # Drop only a *leading* `module.` (the prefix added when a model is saved
    # through a wrapper such as torch.nn.DataParallel). A blanket str.replace
    # would also mangle keys that merely contain that substring.
    name = k[len('module.'):] if k.startswith('module.') else k
    if 'rnn.1.embedding' not in name:  ## do not load the last layer's weights
        modelWeightDict[name] = v

model.load_state_dict(modelWeightDict)
print('model has been loaded')