def __init__(self):
    encoder_path = './expr/attentioncnn/encoder_600.pth'
    decoder_path = './expr/attentioncnn/decoder_600.pth'
    self.alphabet = '0123456789'
    self.max_length = 7          # maximum length of the label string
    self.EOS_TOKEN = 1
    self.use_gpu = True
    self.max_width = 220
    self.converter = utils.strLabelConverterForAttention(self.alphabet)
    self.transform = transforms.ToTensor()

    nclass = len(self.alphabet) + 3
    encoder = crnn.CNN(32, 1, 256)        # encoder
    decoder = crnn.decoder(256, nclass)   # seq2seq decoder; 2 more classes are added inside the decoder

    if encoder_path and decoder_path:
        print('loading pretrained models ......')
        encoder.load_state_dict(torch.load(encoder_path))
        decoder.load_state_dict(torch.load(decoder_path))
    if torch.cuda.is_available() and self.use_gpu:
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    self.encoder = encoder.eval()
    self.decoder = decoder.eval()
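# --- Hedged usage sketch (not part of the original file) -------------------
# A minimal preprocessing helper consistent with the assumptions made in
# __init__ above: grayscale input (nc=1), height 32 (the value passed to
# crnn.CNN), width capped at self.max_width, converted with self.transform.
# The project's real preprocessing (e.g. dataset.resizeNormalize) may differ.
def _prepare_image(self, img_path):
    from PIL import Image

    image = Image.open(img_path).convert('L')            # single channel
    w, h = image.size
    new_w = min(int(w * 32 / h), self.max_width)          # keep aspect ratio, cap at max_width
    image = image.resize((new_w, 32), Image.BILINEAR)
    tensor = self.transform(image).unsqueeze(0)           # shape: (1, 1, 32, new_w)
    if torch.cuda.is_available() and self.use_gpu:
        tensor = tensor.cuda()
    return tensor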
criterion = torch.nn.NLLLoss()


def weights_init(model):
    # Official init from torch repo.
    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight)
        elif isinstance(m, nn.BatchNorm2d):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            nn.init.constant_(m.bias, 0)


encoder = crnn.CNN(opt.imgH, nc, opt.nh)
# max_length: w/4, the sequence length along the width after the encoder's feature extraction
decoder = crnn.decoder(opt.nh, nclass, dropout_p=0.1, max_length=opt.max_width)
encoder.apply(weights_init)
decoder.apply(weights_init)

# continue training, or use a pretrained model to initialise the encoder and decoder parameters
if opt.encoder:
    print('loading pretrained encoder model from %s' % opt.encoder)
    encoder.load_state_dict(torch.load(opt.encoder))
if opt.decoder:
    print('loading pretrained decoder model from %s' % opt.decoder)
    decoder.load_state_dict(torch.load(opt.decoder))

print(encoder)
print(decoder)
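# --- Hedged training-step sketch (not part of the original file) -----------
# Illustrates how `criterion` (NLLLoss) is typically combined with this kind
# of attention decoder: the decoder is stepped token by token with teacher
# forcing, and its log-softmax output is compared with the next target token.
# The decoder call signature decoder(input, hidden, encoder_outputs), the
# decoder.initHidden() helper, and the SOS index are assumptions modelled on
# common seq2seq code; the real crnn.decoder interface may differ.
def train_step(image, target, encoder, decoder, criterion, optimizer, sos_token=0):
    optimizer.zero_grad()
    encoder_outputs = encoder(image)                     # assumed (T, B, nh) feature sequence
    batch_size = image.size(0)
    decoder_input = torch.full((batch_size,), sos_token,
                               dtype=torch.long, device=image.device)
    decoder_hidden = decoder.initHidden(batch_size).to(image.device)   # assumed helper
    loss = 0.0
    for t in range(target.size(0)):                      # target: (L, B) label indices
        output, decoder_hidden, _ = decoder(decoder_input, decoder_hidden, encoder_outputs)
        loss = loss + criterion(output, target[t])
        decoder_input = target[t]                        # teacher forcing
    loss.backward()
    optimizer.step()
    return loss.item()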
import torch
from PIL import Image

import utils
import dataset
from utils import alphabet
import models.crnn_lang as crnn

use_gpu = True
encoder_path = './expr/attentioncnn/encoder_5.pth'
decoder_path = './expr/attentioncnn/decoder_5.pth'
img_path = './test_img/20441531_4212871437.jpg'
max_length = 15      # maximum length of the label string
EOS_TOKEN = 1

nclass = len(alphabet) + 3
encoder = crnn.CNN(32, 1, 256)        # encoder
decoder = crnn.decoder(256, nclass)   # seq2seq decoder; 2 more classes are added inside the decoder

if encoder_path and decoder_path:
    print('loading pretrained models ......')
    encoder.load_state_dict(torch.load(encoder_path))
    decoder.load_state_dict(torch.load(decoder_path))
if torch.cuda.is_available() and use_gpu:
    encoder = encoder.cuda()
    decoder = decoder.cuda()

converter = utils.strLabelConverterForAttention(alphabet)
transformer = dataset.resizeNormalize((280, 32))
image = Image.open(img_path).convert('L')
image = transformer(image)
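# --- Hedged inference sketch (not part of the original file) ---------------
# What typically follows the preprocessing above: run the encoder once, then
# step the decoder greedily until EOS_TOKEN or max_length is reached.  The
# decoder call signature, decoder.initHidden(), the SOS index and
# converter.decode() are assumptions modelled on common attention-OCR code,
# not a verified part of models.crnn_lang.
image = image.unsqueeze(0)                    # add batch dimension: (1, 1, 32, 280)
if torch.cuda.is_available() and use_gpu:
    image = image.cuda()

encoder.eval()
decoder.eval()
with torch.no_grad():
    encoder_outputs = encoder(image)
    decoder_input = torch.zeros(1, dtype=torch.long, device=image.device)   # assumed SOS index 0
    decoder_hidden = decoder.initHidden(1).to(image.device)                 # assumed helper
    pred_indices = []
    for _ in range(max_length):
        output, decoder_hidden, _ = decoder(decoder_input, decoder_hidden, encoder_outputs)
        _, top_index = output.max(1)
        if top_index.item() == EOS_TOKEN:
            break
        pred_indices.append(top_index.item())
        decoder_input = top_index
    # converter.decode is assumed to map label indices back to characters
    print('prediction:', converter.decode(pred_indices))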
    transform=dataset.resizeNormalize((opt.imgW, opt.imgH)))

alphabet = utils.getAlphabetStr(opt.alphabet)
nclass = len(alphabet) + 3   # number of classes the decoder needs; +3 for SOS, EOS and blank
print(" -- Number of classes:", nclass)
nc = 1

converter = utils.strLabelConverterForAttention(alphabet)
# criterion = torch.nn.CrossEntropyLoss()
criterion = torch.nn.NLLLoss()   # the decoder's final output must be log_softmax

encoder = crnn.CNN(opt.imgH, nc, opt.nh, cnn_size=opt.cnn_size)
# decoder = crnn.decoder(opt.nh, nclass, dropout_p=0.1, max_length=opt.max_width)
#   max_length: w/4, the sequence length along the width after the encoder's feature extraction
decoder = crnn.decoderV2(opt.nh, nclass, dropout_p=opt.dropout)   # for predicting sequences of arbitrary length
encoder.apply(utils.weights_init)
decoder.apply(utils.weights_init)

# continue training, or use a pretrained model to initialise the encoder and decoder parameters
if opt.encoder:
    print('loading pretrained encoder model from %s' % opt.encoder)
    encoder.load_state_dict(torch.load(opt.encoder))
if opt.decoder:
    print('loading pretrained decoder model from %s' % opt.decoder)
    decoder.load_state_dict(torch.load(opt.decoder))

print(encoder)
model_summary(encoder)
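# --- Why NLLLoss here (illustrative, not part of the original file) --------
# nn.NLLLoss expects log-probabilities, which is why the decoder must end
# with log_softmax; combining the two is numerically the same as using
# CrossEntropyLoss on raw logits.  Quick self-contained check with dummy data:
import torch
import torch.nn.functional as F

logits = torch.randn(4, 10)                  # batch of 4, 10 classes
targets = torch.randint(0, 10, (4,))
loss_nll = F.nll_loss(F.log_softmax(logits, dim=1), targets)
loss_ce = F.cross_entropy(logits, targets)
assert torch.allclose(loss_nll, loss_ce)     # identical up to floating point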
                    default='2D', help='the mode of attention')
opt = parser.parse_args()

# use_gpu = True
# encoder_path = './expr/attentioncnn/encoder_10.pth'
# decoder_path = './expr/attentioncnn/decoder_10.pth'
# img_path = './test_img/20441531_4212871437.jpg'
# max_length = 15   # maximum length of the label string
# EOS_TOKEN = 1

nclass = len(alphabet) + 3
cfg.SEQUENCE.NUM_CHAR = nclass
nc = 1

if opt.mode == '1D':
    encoder = crnn.CNN(32, 1, 256, cfg)       # encoder
    decoder = crnn.decoderV2(256, nclass)     # seq2seq decoder; 2 more classes are added inside the decoder
    # decoder = crnn.decoderV2(256, nclass - 2)
else:
    encoder = crnn.CNN(opt.imgH, nc, opt.nh, cfg, mode=opt.mode, dim_in=opt.dim_in)
    decoder = crnn.SequencePredictor(cfg, nclass)

if opt.encoder and opt.decoder:
    print('loading pretrained models ......')
if opt.mode == '1D':
    test_dataset = dataset.listDataset(list_file=opt.vallist,
                                       transform=dataset.resizeNormalize((opt.imgW, opt.imgH)))
else:
    test_dataset = dataset.listDataset(list_file=opt.vallist,
                                       transform=dataset.paddingNormalize(opt.imgH, opt.imgW))

nclass = len(alphabet) + 3   # number of classes the decoder needs; +3 for SOS, EOS and blank
print('nclass:', nclass)
cfg.SEQUENCE.NUM_CHAR = nclass
nc = 1

converter = utils.strLabelConverterForAttention(alphabet)
# criterion = torch.nn.CrossEntropyLoss()
criterion = torch.nn.NLLLoss()   # the decoder's final output must be log_softmax

if opt.mode == '1D':
    encoder = crnn.CNN(opt.imgH, nc, opt.nh, cfg)
    decoder = crnn.decoderV2(opt.nh, nclass, dropout_p=0.1)
else:
    encoder = crnn.CNN(opt.imgH, nc, opt.nh, cfg, mode='2D', dim_in=512)
    # decoder = crnn.decoder(opt.nh, nclass, dropout_p=0.1, max_length=opt.max_width)
    #   max_length: w/4, the sequence length along the width after the encoder's feature extraction
    # decoder = crnn.decoderV2(opt.nh, nclass, dropout_p=0.1)   # for predicting sequences of arbitrary length
    decoder = crnn.SequencePredictor(cfg, nclass)

encoder.apply(weights_init)
decoder.apply(weights_init)

# continue training, or use a pretrained model to initialise the encoder and decoder parameters
if opt.encoder:
    print('loading pretrained encoder model from %s' % opt.encoder)
    encoder.load_state_dict(torch.load(opt.encoder))
if opt.decoder:
    print('loading pretrained decoder model from %s' % opt.decoder)
height = opt.imgH
img_paths = []
for i in opt.image:
    if os.path.isdir(i):
        pimages = [i + os.sep + x for x in os.listdir(i)
                   if x.split(".")[-1].lower() in ["jpg", "tif", "png"]]
        img_paths.extend(pimages)
    else:
        img_paths.append(i)

EOS_TOKEN = 1
alphabet = src.utils.getAlphabetStr(opt.alphabet)
nclass = len(alphabet) + 3

encoder = crnn.CNN(height, 1, opt.nh, cnn_size=opt.cnn_size)  # , cnn_size=16)
# decoder = crnn.decoder(256, nclass)   # seq2seq decoder; 2 more classes are added inside the decoder
decoder = crnn.decoderV2(opt.nh, nclass)

if opt.verbose:
    print(encoder)
    model_summary(encoder.cnn)
    print(decoder)
    model_summary(decoder)

if encoder_path and decoder_path:
    if opt.verbose:
        print('loading pretrained models ......')
        print(" - encoder_path:", encoder_path)
        print(" - decoder_path:", decoder_path)
    try:
        encoder.load_state_dict(torch.load(encoder_path,
                                           map_location='cpu' if not use_gpu else None))
    except RuntimeError as e: