alphabet = utils.getAlphabetStr(opt.alphabet)
nclass = len(alphabet) + 3  # number of classes the decoder needs: +3 for SOS, EOS and blank
print(" -- Number of classes:", nclass)
nc = 1

converter = utils.strLabelConverterForAttention(alphabet)
# criterion = torch.nn.CrossEntropyLoss()
criterion = torch.nn.NLLLoss()  # the decoder's final output must be log_softmax

encoder = crnn.CNN(opt.imgH, nc, opt.nh, cnn_size=opt.cnn_size)
# decoder = crnn.decoder(opt.nh, nclass, dropout_p=0.1, max_length=opt.max_width)
# max_length: w/4, the sequence length along the width axis after the encoder's feature extraction
decoder = crnn.decoderV2(opt.nh, nclass, dropout_p=opt.dropout)  # for prediction of sequences of indefinite length
encoder.apply(utils.weights_init)
decoder.apply(utils.weights_init)

# continue training, or use a pretrained model to initialize the encoder and decoder parameters
if opt.encoder:
    print('loading pretrained encoder model from %s' % opt.encoder)
    encoder.load_state_dict(torch.load(opt.encoder))
if opt.decoder:
    print('loading pretrained decoder model from %s' % opt.decoder)
    decoder.load_state_dict(torch.load(opt.decoder))

print(encoder)
model_summary(encoder)
print(decoder)
model_summary(decoder)
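# Hedged sketch (not from this repo): the comment above requires the decoder to end in
# log_softmax because nn.NLLLoss expects log-probabilities, whereas nn.CrossEntropyLoss
# applies log_softmax to raw logits internally. The shapes and tensor names below are
# illustrative assumptions, not the actual decoder interface.
import torch
import torch.nn.functional as F

logits = torch.randn(4, 10)               # hypothetical batch of 4, 10 classes
targets = torch.tensor([1, 0, 3, 9])
log_probs = F.log_softmax(logits, dim=1)  # what decoderV2 is expected to emit
loss_nll = torch.nn.NLLLoss()(log_probs, targets)
loss_ce = torch.nn.CrossEntropyLoss()(logits, targets)
assert torch.allclose(loss_nll, loss_ce)  # the two formulations agree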
    test_dataset = dataset.listDataset(list_file=opt.vallist,
                                       transform=dataset.resizeNormalize((opt.imgW, opt.imgH)))
else:
    test_dataset = dataset.listDataset(list_file=opt.vallist,
                                       transform=dataset.paddingNormalize(opt.imgH, opt.imgW))

nclass = len(alphabet) + 3  # number of classes the decoder needs: +3 for SOS, EOS and blank
print('nclass:', nclass)
cfg.SEQUENCE.NUM_CHAR = nclass
nc = 1

converter = utils.strLabelConverterForAttention(alphabet)
# criterion = torch.nn.CrossEntropyLoss()
criterion = torch.nn.NLLLoss()  # the decoder's final output must be log_softmax

if opt.mode == '1D':
    encoder = crnn.CNN(opt.imgH, nc, opt.nh, cfg)
    decoder = crnn.decoderV2(opt.nh, nclass, dropout_p=0.1)
else:
    encoder = crnn.CNN(opt.imgH, nc, opt.nh, cfg, mode='2D', dim_in=512)
    # decoder = crnn.decoder(opt.nh, nclass, dropout_p=0.1, max_length=opt.max_width)
    # max_length: w/4, the sequence length along the width axis after the encoder's feature extraction
    # decoder = crnn.decoderV2(opt.nh, nclass, dropout_p=0.1)  # for prediction of sequences of indefinite length
    decoder = crnn.SequencePredictor(cfg, nclass)

encoder.apply(weights_init)
decoder.apply(weights_init)

# continue training, or use a pretrained model to initialize the encoder and decoder parameters
if opt.encoder:
    print('loading pretrained encoder model from %s' % opt.encoder)
    encoder.load_state_dict(torch.load(opt.encoder))
if opt.decoder:
    print('loading pretrained decoder model from %s' % opt.decoder)
    decoder.load_state_dict(torch.load(opt.decoder))
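# Hedged sketch (assumption): weights_init is not shown in this excerpt. A common
# definition in CRNN-style repos initializes Conv layers with N(0, 0.02) weights and
# BatchNorm layers with N(1.0, 0.02) weights and zero bias, applied via module.apply()
# as above; the repo's actual implementation may differ.
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)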
                    help='the mode of attention')
opt = parser.parse_args()

# use_gpu = True
# encoder_path = './expr/attentioncnn/encoder_10.pth'
# decoder_path = './expr/attentioncnn/decoder_10.pth'
# img_path = './test_img/20441531_4212871437.jpg'
# max_length = 15  # maximum length of the output string
# EOS_TOKEN = 1

nclass = len(alphabet) + 3
cfg.SEQUENCE.NUM_CHAR = nclass
nc = 1

if opt.mode == '1D':
    encoder = crnn.CNN(32, 1, 256, cfg)    # encoder
    decoder = crnn.decoderV2(256, nclass)  # seq-to-seq decoder; another 2 classes are added inside the decoder
    # decoder = crnn.decoderV2(256, nclass - 2)
else:
    encoder = crnn.CNN(opt.imgH, nc, opt.nh, cfg, mode=opt.mode, dim_in=opt.dim_in)
    decoder = crnn.SequencePredictor(cfg, nclass)

if opt.encoder and opt.decoder:
    print('loading pretrained models ......')
    encoder_dict = encoder.state_dict()
    trained_encoder_dict = torch.load(opt.encoder)
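    # Hedged sketch (assumption): the excerpt stops right after reading the checkpoint.
    # The usual continuation of this filtered-load idiom keeps only the keys whose names
    # and shapes match the current model before loading; the repo's actual code may differ.
    trained_encoder_dict = {k: v for k, v in trained_encoder_dict.items()
                            if k in encoder_dict and v.size() == encoder_dict[k].size()}
    encoder_dict.update(trained_encoder_dict)
    encoder.load_state_dict(encoder_dict)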
img_paths = []
for i in opt.image:
    if os.path.isdir(i):
        pimages = [i + os.sep + x for x in os.listdir(i)
                   if x.split(".")[-1].lower() in ["jpg", "tif", "png"]]
        img_paths.extend(pimages)
    else:
        img_paths.append(i)

EOS_TOKEN = 1
alphabet = src.utils.getAlphabetStr(opt.alphabet)
nclass = len(alphabet) + 3

encoder = crnn.CNN(height, 1, opt.nh, cnn_size=opt.cnn_size)
# decoder = crnn.decoder(256, nclass)  # seq-to-seq decoder; another 2 classes are added inside the decoder
decoder = crnn.decoderV2(opt.nh, nclass)
if opt.verbose:
    print(encoder)
    model_summary(encoder.cnn)
    print(decoder)
    model_summary(decoder)

if encoder_path and decoder_path:
    if opt.verbose:
        print('loading pretrained models ......')
        print(" - encoder_path:", encoder_path)
        print(" - decoder_path:", decoder_path)
    try:
        encoder.load_state_dict(torch.load(encoder_path, map_location='cpu' if not use_gpu else None))
    except RuntimeError as e:
        print("** ERROR loading encoder: ", encoder_path)
        print(e)
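    # Hedged sketch (assumption, not part of the excerpt): map_location='cpu' above lets a
    # checkpoint saved on GPU be loaded on a CPU-only machine. The decoder would typically
    # be loaded the same way, and both modules put into eval mode before inference; the
    # script's actual continuation may differ.
    try:
        decoder.load_state_dict(torch.load(decoder_path, map_location='cpu' if not use_gpu else None))
    except RuntimeError as e:
        print("** ERROR loading decoder: ", decoder_path)
        print(e)
    encoder.eval()
    decoder.eval()
    if use_gpu:
        encoder = encoder.cuda()
        decoder = decoder.cuda()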