Ejemplo n.º 1
0
    def __init__(self):
        """Build the encoder/decoder pair, restore their weights and set eval mode.

        Stores the recognition settings (alphabet, maximum label length,
        EOS token id, GPU preference, maximum image width), the label
        converter and the image-to-tensor transform on the instance, then
        creates the CNN encoder and the attention decoder, loads both
        checkpoints and moves the models to the GPU when one is available
        and ``self.use_gpu`` is set.
        """
        encoder_path = './expr/attentioncnn/encoder_600.pth'
        decoder_path = './expr/attentioncnn/decoder_600.pth'
        self.alphabet = '0123456789'
        self.max_length = 7  # length of the longest string
        self.EOS_TOKEN = 1
        self.use_gpu = True
        self.max_width = 220
        self.converter = utils.strLabelConverterForAttention(self.alphabet)
        self.transform = transforms.ToTensor()

        nclass = len(self.alphabet) + 3
        encoder = crnn.CNN(32, 1, 256)  # encoder
        # seq-to-seq decoder; per the original note, the decoder adds 2 more
        # to nclass internally
        decoder = crnn.decoder(256, nclass)

        if encoder_path and decoder_path:
            print('loading pretrained models ......')
            # Deserialize onto the CPU first: without map_location a checkpoint
            # saved from a CUDA device cannot be loaded on a CPU-only host,
            # which would defeat the availability check below.
            encoder.load_state_dict(
                torch.load(encoder_path, map_location='cpu'))
            decoder.load_state_dict(
                torch.load(decoder_path, map_location='cpu'))
        if torch.cuda.is_available() and self.use_gpu:
            encoder = encoder.cuda()
            decoder = decoder.cuda()
        # Inference only: switch both networks to evaluation mode.
        self.encoder = encoder.eval()
        self.decoder = decoder.eval()
Ejemplo n.º 2
0
# Negative log-likelihood loss used for training.
# NOTE(review): NLLLoss expects log-probabilities as input, so the decoder
# output is presumably produced by log_softmax — confirm against the model.
criterion = torch.nn.NLLLoss()


def weights_init(model):
    """Initialise the parameters of ``model`` and all of its sub-modules.

    * ``nn.Conv2d``: weights drawn with Kaiming-normal init (bias untouched).
    * ``nn.BatchNorm2d``: weight set to 1 and bias set to 0.
    * ``nn.Linear``: bias set to 0 (weights untouched).

    Layers built without the optional parameters (``bias=False`` for linear
    layers, ``affine=False`` for batch norm) are skipped instead of raising,
    because those attributes are ``None`` in that case.

    Args:
        model: an ``nn.Module``; every module returned by ``model.modules()``
            is visited.
    """
    # Official init from torch repo.
    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight)
        elif isinstance(m, nn.BatchNorm2d):
            # weight/bias are None when the layer was created with affine=False
            if m.weight is not None:
                nn.init.constant_(m.weight, 1)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            # bias is None when the layer was created with bias=False
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)


# Build the CNN encoder and the attention decoder.
encoder = crnn.CNN(opt.imgH, nc, opt.nh)
# max_length: w/4, i.e. the sequence length along the width axis after the
# encoder's feature extraction (per the original note).
decoder = crnn.decoder(
    opt.nh, nclass, dropout_p=0.1,
    max_length=opt.max_width)
encoder.apply(weights_init)
decoder.apply(weights_init)
# continue training or use the pretrained model to initial the parameters of the encoder and decoder
if opt.encoder:
    print('loading pretrained encoder model from %s' % opt.encoder)
    encoder.load_state_dict(torch.load(opt.encoder))
if opt.decoder:
    # Restore the decoder from its own checkpoint. The previous code reloaded
    # the encoder from opt.encoder here, so the decoder weights were never
    # restored, and it failed when only opt.decoder was given.
    print('loading pretrained decoder model from %s' % opt.decoder)
    decoder.load_state_dict(torch.load(opt.decoder))
print(encoder)
print(decoder)
Ejemplo n.º 3
0
import utils
import dataset
from PIL import Image
from utils import alphabet
import models.crnn_lang as crnn

# Inference settings for a single test image.
use_gpu = True

encoder_path = './expr/attentioncnn/encoder_5.pth'
decoder_path = './expr/attentioncnn/decoder_5.pth'
img_path = './test_img/20441531_4212871437.jpg'
max_length = 15  # length of the longest string
EOS_TOKEN = 1

nclass = len(alphabet) + 3
encoder = crnn.CNN(32, 1, 256)  # encoder
# seq-to-seq decoder; per the original note, the decoder adds 2 more to
# nclass internally
decoder = crnn.decoder(256, nclass)

if encoder_path and decoder_path:
    print('loading pretrained models ......')
    # Deserialize onto the CPU first: without map_location a checkpoint saved
    # from a CUDA device cannot be loaded on a CPU-only host, which would
    # defeat the availability check below.
    encoder.load_state_dict(torch.load(encoder_path, map_location='cpu'))
    decoder.load_state_dict(torch.load(decoder_path, map_location='cpu'))
if torch.cuda.is_available() and use_gpu:
    encoder = encoder.cuda()
    decoder = decoder.cuda()

converter = utils.strLabelConverterForAttention(alphabet)

# Load the image as single-channel greyscale and resize/normalise it.
# NOTE(review): (280, 32) is presumably (width, height) — confirm against
# dataset.resizeNormalize.
transformer = dataset.resizeNormalize((280, 32))
image = Image.open(img_path).convert('L')
image = transformer(image)
Ejemplo n.º 4
0
                                           transform=dataset.resizeNormalize(
                                               (opt.imgW, opt.imgH)))

    # Resolve the alphabet once; it drives both the class count and the
    # label converter below.
    alphabet = utils.getAlphabetStr(opt.alphabet)

    # Number of classes the decoder needs: 3 extra for SOS, EOS and blank.
    nclass = len(alphabet) + 3
    print(" -- Number of classes:", nclass)
    nc = 1

    converter = utils.strLabelConverterForAttention(alphabet)

    # NLLLoss: the final model output has to be log_softmax.
    criterion = torch.nn.NLLLoss()

    encoder = crnn.CNN(opt.imgH, nc, opt.nh, cnn_size=opt.cnn_size)
    # decoderV2: for prediction of an indefinitely long sequence.
    decoder = crnn.decoderV2(
        opt.nh, nclass,
        dropout_p=opt.dropout)
    encoder.apply(utils.weights_init)
    decoder.apply(utils.weights_init)
    # continue training or use the pretrained model to initial the parameters of the encoder and decoder
    if opt.encoder:
        print('loading pretrained encoder model from %s' % opt.encoder)
        encoder.load_state_dict(torch.load(opt.encoder))
    if opt.decoder:
        # Log message now names the decoder (it previously said "encoder").
        print('loading pretrained decoder model from %s' % opt.decoder)
        decoder.load_state_dict(torch.load(opt.decoder))
    print(encoder)
    model_summary(encoder)
Ejemplo n.º 5
0
                        default='2D',
                        help='the mode of attention')
    opt = parser.parse_args()

    # Single input channel; class count is the alphabet size plus 3
    # (presumably the special tokens — confirm against the converter).
    nc = 1
    nclass = len(alphabet) + 3
    cfg.SEQUENCE.NUM_CHAR = nclass

    # Pick the encoder/decoder pair that matches the attention mode.
    if opt.mode != '1D':
        encoder = crnn.CNN(opt.imgH, nc, opt.nh, cfg,
                           mode=opt.mode, dim_in=opt.dim_in)
        decoder = crnn.SequencePredictor(cfg, nclass)
    else:
        encoder = crnn.CNN(32, 1, 256, cfg)  # encoder
        # seq-to-seq decoder; per the original note, the decoder adds 2 more
        # to nclass internally
        decoder = crnn.decoderV2(256, nclass)

    if opt.encoder and opt.decoder:
        print('loading pretrained models ......')
Ejemplo n.º 6
0
# Validation set: 1D mode resizes images, every other mode pads them.
test_dataset = dataset.listDataset(
    list_file=opt.vallist,
    transform=(dataset.resizeNormalize((opt.imgW, opt.imgH))
               if opt.mode == '1D'
               else dataset.paddingNormalize(opt.imgH, opt.imgW)))

# Number of classes the decoder needs: 3 extra for SOS, EOS and blank.
nclass = len(alphabet) + 3
print('nclass:', nclass)
cfg.SEQUENCE.NUM_CHAR = nclass
nc = 1

converter = utils.strLabelConverterForAttention(alphabet)
# NLLLoss: the final model output has to be log_softmax.
criterion = torch.nn.NLLLoss()

# Choose the encoder/decoder pair for the selected attention mode.
if opt.mode != '1D':
    encoder = crnn.CNN(opt.imgH, nc, opt.nh, cfg, mode='2D', dim_in=512)
    decoder = crnn.SequencePredictor(cfg, nclass)
else:
    encoder = crnn.CNN(opt.imgH, nc, opt.nh, cfg)
    decoder = crnn.decoderV2(opt.nh, nclass, dropout_p=0.1)

encoder.apply(weights_init)
decoder.apply(weights_init)
# Resume training, or start from pretrained encoder/decoder parameters.
if opt.encoder:
    print('loading pretrained encoder model from %s' % opt.encoder)
    encoder.load_state_dict(torch.load(opt.encoder))
if opt.decoder:
    print('loading pretrained decoder model from %s' % opt.decoder)
Ejemplo n.º 7
0
        height = opt.imgH

    # Expand the command-line inputs: directories contribute their
    # jpg/tif/png entries, anything else is taken as a file path as-is.
    img_paths = []
    for entry in opt.image:
        if not os.path.isdir(entry):
            img_paths.append(entry)
            continue
        for fname in os.listdir(entry):
            # Text after the last dot, compared case-insensitively.
            if fname.rsplit(".", 1)[-1].lower() in ("jpg", "tif", "png"):
                img_paths.append(entry + os.sep + fname)

    EOS_TOKEN = 1
    alphabet = src.utils.getAlphabetStr(opt.alphabet)
    nclass = len(alphabet) + 3

    # Encoder takes a single input channel; decoderV2 is the seq-to-seq decoder.
    encoder = crnn.CNN(height, 1, opt.nh, cnn_size=opt.cnn_size)
    decoder = crnn.decoderV2(opt.nh, nclass)
    if opt.verbose:
        # Dump both architectures and their summaries.
        print(encoder)
        model_summary(encoder.cnn)
        print(decoder)
        model_summary(decoder)
    if encoder_path and decoder_path:
        if opt.verbose:
            print('loading pretrained models ......')
            print("   - encoder_path:",encoder_path)
            print("   - decoder_path:",decoder_path)
        try:
            encoder.load_state_dict(torch.load(encoder_path,map_location='cpu' if not use_gpu else None))
        except RuntimeError as e: