Example #1
    alphabet = utils.getAlphabetStr(opt.alphabet)

    nclass = len(alphabet) + 3  # number of classes needed by the decoder; the extra 3 are for SOS, EOS and blank
    print(" -- Number of classes:", nclass)
    nc = 1

    converter = utils.strLabelConverterForAttention(alphabet)

    # criterion = torch.nn.CrossEntropyLoss()
    criterion = torch.nn.NLLLoss()  # the final output must be log_softmax

    encoder = crnn.CNN(opt.imgH, nc, opt.nh, cnn_size=opt.cnn_size)
    # decoder = crnn.decoder(opt.nh, nclass, dropout_p=0.1, max_length=opt.max_width)        # max_length = w/4, the sequence length along the width after the encoder's feature extraction
    decoder = crnn.decoderV2(opt.nh, nclass, dropout_p=opt.dropout)  # for predicting sequences of arbitrary length
    encoder.apply(utils.weights_init)
    decoder.apply(utils.weights_init)
    # continue training, or use the pretrained model to initialize the parameters of the encoder and decoder
    if opt.encoder:
        print('loading pretrained encoder model from %s' % opt.encoder)
        encoder.load_state_dict(torch.load(opt.encoder))
    if opt.decoder:
        print('loading pretrained decoder model from %s' % opt.decoder)
        decoder.load_state_dict(torch.load(opt.decoder))
    print(encoder)
    model_summary(encoder)
    print(decoder)
    model_summary(decoder)
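Example #1 initializes both networks with encoder.apply(utils.weights_init) and decoder.apply(utils.weights_init), but the initializer itself is not shown. A minimal sketch of a typical CRNN-style weights_init (an assumption about this repo's helper, not its verified code):

def weights_init(m):
    # called once per sub-module via model.apply(); initializes conv and batch-norm layers in place
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

# usage: encoder.apply(weights_init); decoder.apply(weights_init)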
Example #2
    test_dataset = dataset.listDataset(list_file=opt.vallist, transform=dataset.resizeNormalize((opt.imgW, opt.imgH)))
else:
    test_dataset = dataset.listDataset(list_file=opt.vallist, transform=dataset.paddingNormalize(opt.imgH, opt.imgW))

nclass = len(alphabet) + 3          # number of classes needed by the decoder; the extra 3 are for SOS, EOS and blank
print('nclass:', nclass)
cfg.SEQUENCE.NUM_CHAR = nclass
nc = 1

converter = utils.strLabelConverterForAttention(alphabet)
# criterion = torch.nn.CrossEntropyLoss()
criterion = torch.nn.NLLLoss()              # the final output must be log_softmax (see the sketch after this example)

if opt.mode == '1D':
    encoder = crnn.CNN(opt.imgH, nc, opt.nh, cfg)
    decoder = crnn.decoderV2(opt.nh, nclass, dropout_p=0.1)
else:
    encoder = crnn.CNN(opt.imgH, nc, opt.nh, cfg, mode='2D', dim_in=512)
    # decoder = crnn.decoder(opt.nh, nclass, dropout_p=0.1, max_length=opt.max_width)        # max_length = w/4, the sequence length along the width after the encoder's feature extraction
    #decoder = crnn.decoderV2(opt.nh, nclass, dropout_p=0.1)        # for predicting sequences of arbitrary length
    decoder = crnn.SequencePredictor(cfg, nclass)
    
encoder.apply(weights_init)
decoder.apply(weights_init)
# continue training, or use the pretrained model to initialize the parameters of the encoder and decoder
if opt.encoder:
    print('loading pretrained encoder model from %s' % opt.encoder)
    encoder.load_state_dict(torch.load(opt.encoder))
if opt.decoder:
    print('loading pretrained decoder model from %s' % opt.decoder)
    decoder.load_state_dict(torch.load(opt.decoder))
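Both examples use torch.nn.NLLLoss, which expects log-probabilities as input, so the decoder's last step has to apply log_softmax; torch.nn.CrossEntropyLoss would instead take the raw scores and apply log_softmax internally. A minimal sketch of the pairing, with made-up shapes:

import torch
import torch.nn.functional as F

criterion = torch.nn.NLLLoss()

scores = torch.randn(4, 10)          # hypothetical (batch, nclass) raw decoder outputs
target = torch.tensor([1, 0, 3, 9])  # ground-truth class indices

log_probs = F.log_softmax(scores, dim=1)  # NLLLoss expects log-probabilities
loss = criterion(log_probs, target)

# CrossEntropyLoss applied to the raw scores gives the same value:
same_loss = torch.nn.CrossEntropyLoss()(scores, target)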
Example #3
                        help='the mode of attention')
    opt = parser.parse_args()

    #use_gpu = True
    # encoder_path = './expr/attentioncnn/encoder_10.pth'
    # decoder_path = './expr/attentioncnn/decoder_10.pth'
    # img_path = './test_img/20441531_4212871437.jpg'
    #max_length = 15                          # length of the longest string
    #EOS_TOKEN = 1

    nclass = len(alphabet) + 3
    cfg.SEQUENCE.NUM_CHAR = nclass
    nc = 1
    if opt.mode == '1D':
        encoder = crnn.CNN(32, 1, 256, cfg)  # encoder
        decoder = crnn.decoderV2(256, nclass)  # seq-to-seq decoder; another 2 is added to nclass inside the decoder
        #decoder = crnn.decoderV2(256, nclass - 2)
    else:
        encoder = crnn.CNN(opt.imgH,
                           nc,
                           opt.nh,
                           cfg,
                           mode=opt.mode,
                           dim_in=opt.dim_in)
        decoder = crnn.SequencePredictor(cfg, nclass)

    if opt.encoder and opt.decoder:
        print('loading pretrained models ......')

        encoder_dict = encoder.state_dict()
        trained_encoder_dict = torch.load(opt.encoder)
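The snippet above is cut off right after the checkpoint is loaded into trained_encoder_dict. A common continuation of this pattern, reusing the names defined in the snippet (a sketch of the usual approach, not necessarily this repo's exact code), keeps only the entries that match the freshly built encoder before calling load_state_dict:

# keep only parameters whose names and shapes match the current encoder
filtered = {k: v for k, v in trained_encoder_dict.items()
            if k in encoder_dict and v.size() == encoder_dict[k].size()}
encoder_dict.update(filtered)
encoder.load_state_dict(encoder_dict)
# the decoder checkpoint from opt.decoder would typically be handled the same way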
Example #4
    img_paths = []
    for i in opt.image:
        if os.path.isdir(i):
            pimages = [i+os.sep+x for x in os.listdir(i) if x.split(".")[-1].lower() in ["jpg","tif","png"]]
            img_paths.extend(pimages)
        else:
            img_paths.append(i)


    EOS_TOKEN = 1
    alphabet = src.utils.getAlphabetStr(opt.alphabet)
    nclass = len(alphabet) + 3

    encoder = crnn.CNN(height, 1, opt.nh, cnn_size=opt.cnn_size)#, cnn_size=16)
    # decoder = crnn.decoder(256, nclass)     # seq-to-seq decoder; another 2 is added to nclass inside the decoder
    decoder = crnn.decoderV2(opt.nh, nclass)
    if opt.verbose:
        print(encoder)
        model_summary(encoder.cnn)
        print(decoder)
        model_summary(decoder)
    if encoder_path and decoder_path:
        if opt.verbose:
            print('loading pretrained models ......')
            print("   - encoder_path:",encoder_path)
            print("   - decoder_path:",decoder_path)
        try:
            encoder.load_state_dict(torch.load(encoder_path,map_location='cpu' if not use_gpu else None))
        except RuntimeError as e:
            print("** ERROR loading encoder: ",encoder_path)
            print(e)
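For inference, each gathered image path has to be preprocessed the same way as the validation images in the earlier examples (dataset.resizeNormalize). That transform is not shown here; a minimal sketch of a typical resize-and-normalize transform for a grayscale text line (an assumption about its behaviour, not this repo's verified code):

from PIL import Image
import torchvision.transforms as transforms

class ResizeNormalize:
    # resize a grayscale image to (img_w, img_h), convert to a tensor and map pixels to [-1, 1]
    def __init__(self, img_w, img_h):
        self.size = (img_w, img_h)
        self.to_tensor = transforms.ToTensor()

    def __call__(self, img):
        img = img.resize(self.size, Image.BILINEAR)
        img = self.to_tensor(img)        # shape (1, img_h, img_w), values in [0, 1]
        return img.sub_(0.5).div_(0.5)   # normalize to [-1, 1]

# hypothetical usage on one of the gathered paths:
# img = Image.open(img_paths[0]).convert('L')
# tensor = ResizeNormalize(target_w, height)(img).unsqueeze(0)  # target_w is a hypothetical target width; unsqueeze adds a batch dimension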