def main(filepath, max_length=20):
    """Recognize the text in one image and append the result to ``data.txt``.

    The image is read from ``images/<filepath>``, passed through
    ``transformer`` and the encoder/decoder pair restored from the
    checkpoints named by ``cfg.encoder`` and ``cfg.decoder``, then decoded
    with ``seq2seq_decode``. A ``<filepath>,<words>`` line is appended to
    ``data.txt`` and the prediction is printed together with its probability.

    Args:
        filepath: Image file name, appended to the ``images/`` prefix.
        max_length: Value forwarded to ``seq2seq_decode`` as ``max_length``
            (presumably the maximum number of decoding steps). Defaults to
            20, the value that was previously hard-coded.
    """
    # Evaluate the GPU predicate once instead of at every use site.
    use_gpu = torch.cuda.is_available() and cfg.use_gpu
    map_location = 'cuda' if use_gpu else 'cpu'

    # The context manager releases the file handle as soon as the pixel data
    # has been converted; convert() returns an independent image.
    with Image.open(r'images/' + filepath) as raw_image:
        image = raw_image.convert('RGB')
    image = transformer(image)
    if use_gpu:
        image = image.cuda()
    # Prepend a batch dimension of size 1. torch.autograd.Variable is a
    # deprecated no-op wrapper since PyTorch 0.4, so the tensor is used as is.
    image = image.unsqueeze(0)

    encoder = crnn.Encoder(3, cfg.hidden_size)
    # no dropout during inference
    decoder = crnn.Decoder(cfg.hidden_size, num_classes, dropout_p=0.0,
                           max_length=cfg.max_width)
    if use_gpu:
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    encoder.load_state_dict(torch.load(cfg.encoder, map_location=map_location))
    print('loading pretrained encoder models from {}.'.format(cfg.encoder))
    decoder.load_state_dict(torch.load(cfg.decoder, map_location=map_location))
    print('loading pretrained decoder models from {}.'.format(cfg.decoder))

    encoder.eval()
    decoder.eval()

    decoder_input = torch.zeros(1).long()
    decoder_hidden = decoder.initHidden(1)
    if use_gpu:
        decoder_input = decoder_input.cuda()
        decoder_hidden = decoder_hidden.cuda()

    # Pure inference: skip autograd bookkeeping for the forward pass and the
    # decoding loop so no computation graph is kept alive.
    with torch.no_grad():
        encoder_out = encoder(image)
        words, prob = seq2seq_decode(encoder_out, decoder, decoder_input,
                                     decoder_hidden, max_length)

    with open('data.txt', 'a', encoding='utf-8') as f:
        print(filepath + ',{0}'.format(words), file=f)
    print('predict_string: {} => predict_probility: {}'.format(words, prob))
def main(max_length=20):
    """Recognize the text in the image at ``cfg.img_path`` and print it.

    The image is passed through ``transformer`` and the encoder/decoder pair
    restored from the checkpoints named by ``cfg.encoder`` and
    ``cfg.decoder``, then decoded with ``seq2seq_decode``. The predicted
    string and its probability are printed.

    Args:
        max_length: Value forwarded to ``seq2seq_decode`` as ``max_length``
            (presumably the maximum number of decoding steps). Defaults to
            20, the value that was previously hard-coded.
    """
    # Evaluate the GPU predicate once instead of at every use site.
    use_gpu = torch.cuda.is_available() and cfg.use_gpu
    map_location = 'cuda' if use_gpu else 'cpu'

    # The context manager releases the file handle as soon as the pixel data
    # has been converted; convert() returns an independent image.
    with Image.open(cfg.img_path) as raw_image:
        image = raw_image.convert('RGB')
    image = transformer(image)
    if use_gpu:
        image = image.cuda()
    # Prepend a batch dimension of size 1. torch.autograd.Variable is a
    # deprecated no-op wrapper since PyTorch 0.4, so the tensor is used as is.
    image = image.unsqueeze(0)

    encoder = crnn.Encoder(channel_size=3, hidden_size=cfg.hidden_size)
    # no dropout during inference
    decoder = crnn.Decoder(hidden_size=cfg.hidden_size,
                           output_size=num_classes,
                           dropout_p=0.0,
                           max_length=cfg.max_width)
    if use_gpu:
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    encoder.load_state_dict(torch.load(cfg.encoder, map_location=map_location))
    print('loading pretrained encoder models from {}.'.format(cfg.encoder))
    decoder.load_state_dict(torch.load(cfg.decoder, map_location=map_location))
    print('loading pretrained decoder models from {}.'.format(cfg.decoder))

    encoder.eval()
    decoder.eval()

    decoder_input = torch.zeros(1).long()  # char label
    decoder_hidden = decoder.initHidden(batch_size=1)
    if use_gpu:
        decoder_input = decoder_input.cuda()
        decoder_hidden = decoder_hidden.cuda()

    # Pure inference: skip autograd bookkeeping for the forward pass and the
    # decoding loop so no computation graph is kept alive.
    with torch.no_grad():
        encoder_out = encoder(image)
        words, prob = seq2seq_decode(encoder_out, decoder, decoder_input,
                                     decoder_hidden, max_length)

    print('predict_string: {} => predict_probility: {}'.format(words, prob))
if args.cuda: detect_net.load_state_dict( copyStateDict(torch.load(args.trained_model))) else: detect_net.load_state_dict( copyStateDict(torch.load(args.trained_model, map_location='cpu'))) if args.cuda: detect_net = detect_net.cuda() detect_net = torch.nn.DataParallel(detect_net) cudnn.benchmark = False detect_net.eval() # load rec_net encoder = crnn.Encoder(3, args.hidden_size) # no dropout during inference decoder = crnn.Decoder(args.hidden_size, num_classes, dropout_p=0.0, max_length=args.max_width) print(encoder) print(decoder) if torch.cuda.is_available() and args.use_gpu: encoder = encoder.cuda() decoder = decoder.cuda() map_location = 'cuda' else: map_location = 'cpu' encoder.load_state_dict(torch.load(args.encoder,
def main():
    """Train the CRNN encoder/decoder pair and evaluate it afterwards.

    Steps, in order:
      1. Verify that a CUDA device is available (training is CUDA-only here).
      2. Ensure the ``cfg.model`` directory exists.
      3. Build the training loader from ``cfg.train_list`` and the test
         loader (batch size 1) from ``cfg.eval_list``.
      4. Build the encoder and decoder, initialise their weights with
         ``utils.weights_init`` and, when ``cfg.encoder`` / ``cfg.decoder``
         are set, restore the checkpoints they name.
      5. Allocate the reusable ``image`` / ``text`` buffers and the NLL loss,
         move everything to the GPU, then call ``train`` and ``evaluate``.

    Raises:
        AssertionError: If no CUDA device is available.
    """
    # Fail fast, before any dataset or checkpoint is touched. An explicit
    # raise (rather than `assert`) keeps the check active under `python -O`
    # while preserving the exception type callers may already catch.
    if not torch.cuda.is_available():
        raise AssertionError(
            "Please run 'train.py' script on nvidia cuda devices.")

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(cfg.model, exist_ok=True)

    # create train dataset
    train_dataset = dataset.TextLineDataset(text_line_file=cfg.train_list,
                                            transform=None)
    sampler = dataset.RandomSequentialSampler(train_dataset, cfg.batch_size)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.batch_size,
        shuffle=False,  # ordering is delegated to the custom sampler
        sampler=sampler,
        num_workers=int(cfg.num_workers),
        collate_fn=dataset.AlignCollate(img_height=cfg.img_height,
                                        img_width=cfg.img_width))

    # create test dataset
    test_dataset = dataset.TextLineDataset(
        text_line_file=cfg.eval_list,
        transform=dataset.ResizeNormalize(img_width=cfg.img_width,
                                          img_height=cfg.img_height))
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              shuffle=False,
                                              batch_size=1,
                                              num_workers=int(cfg.num_workers))

    # create crnn/seq2seq/attention network
    encoder = crnn.Encoder(channel_size=3, hidden_size=cfg.hidden_size)
    # for prediction of an indefinite long sequence
    decoder = crnn.Decoder(hidden_size=cfg.hidden_size,
                           output_size=num_classes,
                           dropout_p=0.1,
                           max_length=cfg.max_width)
    print(encoder)
    print(decoder)
    encoder.apply(utils.weights_init)
    decoder.apply(utils.weights_init)
    if cfg.encoder:
        print('loading pretrained encoder model from %s' % cfg.encoder)
        encoder.load_state_dict(torch.load(cfg.encoder))
    if cfg.decoder:
        # The original message said "encoder" here, mislabelling the log.
        print('loading pretrained decoder model from %s' % cfg.decoder)
        decoder.load_state_dict(torch.load(cfg.decoder))

    # create input tensor
    image = torch.FloatTensor(cfg.batch_size, 3, cfg.img_height, cfg.img_width)
    text = torch.LongTensor(cfg.batch_size)

    criterion = torch.nn.NLLLoss()

    encoder.cuda()
    decoder.cuda()
    image = image.cuda()
    text = text.cuda()
    criterion = criterion.cuda()

    # train crnn
    train(image, text, encoder, decoder, criterion, train_loader,
          teach_forcing_prob=cfg.teaching_forcing_prob)

    # do evaluation after training
    evaluate(image, text, encoder, decoder, test_loader, max_eval_iter=100)