import string

import torch
from torch import optim


def eval(path="checkpoint3.pt"):
    net = CRNN(nclass=100).double()
    optimizer = optim.Adam(net.parameters())

    # Restore model and optimizer state from the checkpoint.
    checkpoint = torch.load(path)
    net.load_state_dict(checkpoint["model_state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    epoch = checkpoint["epoch"]
    loss = checkpoint["loss"]
    print(f"model current epoch: {epoch} with loss: {loss}")

    net.eval()
    while True:
        data = next(dataset)
        images = data["the_inputs"]
        labels = data["the_labels"]
        input_length = data["input_length"]
        label_length = data["label_length"]

        preds = net(images).detach()
        pred_texts, probs = decode_batch2(preds, string.printable)
        for i in range(len(pred_texts)):
            print(pred_texts[i], probs[i])
            print(images[i].size())
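# `decode_batch2` is not defined in the snippet above. The sketch below shows
# one plausible greedy-CTC implementation, assuming `preds` has shape
# (batch, time, nclass), class 0 is the CTC blank, and class i maps to
# alphabet[i - 1] -- all assumptions, not the original code.
import torch.nn.functional as F

def decode_batch2(preds, alphabet):
    probs = F.softmax(preds, dim=2)
    max_probs, idxs = probs.max(2)  # best class per time step
    texts, confidences = [], []
    for b in range(idxs.size(0)):
        chars, prev = [], 0
        for t in range(idxs.size(1)):
            k = idxs[b, t].item()
            if k != 0 and k != prev:  # collapse repeats, then drop blanks
                chars.append(alphabet[k - 1])
            prev = k
        texts.append(''.join(chars))
        confidences.append(max_probs[b].mean().item())
    return texts, confidences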
def load_model_from_checkpoint(checkpoint_file_name, use_gpu=False):
    """Load a pretrained CRNN model."""
    model = CRNN(line_size, 1, len(vocab), 256)
    checkpoint = torch.load(checkpoint_file_name,
                            map_location=None if use_gpu else 'cpu')
    model.load_state_dict(checkpoint['state_dict'])
    model.float()
    model.eval()
    model = model.cuda() if use_gpu else model.cpu()
    return model
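# Usage sketch for the loader above; `line_size` and `vocab` are assumed to be
# module-level globals, as the function itself requires, and the checkpoint
# path is illustrative:
#
#     model = load_model_from_checkpoint('checkpoints/crnn_best.pt',
#                                        use_gpu=torch.cuda.is_available())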
class PytorchOcr:
    def __init__(self, model_path):
        alphabet_unicode = config.alphabet_v2
        self.alphabet = ''.join([chr(uni) for uni in alphabet_unicode])
        self.nclass = len(self.alphabet) + 1  # +1 for the CTC blank
        self.model = CRNN(config.imgH, 1, self.nclass, 256)
        self.cuda = False
        if torch.cuda.is_available():
            self.cuda = True
            self.model.cuda()
            # Strip the 'module.' prefix left over from nn.DataParallel.
            self.model.load_state_dict({
                k.replace('module.', ''): v
                for k, v in torch.load(model_path).items()
            })
        else:
            self.model.load_state_dict(
                torch.load(model_path, map_location='cpu'))
        self.model.eval()
        self.converter = strLabelConverter(self.alphabet)

    def recognize(self, img):
        h, w = img.shape[:2]
        if len(img.shape) == 3:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        image = Image.fromarray(img)
        # Scale to height 32 while preserving the aspect ratio.
        transformer = resizeNormalize((int(w / h * 32), 32))
        image = transformer(image)
        image = image.view(1, *image.size())
        image = Variable(image)
        if self.cuda:
            image = image.cuda()

        preds = self.model(image)
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        txt = self.converter.decode(preds.data, preds_size.data, raw=False)
        return txt
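# `resizeNormalize` and `strLabelConverter` come from the crnn.pytorch
# utilities. `resizeNormalize` is commonly implemented along these lines
# (a sketch, not necessarily the exact version used here):
from PIL import Image
from torchvision import transforms

class resizeNormalize(object):
    def __init__(self, size, interpolation=Image.BILINEAR):
        self.size = size
        self.interpolation = interpolation
        self.toTensor = transforms.ToTensor()

    def __call__(self, img):
        img = img.resize(self.size, self.interpolation)
        img = self.toTensor(img)   # scales pixel values to [0, 1]
        img.sub_(0.5).div_(0.5)    # shifts them to [-1, 1]
        return img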
def ocr(orig_img, lines, checkpoint_file_name, use_gpu=False):
    """OCR on segmented lines."""
    model = CRNN(line_size, 1, len(vocab), 256)
    checkpoint = torch.load(checkpoint_file_name,
                            map_location=None if use_gpu else 'cpu')
    model.load_state_dict(checkpoint['state_dict'])
    model.float()
    model.eval()
    model = model.cuda() if use_gpu else model.cpu()
    torch.set_grad_enabled(False)

    def to_text(tensor, max_length=None, remove_repetitions=False):
        sentence = ''
        sequence = tensor.cpu().detach().numpy()
        for i in range(len(sequence)):
            if max_length is not None and i >= max_length:
                continue
            char = idx2char[sequence[i]]
            if char != 'B':  # ignore blank
                if remove_repetitions and i != 0 and char == idx2char[sequence[i - 1]]:
                    pass
                else:
                    sentence = sentence + char
        return sentence

    result = []
    for line in lines:
        (x1, y1), (x2, y2) = line
        line_img = image_resize(np.array(np.rot90(orig_img[y1:y2, x1:x2])),
                                height=line_size)
        inputs = torch.from_numpy(line_img / 255).float().unsqueeze(0).unsqueeze(0)
        if use_gpu:
            inputs = inputs.cuda()  # keep inputs on the same device as the model
        outputs = model(inputs)
        prediction = outputs.softmax(2).max(2)[1]
        predicted_text = to_text(prediction[:, 0], remove_repetitions=True)
        result.append((line_img, predicted_text))
    return result
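# Worked example of the collapse order in `to_text` above: a raw per-frame
# argmax of h h e l B B l l o (with blank 'B') first drops frames that repeat
# the previous frame, giving h e l B l o, then drops the blanks, giving
# "hello". The blank between the two l's is what preserves double letters.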
def infer(files, save_static_path=None):
    result_list = []
    place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
    print('infer with {}'.format(place))
    with fluid.dygraph.guard(place):
        params, _ = fluid.load_dygraph('{}/crnn_best'.format('output/baidu_model'))
        # crnn = CRNN(train_parameters["class_dim"] + 1, 1)
        crnn = CRNN(3828, 1)
        crnn.load_dict(params)
        crnn.eval()
        for file in tqdm(files):
            img = precess_img(file)
            img = fluid.dygraph.to_variable(img).astype('float32')
            if save_static_path is not None:
                # Trace the dygraph model and save it as a static-graph
                # inference model.
                out_dygraph, static_layer = TracedLayer.trace(crnn, inputs=[img])
                static_layer.save_inference_model(save_static_path, feed=[0], fetch=[0])
            pred = crnn(img)
            output = utils.greedy_decode(pred.numpy(), blank=train_parameters["class_dim"])
            p_s = "".join([train_parameters['r_label_dict'][c] for c in output[0]])
            result_list.append('{0}\t{1}'.format(os.path.basename(file), p_s))
            break  # NOTE: stops after the first file; remove to process the whole list
    return result_list
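# `utils.greedy_decode` is external to the snippet above. A plausible
# numpy-only sketch, assuming `pred` has shape (batch, time, nclass) and
# `blank` is the blank class index (both assumptions):
import numpy as np

def greedy_decode(pred, blank):
    best = pred.argmax(axis=2)  # best class per time step, (batch, time)
    decoded = []
    for seq in best:
        out, prev = [], blank
        for k in seq:
            if k != blank and k != prev:  # collapse repeats, drop blanks
                out.append(int(k))
            prev = k
        decoded.append(out)
    return decoded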
import csv
import os

import cv2
import numpy as np
import torch
from tqdm import tqdm

from crnn import CRNN

model = CRNN()
model.load_state_dict(torch.load('55acc.pt'))
model.eval()
model.to('cuda')

data_dir = "qia2020/test/"
emo = {0: 'hap', 1: 'sur', 2: 'neu', 3: 'fea', 4: 'dis', 5: 'ang', 6: 'sad'}

with open('test_confirm.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['FileID', 'Emotion'])
    for filename in tqdm(sorted(os.listdir(data_dir))):
        if not filename.endswith(".mp4"):
            continue
        f = 'torch_video_3_test/' + filename[:5] + '.pt'
        X = torch.load(f)
        X = X.unsqueeze(0).to('cuda:0')
        with np.load(data_dir + filename[:5] + '.npz') as data:
            T = torch.Tensor(data['word_embed'])
def train():
    epoch_num = train_parameters["num_epochs"]
    batch_size = train_parameters["train_batch_size"]
    place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
    logger.info('train with {}'.format(place))
    with fluid.dygraph.guard(place):
        # Load the data.
        file_list = open(train_parameters['train_list']).readlines()
        train_reader = get_loader(
            file_list=file_list,
            input_size=train_parameters['input_size'],
            max_char_per_line=train_parameters['max_char_per_line'],
            mean_color=train_parameters['mean_color'],
            batch_size=train_parameters['train_batch_size'],
            mode='train',
            label_dict=train_parameters['label_dict'],
            place=place)
        batch_num = len(train_reader())
        crnn = CRNN(train_parameters["class_dim"] + 1, batch_size=batch_size)
        total_step = batch_num * epoch_num
        LR = train_parameters['learning_rate']
        lr = fluid.layers.polynomial_decay(LR, total_step, 1e-7, power=0.9)
        # lr = fluid.layers.piecewise_decay([total_step // 3, total_step * 2 // 3], [LR, LR * 0.1, LR * 0.01])
        optimizer = fluid.optimizer.Adam(learning_rate=lr, parameter_list=crnn.parameters())
        if train_parameters["continue_train"]:
            # Load the model from the previous run and continue training.
            params_dict, opt_dict = fluid.load_dygraph(
                '{}/crnn_latest'.format(train_parameters['save_model_dir']))
            crnn.set_dict(params_dict)
            optimizer.set_dict(opt_dict)
            logger.info("load model from {}".format(train_parameters['save_model_dir']))
        current_best = -1
        start_epoch = 0
        for epoch in range(start_epoch, epoch_num):
            crnn.train()
            tic = time.time()
            for batch_id, (img, label, label_len) in enumerate(train_reader()):
                out = crnn(img)
                # warpctc expects (time, batch, nclass).
                out_for_loss = fluid.layers.transpose(out, [1, 0, 2])
                input_length = np.array([out.shape[1]] * out.shape[0]).astype("int64")
                input_length = fluid.dygraph.to_variable(input_length)
                input_length.stop_gradient = True
                loss = fluid.layers.warpctc(
                    input=out_for_loss,
                    label=label.astype(np.int32),
                    input_length=input_length,
                    label_length=label_len,
                    blank=train_parameters["class_dim"],
                    norm_by_times=True)
                avg_loss = fluid.layers.reduce_mean(loss)
                cur_acc_num, cur_all_num = acc_batch(out.numpy(), label.numpy())
                if batch_id % 1 == 0:  # log every batch
                    logger.info(
                        "epoch [{}/{}], step [{}/{}], loss: {:.6f}, acc: {:.4f}, lr: {}, time: {:.4f}"
                        .format(epoch, epoch_num, batch_id, batch_num,
                                avg_loss.numpy()[0], cur_acc_num / cur_all_num,
                                optimizer.current_step_lr(), time.time() - tic))
                    tic = time.time()
                avg_loss.backward()
                optimizer.minimize(avg_loss)
                crnn.clear_gradients()
            fluid.save_dygraph(crnn.state_dict(),
                               '{}/crnn_latest'.format(train_parameters['save_model_dir']))
            fluid.save_dygraph(optimizer.state_dict(),
                               '{}/crnn_latest'.format(train_parameters['save_model_dir']))
            crnn.eval()
            ratio = eval_model(crnn, place=place)
            if ratio >= current_best:
                fluid.save_dygraph(crnn.state_dict(),
                                   '{}/crnn_best'.format(train_parameters['save_model_dir']))
                fluid.save_dygraph(optimizer.state_dict(),
                                   '{}/crnn_best'.format(train_parameters['save_model_dir']))
                current_best = ratio
                logger.info("save model to {}, current best acc:{:.2f}".format(
                    train_parameters['save_model_dir'], ratio))
        logger.info("train end")
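# `acc_batch` is not shown above. A rough sketch of a per-sample exact-match
# accuracy counter matching its call site, assuming a greedy_decode like the
# one sketched earlier and labels padded with the blank class -- both
# assumptions about code that is not in this snippet:
def acc_batch(out, labels):
    blank = train_parameters["class_dim"]
    decoded = greedy_decode(out, blank=blank)
    correct = 0
    for pred, label in zip(decoded, labels):
        target = [int(c) for c in label if int(c) != blank]
        correct += int(pred == target)
    return correct, len(decoded)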
import torch
from PIL import Image
from torchvision import transforms

from crnn import CRNN
from utils import Converter

print('load input image...')
image = Image.open('demo_1.png').convert('L')
transform = transforms.Compose(
    [transforms.Resize((32, 100)), transforms.ToTensor()])
image = transform(image)
image = image.unsqueeze(0)  # add batch dimension: (1, 1, 32, 100)
image = image.cuda()

print('load trained model...')
crnn = CRNN(1, 38, 256)
crnn = crnn.cuda()
crnn.load_state_dict(torch.load('trained_model/crnn.pth'))
crnn.eval()

predicted_label = crnn(image)
_, predicted_label = predicted_label.max(2)
predicted_label = predicted_label.transpose(1, 0).contiguous().view(-1)
converter = Converter('0123456789abcdefghijklmnopqrstuvwxyz*')
predicted_length = [predicted_label.size(0)]
predicted_label = converter.decode(predicted_label, predicted_length, raw=False)
print('predicted label: %s' % predicted_label)
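# `Converter.decode(..., raw=False)` presumably performs the usual CTC
# post-processing (collapse repeated classes, drop the blank) before mapping
# indices back to characters, while raw=True would return the uncollapsed
# per-frame string -- an assumption based on the common strLabelConverter API.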