def eval(path="checkpoint3.pt"):
    """Load a CRNN checkpoint and run an endless interactive decode loop.

    Loads model + optimizer state from ``path``, then repeatedly pulls a
    batch from the module-level ``dataset`` generator, decodes the
    predictions and prints them. Runs forever; stop with Ctrl-C.

    NOTE: the name shadows the builtin ``eval``; kept unchanged so
    existing callers keep working.

    Args:
        path: checkpoint file produced by the training loop
            (dict with model/optimizer state, epoch and loss).
    """
    net = CRNN(nclass=100).double()
    # Optimizer is only created so its saved state can be restored/inspected.
    optimizer = optim.Adam(net.parameters())

    checkpoint = torch.load(path)
    net.load_state_dict(checkpoint["model_state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    epoch = checkpoint["epoch"]
    loss = checkpoint["loss"]
    print(f"model current epoch: {epoch} with loss: {loss}")

    net.eval()
    # Inference only: disable autograd bookkeeping for the whole loop
    # instead of relying on a per-batch .detach().
    with torch.no_grad():
        while True:
            data = next(dataset)
            images = data["the_inputs"]
            preds = net(images)
            pred_texts, probs = decode_batch2(preds, string.printable)
            for i, (text, prob) in enumerate(zip(pred_texts, probs)):
                print(text, prob)
                print(images[i].size())
def load_model_from_checkpoint(checkpoint_file_name, use_gpu=False):
    """Load a pretrained CRNN model."""
    # On CPU-only machines the checkpoint must be remapped to the CPU.
    map_location = None if use_gpu else 'cpu'
    checkpoint = torch.load(checkpoint_file_name, map_location=map_location)

    model = CRNN(line_size, 1, len(vocab), 256)
    model.load_state_dict(checkpoint['state_dict'])
    model.float()
    model.eval()

    # Place the model on the requested device before returning it.
    if use_gpu:
        return model.cuda()
    return model.cpu()
class PytorchOcr():
    """CRNN-based OCR wrapper: loads a trained model and decodes images."""

    def __init__(self, model_path):
        # Alphabet is stored in the config as unicode code points.
        alphabet_unicode = config.alphabet_v2
        self.alphabet = ''.join([chr(uni) for uni in alphabet_unicode])
        # print(len(self.alphabet))
        self.nclass = len(self.alphabet) + 1  # +1 for the CTC blank class
        self.model = CRNN(config.imgH, 1, self.nclass, 256)
        self.cuda = False
        if torch.cuda.is_available():
            self.cuda = True
            self.model.cuda()
            # Strip the 'module.' prefix that nn.DataParallel adds to keys.
            self.model.load_state_dict({
                k.replace('module.', ''): v
                for k, v in torch.load(model_path).items()
            })
        else:
            # self.model = nn.DataParallel(self.model)
            # CPU-only machine: remap the checkpoint tensors to CPU.
            self.model.load_state_dict(
                torch.load(model_path, map_location='cpu'))
        self.model.eval()
        self.converter = strLabelConverter(self.alphabet)

    def recognize(self, img):
        """Run OCR on one image (numpy array, BGR or grayscale); return text."""
        h, w = img.shape[:2]
        if len(img.shape) == 3:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        image = Image.fromarray(img)
        # Scale to height 32 while preserving aspect ratio (CRNN input height).
        transformer = resizeNormalize((int(w / h * 32), 32))
        image = transformer(image)
        image = image.view(1, *image.size())  # add batch dimension
        image = Variable(image)
        if self.cuda:
            image = image.cuda()
        preds = self.model(image)
        # Greedy decoding: most likely class per time step.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        # raw=False collapses repeats and drops blanks (CTC decoding).
        txt = self.converter.decode(preds.data, preds_size.data, raw=False)
        return txt
def ocr(orig_img, lines, checkpoint_file_name, use_gpu=False):
    """OCR on segmented lines.

    Args:
        orig_img: full page image (numpy array) the line boxes index into.
        lines: iterable of ((x1, y1), (x2, y2)) line bounding boxes.
        checkpoint_file_name: path to the trained CRNN checkpoint.
        use_gpu: run inference on CUDA when True.

    Returns:
        List of (line_image, predicted_text) tuples, one per input line.
    """
    model = CRNN(line_size, 1, len(vocab), 256)
    checkpoint = torch.load(checkpoint_file_name,
                            map_location='cpu' if not use_gpu else None)
    model.load_state_dict(checkpoint['state_dict'])
    model.float()
    model.eval()
    model = model.cuda() if use_gpu else model.cpu()

    def to_text(tensor, max_length=None, remove_repetitions=False):
        # CTC-style decoding: drop blanks ('B') and optionally collapse
        # consecutive repeated characters.
        sentence = ''
        sequence = tensor.cpu().detach().numpy()
        for i in range(len(sequence)):
            if max_length is not None and i >= max_length:
                continue
            char = idx2char[sequence[i]]
            if char != 'B':  # ignore blank
                if remove_repetitions and i != 0 and char == idx2char[
                        sequence[i - 1]]:
                    pass
                else:
                    sentence = sentence + char
        return sentence

    result = []
    # Scope autograd disabling to this function instead of flipping the
    # process-wide switch with torch.set_grad_enabled(False), which would
    # leave gradients disabled for the caller after we return.
    with torch.no_grad():
        for line in lines:
            (x1, y1), (x2, y2) = line
            # Lines are stored rotated 90°; rotate back and scale to the
            # model's fixed input height.
            line_img = image_resize(np.array(np.rot90(orig_img[y1:y2, x1:x2])),
                                    height=line_size)
            # Normalize to [0, 1] and add batch + channel dimensions.
            inputs = torch.from_numpy(line_img / 255).float() \
                .unsqueeze(0).unsqueeze(0)
            outputs = model(inputs)
            prediction = outputs.softmax(2).max(2)[1]
            predicted_text = to_text(prediction[:, 0], remove_repetitions=True)
            result.append((line_img, predicted_text))
    return result
def main(epoch_num, lr=0.1, training=True, fix_width=True):
    """Train or evaluate the CRNN on the IIIT5K dataset.

    Args:
        epoch_num (int): number of epochs to train for.
        lr (float, optional): learning rate (default: 0.1)
        training (bool, optional): If True, train the model, otherwise test it (default: True)
        fix_width (bool, optional): Scale images to fixed size (default: True)
    """
    model_path = ('fix_width_' if fix_width else '') + 'crnn.pth'
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    root = 'data/IIIT5K/'

    # Model construction and checkpoint loading are shared by both branches
    # (previously duplicated).  +1 output class for the CTC blank symbol.
    net = CRNN(1, len(letters) + 1)

    if training:
        start_epoch = 0
        # If there is a pre-trained model, resume from it.
        if os.path.exists(model_path):
            print('Pre-trained model detected.\nLoading model...')
            net.load_state_dict(torch.load(model_path))
        if torch.cuda.is_available():
            print('GPU detected.')
        net = train(root, start_epoch, epoch_num, letters,
                    net=net, lr=lr, fix_width=fix_width)
        # Save the trained model so training can be resumed later.
        torch.save(net.state_dict(), model_path)
        test(root, net, letters, fix_width=fix_width)
    else:
        if os.path.exists(model_path):
            net.load_state_dict(torch.load(model_path))
        test(root, net, letters, fix_width=fix_width)
# Inference script: run the trained model over the test videos and write
# a submission CSV mapping FileID -> predicted emotion.
import os
import torch
import cv2
from crnn import CRNN
from tqdm import tqdm
import csv
import numpy as np

# Load trained weights and move the model to the GPU for inference.
model = CRNN()
model.load_state_dict(torch.load('55acc.pt'))
model.eval()
model.to('cuda')

data_dir = "qia2020/test/"
# Class index -> emotion label used in the submission file.
emo = {0: 'hap', 1: 'sur', 2: 'neu', 3: 'fea', 4: 'dis', 5: 'ang', 6: 'sad'}

with open('test_confirm.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['FileID', 'Emotion'])
    for filename in tqdm(sorted(os.listdir(data_dir))):
        if not filename.endswith(".mp4"):
            continue
        # Pre-extracted video tensor saved under the 5-character clip id.
        f = 'torch_video_3_test/' + filename[:5] + '.pt'
        X = torch.load(f)
        # NOTE(review): 'cuda:0' here vs plain 'cuda' above — identical only
        # when device 0 is the current device; confirm.
        X = X.unsqueeze(0).to('cuda:0')
        # Text features for the clip; assumes the .npz contains a
        # 'word_embed' array — TODO confirm against the extraction code.
        with np.load(data_dir + filename[:5] + '.npz') as data:
            T = torch.Tensor(data['word_embed'])
        opt.nrnn, opt.dropout, opt.variational_dropout, leakyRelu=True)
# NOTE(review): the line above is the tail of a `net = CRNN(...)` call whose
# beginning lies outside this chunk.
print(net)

# Count total trainable-parameter elements by multiplying each tensor's
# dimensions and summing over all entries of the state dict.
params = net.state_dict()
params_shape = []
for k, v in params.items():
    # print(k, v.numpy().shape, reduce(mul, v.numpy().shape))
    params_shape.append(reduce(mul, v.numpy().shape))
params_total = sum(params_shape)
print('params_total:', params_total)

if opt.finetune:
    # Resume from an existing checkpoint.
    print('Loading model from', opt.modeldir + opt.modelname)
    net.load_state_dict(torch.load(opt.modeldir + opt.modelname))
else:
    # Fresh model: apply the custom weight initialisation.
    print('create new model')
    net.apply(weights_init)

if opt.ngpu > 1:
    # print("Let's use", torch.cuda.device_count(), "GPUs!")
    net = nn.DataParallel(net, device_ids=range(opt.ngpu))
net.cuda()
criterion = CTCLoss().cuda()

# Optimiser choice is driven by command-line flags.
if opt.adadelta:
    optimizer = optim.Adadelta(net.parameters(), lr=opt.lr)
    # , weight_decay=1e-8)
elif opt.rms:
    optimizer = optim.RMSprop(net.parameters(), lr=opt.lr)
def train_cs2s(path=None):
    """Train the ConvSeq2Seq model on generated fake text images.

    Optionally warm-starts the convolutional feature extractor from a
    pretrained CRNN checkpoint.

    Args:
        path: optional path to a CRNN checkpoint (dict with
            "model_state_dict") whose conv1..conv7 weights are copied in.
    """
    alphabet = string.printable
    nclass = len(alphabet)  # 100 printable characters
    writer = SummaryWriter()

    dataset = FakeTextImageGenerator(batch_size=4).iter()

    # Index 97 is the padding symbol — exclude it from the loss.
    criterion = CrossEntropyLoss(ignore_index=97)

    encoder = Encoder(512, 512, 1, 0)
    decoder = Decoder(512, 100, 100, 1, 0)
    net = ConvSeq2Seq(encoder, decoder, nclass=nclass).float()
    optimizer = optim.Adam(net.parameters(), lr=0.003)

    if path:
        # Warm start: copy the convolutional layers from a pretrained CRNN.
        net2 = CRNN(nclass=nclass).float()
        checkpoint = torch.load(path)
        net2.load_state_dict(checkpoint["model_state_dict"])
        print(net2)
        for name in ("conv1", "conv2", "conv3", "conv4",
                     "conv5", "conv6", "conv7"):
            getattr(net, name).load_state_dict(
                getattr(net2, name).state_dict())

    net.train()

    # Loop over the dataset multiple times.
    step = 0
    for epoch in range(1, 1000):
        running_loss = 0.0
        loop = tqdm(range(100))
        for i in loop:
            data = next(dataset)
            images = data["the_inputs"]
            labels = data["the_labels"]

            optimizer.zero_grad()

            # Forward pass with teacher-forcing ratio 0.5.
            outputs = net(images.float(), labels, 0.5)
            # Permute batch and seq_len dims so .reshape(-1, nclass)
            # lines up with labels.reshape(-1).
            outputs = outputs.permute(1, 0, 2)

            # Previously hard-coded as 100; use nclass so the reshape
            # stays correct if the alphabet ever changes.
            loss = criterion(outputs.reshape(-1, nclass), labels.reshape(-1))
            writer.add_scalar("loss", loss.item(), step)
            step += 1
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            loop.set_postfix(epoch=epoch, Loss=(running_loss / (i + 1)))

        # Checkpoint after every epoch.
        torch.save(
            {
                "epoch": epoch,
                "model_state_dict": net.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "loss": running_loss,
            },
            "cs2s_good.pt",
        )
    torch.save(net, "model_test_pretrained.pt")
    print("Finished Training")
def train(path=None):
    """Train the CRNN with CTC loss on generated fake text images.

    Args:
        path: optional checkpoint (dict with model/optimizer state, epoch
            and loss) to resume training from.
    """
    dataset = FakeTextImageGenerator(batch_size=16).iter()
    # zero_infinity guards against inf losses when an input sequence is
    # shorter than its target transcription.
    criterion = CTCLoss(reduction="mean", zero_infinity=True)
    net = CRNN(nclass=100).float()
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    start_epoch = 1
    if path:
        checkpoint = torch.load(path)
        net.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        epoch = checkpoint["epoch"]
        loss = checkpoint["loss"]
        print(f"model current epoch: {epoch} with loss: {loss}")
        # BUG FIX: the loaded epoch was previously ignored and training
        # always restarted at epoch 1; resume where the checkpoint left off.
        start_epoch = epoch + 1

    # Loop over the dataset multiple times.
    for epoch in range(start_epoch, 1000):
        running_loss = 0.0
        loop = tqdm(range(100))
        for i in loop:
            data = next(dataset)
            images = data["the_inputs"]
            labels = data["the_labels"]
            input_length = data["input_length"]
            label_length = data["label_length"]

            # Zero the parameter gradients.
            optimizer.zero_grad()

            # Forward + backward + optimize.
            outputs = net(images.float())
            loss = criterion(outputs, labels, input_length, label_length)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            loop.set_postfix(epoch=epoch, loss=(running_loss / (i + 1)))

        # Checkpoint after every epoch.
        torch.save(
            {
                "epoch": epoch,
                "model_state_dict": net.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "loss": running_loss,
            },
            "checkpoint5.pt",
        )
    print("Finished Training")
num_classes = 31
# Width of the flattened conv output fed to the RNN:
# ((W - K + 2P) / S + 1) output positions times the channel count.
rnn_input_size = int((inp_size - conv_kernel + 2 * padding) / stride + 1) * channels_out
print(f"INPUT SIZE: {inp_size}")
print(f"RNN INPUT SIZE: {rnn_input_size}")
# NOTE(review): sys.exit(0) makes everything below unreachable — looks like
# a leftover debugging guard; confirm intent before removing.
sys.exit(0)

model = CRNN(conv_kernel, channels_out, rnn_input_size, hidden_neurons_1,
             hidden_neurons_2, fc1, num_classes)
print("MODEL ARCHITECTURE:")
print(model)

print("Load trained model weights...")
model_path = "trained_models/128_hidden/run2/best_model_1fc.pt"
model.load_state_dict(torch.load(model_path))
model.eval()
print("Loaded trained model weights successfully!")

#######################################
################## 2. #################
#######################################
os.makedirs("activations", exist_ok=True)

for i, (data, true_labels) in enumerate(testing_dataloader):
    data = data.type(torch.FloatTensor)
    true_labels = true_labels.type(torch.LongTensor)
    # model.out() returns intermediate layer activations alongside the
    # word predictions — presumably (stg, tp, ifg, preds); verify in CRNN.
    stg_activations, tp_activations, ifg_activations, word_predictions = model.out(
        data)
    # Body of weights_init(m) — the `def` line lies outside this chunk.
    classname = m.__class__.__name__
    if classname.find("Conv") != -1:
        # DCGAN-style init: N(0, 0.02) for conv weights.
        # NOTE(review): .cuda() on the return of an in-place init_ call
        # creates a copy that is immediately discarded — likely a no-op;
        # confirm intent.
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02).cuda()
        if hasattr(m, "bias") and m.bias is not None:
            torch.nn.init.constant_(m.bias.data, 0.0).cuda()
    elif classname.find("BatchNorm2d") != -1:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02).cuda()
        torch.nn.init.constant_(m.bias.data, 0.0).cuda()


# Initialise weights.
weights_init(model)

# Load a saved model unless the sentinel value "save" was passed.
if opt.savedmodel != "save":
    model.load_state_dict(torch.load("savedmodel/%s.pth" % opt.savedmodel))

# Create optimizer.
optimizer = torch.optim.Adadelta(model.parameters())

# Data loaders for the train and test splits.
traindata = DataLoader(dataprocessing.ImageDataset(opt.dataroot, mode="train"),
                       batch_size=opt.batchsize, shuffle=True)
print(len(traindata))
testdata = DataLoader(dataprocessing.ImageDataset(opt.dataroot, mode="test"),
                      batch_size=opt.batchsize, shuffle=True)
print(len(testdata))

# Loss function
lossfunction = CTCLoss()
process = dataprocessing.ProcessText(opt.alphabet)
# tensorboard
# Demo: run a trained CRNN on one image and print the decoded label.
from PIL import Image
from torchvision import transforms
from crnn import CRNN
import torch
from utils import Converter

print('load input image...')
image = Image.open('demo_1.png').convert('L')  # grayscale: model takes 1 channel
transform = transforms.Compose(
    [transforms.Resize((32, 100)), transforms.ToTensor()])
image = transform(image)
image = image.unsqueeze(0)  # add batch dimension
image = image.cuda()

print('load trained model...')
# 1 input channel, 38 classes, 256 hidden units — presumably 36 chars
# + blank + '*'; confirm against the training configuration.
crnn = CRNN(1, 38, 256)
crnn = crnn.cuda()
crnn.load_state_dict(torch.load('trained_model/crnn.pth'))
crnn.eval()

predicted_label = crnn(image)
# Greedy decoding: most likely class per time step.
_, predicted_label = predicted_label.max(2)
predicted_label = predicted_label.transpose(1, 0).contiguous().view(-1)
converter = Converter('0123456789abcdefghijklmnopqrstuvwxyz*')
predicted_length = [predicted_label.size(0)]
# raw=False collapses repeats and removes blanks (CTC decoding).
predicted_label = converter.decode(predicted_label,
                                   predicted_length,
                                   raw=False)
print('predicted label: %s' % (predicted_label))