def eval(path="checkpoint3.pt"):
    net = CRNN(nclass=100).double()
    optimizer = optim.Adam(net.parameters())

    checkpoint = torch.load(path)
    net.load_state_dict(checkpoint["model_state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    epoch = checkpoint["epoch"]
    loss = checkpoint["loss"]
    print(f"model current epoch: {epoch} with loss: {loss}")

    net.eval()

    # The original snippet uses `dataset` without defining it; assume the same
    # FakeTextImageGenerator used by the training functions below.
    dataset = FakeTextImageGenerator(batch_size=4).iter()

    while True:
        data = next(dataset)
        images = data["the_inputs"]
        labels = data["the_labels"]
        input_length = data["input_length"]
        label_length = data["label_length"]

        preds = net(images.double()).detach()  # cast inputs to double to match the double-precision model
        pred_texts, probs = decode_batch2(preds, string.printable)
        for i in range(len(pred_texts)):
            print(pred_texts[i], probs[i])
            print(images[i].size())
Example #2
def load_model_from_checkpoint(checkpoint_file_name, use_gpu=False):
    """Load a pretrained CRNN model."""
    model = CRNN(line_size, 1, len(vocab), 256)
    checkpoint = torch.load(checkpoint_file_name,
                            map_location='cpu' if not use_gpu else None)
    model.load_state_dict(checkpoint['state_dict'])
    model.float()
    model.eval()
    model = model.cuda() if use_gpu else model.cpu()
    return model
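A minimal usage sketch for the loader above, assuming the surrounding module already defines line_size and vocab; the checkpoint filename is a placeholder:

# Hypothetical usage: load on CPU and run a dummy line image through the network.
model = load_model_from_checkpoint('crnn_checkpoint.pth', use_gpu=False)
dummy = torch.zeros(1, 1, line_size, 256)  # (batch, channel, height, width); width is arbitrary
with torch.no_grad():
    logits = model(dummy)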
Example #3
class PytorchOcr():
    def __init__(self, model_path):
        alphabet_unicode = config.alphabet_v2
        self.alphabet = ''.join([chr(uni) for uni in alphabet_unicode])
        # print(len(self.alphabet))
        self.nclass = len(self.alphabet) + 1
        self.model = CRNN(config.imgH, 1, self.nclass, 256)
        self.cuda = False
        if torch.cuda.is_available():
            self.cuda = True
            self.model.cuda()
            self.model.load_state_dict({
                k.replace('module.', ''): v
                for k, v in torch.load(model_path).items()
            })
        else:
            # self.model = nn.DataParallel(self.model)
            self.model.load_state_dict(
                torch.load(model_path, map_location='cpu'))
        self.model.eval()
        self.converter = strLabelConverter(self.alphabet)

    def recognize(self, img):
        h, w = img.shape[:2]
        if len(img.shape) == 3:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        image = Image.fromarray(img)
        transformer = resizeNormalize((int(w / h * 32), 32))
        image = transformer(image)
        image = image.view(1, *image.size())
        image = Variable(image)

        if self.cuda:
            image = image.cuda()

        preds = self.model(image)

        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)

        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        txt = self.converter.decode(preds.data, preds_size.data, raw=False)

        return txt
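A short usage sketch for the wrapper above; the weight path and image path are placeholders:

# Hypothetical usage: recognize the text in a single cropped line image.
recognizer = PytorchOcr('crnn_weights.pth')
img = cv2.imread('line_crop.png')
print(recognizer.recognize(img))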
Example #4
def ocr(orig_img, lines, checkpoint_file_name, use_gpu=False):
    """OCR on segmented lines."""
    model = CRNN(line_size, 1, len(vocab), 256)
    checkpoint = torch.load(checkpoint_file_name,
                            map_location='cpu' if not use_gpu else None)
    model.load_state_dict(checkpoint['state_dict'])
    model.float()
    model.eval()
    model = model.cuda() if use_gpu else model.cpu()
    torch.set_grad_enabled(False)

    result = []
    for line in lines:
        (x1, y1), (x2, y2) = line
        line_img = image_resize(np.array(np.rot90(orig_img[y1:y2, x1:x2])),
                                height=line_size)

        inputs = torch.from_numpy(line_img /
                                  255).float().unsqueeze(0).unsqueeze(0)
        outputs = model(inputs)
        prediction = outputs.softmax(2).max(2)[1]

        def to_text(tensor, max_length=None, remove_repetitions=False):
            sentence = ''
            sequence = tensor.cpu().detach().numpy()
            for i in range(len(sequence)):
                if max_length is not None and i >= max_length:
                    continue
                char = idx2char[sequence[i]]
                if char != 'B':  # ignore blank
                    if remove_repetitions and i != 0 and char == idx2char[
                            sequence[i - 1]]:
                        pass
                    else:
                        sentence = sentence + char
            return sentence

        predicted_text = to_text(prediction[:, 0], remove_repetitions=True)
        result.append((line_img, predicted_text))

    return result
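A sketch of driving ocr() with a page image and hand-picked line boxes, given as ((x1, y1), (x2, y2)) corners as unpacked inside the loop; all paths and coordinates are illustrative:

page = cv2.imread('page.png', cv2.IMREAD_GRAYSCALE)
lines = [((10, 20), (400, 60)), ((10, 70), (400, 110))]
for line_img, text in ocr(page, lines, 'crnn_checkpoint.pth', use_gpu=False):
    print(text)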
Example #5
def main(epoch_num, lr=0.1, training=True, fix_width=True):
    """
    Main

    Args:
        epoch_num (int): Number of training epochs
        lr (float, optional): Learning rate (default: 0.1)
        training (bool, optional): If True, train the model, otherwise test it (default: True)
        fix_width (bool, optional): Scale images to fixed size (default: True)
    """

    model_path = ('fix_width_' if fix_width else '') + 'crnn.pth'
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    root = 'data/IIIT5K/'
    if training:
        net = CRNN(1, len(letters) + 1)
        start_epoch = 0
        # if there is pre-trained model, load it
        if os.path.exists(model_path):
            print('Pre-trained model detected.\nLoading model...')
            net.load_state_dict(torch.load(model_path))
        if torch.cuda.is_available():
            print('GPU detected.')
        net = train(root,
                    start_epoch,
                    epoch_num,
                    letters,
                    net=net,
                    lr=lr,
                    fix_width=fix_width)
        # save the trained model for training again
        torch.save(net.state_dict(), model_path)
        # test
        test(root, net, letters, fix_width=fix_width)
    else:
        net = CRNN(1, len(letters) + 1)
        if os.path.exists(model_path):
            net.load_state_dict(torch.load(model_path))
        test(root, net, letters, fix_width=fix_width)
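For reference, two illustrative calls (argument values are arbitrary): a fixed-width training run followed by a test-only run.

main(epoch_num=20, lr=0.1, training=True, fix_width=True)   # train, save, then test
main(epoch_num=0, training=False, fix_width=True)           # test only, reloading fix_width_crnn.pth if present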
Example #6
import os
import torch
import cv2
from crnn import CRNN
from tqdm import tqdm

import csv
import numpy as np

model = CRNN()
model.load_state_dict(torch.load('55acc.pt'))
model.eval()
model.to('cuda')

data_dir = "qia2020/test/"
emo = {0: 'hap', 1: 'sur', 2: 'neu', 3: 'fea', 4: 'dis', 5: 'ang', 6: 'sad'}

with open('test_confirm.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['FileID', 'Emotion'])
    for filename in tqdm(sorted(os.listdir(data_dir))):
        if not filename.endswith(".mp4"):
            continue

        f = 'torch_video_3_test/' + filename[:5] + '.pt'

        X = torch.load(f)
        X = X.unsqueeze(0).to('cuda:0')

        with np.load(data_dir + filename[:5] + '.npz') as data:
            T = torch.Tensor(data['word_embed'])
Example #7
           opt.nrnn,
           opt.dropout,
           opt.variational_dropout,
           leakyRelu=True)
print(net)
params = net.state_dict()
params_shape = []
for k, v in params.items():
    #    print(k, v.numpy().shape, reduce(mul, v.numpy().shape))
    params_shape.append(reduce(mul, v.numpy().shape))
params_total = sum(params_shape)
print('params_total:', params_total)

if opt.finetune:
    print('Loading model from', opt.modeldir + opt.modelname)
    net.load_state_dict(torch.load(opt.modeldir + opt.modelname))
else:
    print('create new model')
    net.apply(weights_init)

if opt.ngpu > 1:
    # print("Let's use", torch.cuda.device_count(), "GPUs!")
    net = nn.DataParallel(net, device_ids=range(opt.ngpu))
net.cuda()
criterion = CTCLoss().cuda()

if opt.adadelta:
    optimizer = optim.Adadelta(net.parameters(),
                               lr=opt.lr)  # , weight_decay=1e-8)
elif opt.rms:
    optimizer = optim.RMSprop(net.parameters(), lr=opt.lr)
Example #8
def train_cs2s(path=None):
    alphabet = string.printable
    nclass = len(alphabet)

    writer = SummaryWriter()
    dataset = FakeTextImageGenerator(batch_size=4).iter()

    criterion = CrossEntropyLoss(ignore_index=97)

    encoder = Encoder(512, 512, 1, 0)
    decoder = Decoder(512, 100, 100, 1, 0)
    net = ConvSeq2Seq(encoder, decoder, nclass=nclass).float()

    optimizer = optim.Adam(net.parameters(), lr=0.003)

    if path:
        net2 = CRNN(nclass=100).float()
        checkpoint = torch.load(path)
        net2.load_state_dict(checkpoint["model_state_dict"])
        # optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        # epoch = checkpoint["epoch"]
        # loss = checkpoint["loss"]
        # print(f"model current epoch: {epoch} with loss: {loss}")
        print(net2)

        net.conv1.load_state_dict(net2.conv1.state_dict())
        net.conv2.load_state_dict(net2.conv2.state_dict())
        net.conv3.load_state_dict(net2.conv3.state_dict())
        net.conv4.load_state_dict(net2.conv4.state_dict())
        net.conv5.load_state_dict(net2.conv5.state_dict())
        net.conv6.load_state_dict(net2.conv6.state_dict())
        net.conv7.load_state_dict(net2.conv7.state_dict())
    net.train()

    # loop over the dataset multiple times
    step = 0
    for epoch in range(1, 1000):
        running_loss = 0.0
        loop = tqdm(range(100))
        for i in loop:
            data = next(dataset)
            images = data["the_inputs"]
            labels = data["the_labels"]
            input_length = data["input_length"]
            label_length = data["label_length"]
            targets = data["targets"]

            # print("target", targets)
            # print("target l", targets.size())
            # print("label_l", label_length)
            # print("label_l l", label_length.size())
            # print("pred_l", input_length)
            # print("pred_l l", input_length.size())

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(images.float(), labels, 0.5)
            # permute batchsize and seq_len dim to match labels when using .view(-1, output.size()[2])
            outputs = outputs.permute(1, 0, 2)
            # print(outputs[8, 0, :])
            # print(outputs[:, 0, :])
            # print(outputs.size())
            # print(labels.size())
            output_argmax = outputs.argmax(2)
            # print(output_argmax.view(-1))
            # print(labels.reshape(-1))
            loss = criterion(outputs.reshape(-1, 100), labels.reshape(-1))

            writer.add_scalar("loss", loss.item(), step)
            step += 1
            loss.backward()
            # torch.nn.utils.clip_grad_norm_(net.parameters(), 1)
            optimizer.step()

            running_loss += loss.item()

            loop.set_postfix(epoch=epoch, Loss=(running_loss / (i + 1)))

        # print(f"Epoch: {epoch} | Loss: {running_loss/100}")
        torch.save(
            {
                "epoch": epoch,
                "model_state_dict": net.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "loss": running_loss,
            },
            "cs2s_good.pt",
        )
        torch.save(net, "model_test_pretrained.pt")

    print("Finished Training")
Example #9
def train(path=None):
    dataset = FakeTextImageGenerator(batch_size=16).iter()

    criterion = CTCLoss(reduction="mean", zero_infinity=True)

    net = CRNN(nclass=100).float()
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    if path:
        checkpoint = torch.load(path)
        net.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        epoch = checkpoint["epoch"]
        loss = checkpoint["loss"]
        print(f"model current epoch: {epoch} with loss: {loss}")

    # loop over the dataset multiple times
    for epoch in range(1, 1000):
        running_loss = 0.0
        loop = tqdm(range(100))
        for i in loop:
            data = next(dataset)
            images = data["the_inputs"]
            labels = data["the_labels"]
            input_length = data["input_length"]
            label_length = data["label_length"]
            targets = data["targets"]

            # print("target", targets)
            # print("target l", targets.size())
            # print("label_l", label_length)
            # print("label_l l", label_length.size())
            # print("pred_l", input_length)
            # print("pred_l l", input_length.size())

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(images.float())
            # print(outputs[8, 0, :])
            # print(outputs[:, 0, :])
            # print(outputs.size())
            loss = criterion(outputs, labels, input_length, label_length)

            # print(loss.item())
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            loop.set_postfix(epoch=epoch, loss=(running_loss / (i + 1)))

        # print(f"Epoch: {epoch} | Loss: {running_loss/100}")
        torch.save(
            {
                "epoch": epoch,
                "model_state_dict": net.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "loss": running_loss,
            },
            "checkpoint5.pt",
        )

    print("Finished Training")
Example #10
num_classes = 31
rnn_input_size = int((inp_size - conv_kernel + 2 * padding) / stride +
                     1) * channels_out

print(f"INPUT SIZE: {inp_size}")
print(f"RNN INPUT SIZE: {rnn_input_size}")
# sys.exit(0)  # debug exit; commented out so the model below is actually built and evaluated

model = CRNN(conv_kernel, channels_out, rnn_input_size, hidden_neurons_1,
             hidden_neurons_2, fc1, num_classes)
print("MODEL ARCHITECTURE:")
print(model)
print("Load trained model weights...")

model_path = "trained_models/128_hidden/run2/best_model_1fc.pt"
model.load_state_dict(torch.load(model_path))
model.eval()
print("Loaded trained model weights successfully!")

#######################################
################## 2. #################
#######################################
os.makedirs("activations", exist_ok=True)
for i, (data, true_labels) in enumerate(testing_dataloader):

    data = data.type(torch.FloatTensor)
    true_labels = true_labels.type(torch.LongTensor)

    stg_activations, tp_activations, ifg_activations, word_predictions = model.out(
        data)
Example #11
def weights_init(m):
    # weights initialization for Conv and BatchNorm2d layers (in-place, so no .cuda() needed here)
    classname = m.__class__.__name__
    if classname.find("Conv") != -1:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
        if hasattr(m, "bias") and m.bias is not None:
            torch.nn.init.constant_(m.bias.data, 0.0)
    elif classname.find("BatchNorm2d") != -1:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
        torch.nn.init.constant_(m.bias.data, 0.0)


weights_init(model)

# load saved model
if opt.savedmodel != "save":
    model.load_state_dict(torch.load("savedmodel/%s.pth" % opt.savedmodel))
# create optimizer
optimizer = torch.optim.Adadelta(model.parameters())
# dataloader
traindata = DataLoader(dataprocessing.ImageDataset(opt.dataroot, mode="train"),
                       batch_size=opt.batchsize,
                       shuffle=True)
print(len(traindata))
testdata = DataLoader(dataprocessing.ImageDataset(opt.dataroot, mode="test"),
                      batch_size=opt.batchsize,
                      shuffle=True)
print(len(testdata))
# Loss function
lossfunction = CTCLoss()
process = dataprocessing.ProcessText(opt.alphabet)
# tensorboard
Example #12
from PIL import Image
from torchvision import transforms
from crnn import CRNN
import torch
from utils import Converter

print('load input image...')
image = Image.open('demo_1.png').convert('L')
transform = transforms.Compose(
    [transforms.Resize((32, 100)),
     transforms.ToTensor()])
image = transform(image)
image = image.unsqueeze(0)
image = image.cuda()

print('load trained model...')
crnn = CRNN(1, 38, 256)
crnn = crnn.cuda()
crnn.load_state_dict(torch.load('trained_model/crnn.pth'))

crnn.eval()
predicted_label = crnn(image)

_, predicted_label = predicted_label.max(2)
predicted_label = predicted_label.transpose(1, 0).contiguous().view(-1)
converter = Converter('0123456789abcdefghijklmnopqrstuvwxyz*')
predicted_length = [predicted_label.size(0)]
predicted_label = converter.decode(predicted_label,
                                   predicted_length,
                                   raw=False)
print('predicted label: %s' % (predicted_label))