def eval(path="checkpoint3.pt"):
    net = CRNN(nclass=100).double()
    optimizer = optim.Adam(net.parameters())

    checkpoint = torch.load(path)
    net.load_state_dict(checkpoint["model_state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    epoch = checkpoint["epoch"]
    loss = checkpoint["loss"]
    print(f"model current epoch: {epoch} with loss: {loss}")

    net.eval()

    while True:
        data = next(dataset)
        images = data["the_inputs"]
        labels = data["the_labels"]
        input_length = data["input_length"]
        label_length = data["label_length"]

        preds = net(images).detach()
        pred_texts, probs = decode_batch2(preds, string.printable)
        for i in range(len(pred_texts)):
            print(pred_texts[i], probs[i])
            print(images[i].size())
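decode_batch2 is not defined in the snippet above. A minimal greedy CTC decoder with a similar interface might look like the sketch below (greedy_ctc_decode is hypothetical; it assumes preds has shape (T, N, C) and that class 0 is the CTC blank, so class c maps to alphabet[c - 1]):

def greedy_ctc_decode(preds, alphabet, blank=0):
    """Hypothetical stand-in for decode_batch2: greedy CTC decoding."""
    probs, idx = preds.max(2)  # best class and its score per time step
    texts, confidences = [], []
    for n in range(idx.size(1)):
        seq = idx[:, n].tolist()
        chars = []
        for t, c in enumerate(seq):
            # collapse repeated labels and drop blanks
            if c != blank and (t == 0 or c != seq[t - 1]):
                chars.append(alphabet[c - 1])
        texts.append(''.join(chars))
        confidences.append(probs[:, n].mean().item())
    return texts, confidences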
Example #2
def load_model_from_checkpoint(checkpoint_file_name, use_gpu=False):
    """Load a pretrained CRNN model."""
    model = CRNN(line_size, 1, len(vocab), 256)
    checkpoint = torch.load(checkpoint_file_name,
                            map_location='cpu' if not use_gpu else None)
    model.load_state_dict(checkpoint['state_dict'])
    model.float()
    model.eval()
    model = model.cuda() if use_gpu else model.cpu()
    return model
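A call might look like this (hypothetical checkpoint path; line_size and vocab must already be defined at module level, as the function assumes):

import torch

model = load_model_from_checkpoint('checkpoints/crnn_latest.pth',
                                   use_gpu=torch.cuda.is_available())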
Example #3
class PytorchOcr:
    def __init__(self, model_path):
        alphabet_unicode = config.alphabet_v2
        self.alphabet = ''.join([chr(uni) for uni in alphabet_unicode])
        # print(len(self.alphabet))
        self.nclass = len(self.alphabet) + 1
        self.model = CRNN(config.imgH, 1, self.nclass, 256)
        self.cuda = False
        if torch.cuda.is_available():
            self.cuda = True
            self.model.cuda()
            # strip the 'module.' prefix left by nn.DataParallel checkpoints
            self.model.load_state_dict({
                k.replace('module.', ''): v
                for k, v in torch.load(model_path).items()
            })
        else:
            # self.model = nn.DataParallel(self.model)
            self.model.load_state_dict(
                torch.load(model_path, map_location='cpu'))
        self.model.eval()
        self.converter = strLabelConverter(self.alphabet)

    def recognize(self, img):
        h, w = img.shape[:2]
        if len(img.shape) == 3:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        image = Image.fromarray(img)
        transformer = resizeNormalize((int(w / h * 32), 32))  # fix height at 32, keep aspect ratio
        image = transformer(image)
        image = image.view(1, *image.size())  # add a batch dimension

        if self.cuda:
            image = image.cuda()

        preds = self.model(image)

        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)

        preds_size = torch.IntTensor([preds.size(0)])
        txt = self.converter.decode(preds.data, preds_size, raw=False)

        return txt
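Hypothetical usage, assuming an OpenCV BGR image of a single text line:

import cv2

ocr = PytorchOcr('weights/crnn.pth')  # hypothetical weights path
line_img = cv2.imread('line.png')
print(ocr.recognize(line_img))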
Example #4
def ocr(orig_img, lines, checkpoint_file_name, use_gpu=False):
    """OCR on segmented lines."""
    model = CRNN(line_size, 1, len(vocab), 256)
    checkpoint = torch.load(checkpoint_file_name,
                            map_location='cpu' if not use_gpu else None)
    model.load_state_dict(checkpoint['state_dict'])
    model.float()
    model.eval()
    model = model.cuda() if use_gpu else model.cpu()
    torch.set_grad_enabled(False)

    result = []
    for line in lines:
        (x1, y1), (x2, y2) = line
        line_img = image_resize(np.array(np.rot90(orig_img[y1:y2, x1:x2])),
                                height=line_size)

        inputs = torch.from_numpy(line_img /
                                  255).float().unsqueeze(0).unsqueeze(0)
        outputs = model(inputs)
        prediction = outputs.softmax(2).max(2)[1]

        def to_text(tensor, max_length=None, remove_repetitions=False):
            # Greedy CTC post-processing: drop blanks, optionally collapse repeats.
            sentence = ''
            sequence = tensor.cpu().detach().numpy()
            for i in range(len(sequence)):
                if max_length is not None and i >= max_length:
                    break
                char = idx2char[sequence[i]]
                if char == 'B':  # 'B' is the CTC blank label
                    continue
                if remove_repetitions and i != 0 and char == idx2char[sequence[i - 1]]:
                    continue
                sentence += char
            return sentence

        predicted_text = to_text(prediction[:, 0], remove_repetitions=True)
        result.append((line_img, predicted_text))

    return result
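Hypothetical usage; page_img is a grayscale page as a NumPy array (assumed to exist), and each line is an ((x1, y1), (x2, y2)) box from a prior segmentation step:

lines = [((10, 40), (610, 88))]  # hypothetical line box
for line_img, text in ocr(page_img, lines, 'checkpoints/best.pth'):
    print(text)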
Example #5
def infer(files, save_static_path=None):
    result_list = []
    place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
    print('infer with {}'.format(place))
    with fluid.dygraph.guard(place):
        params, _ = fluid.load_dygraph('{}/crnn_best'.format('output/baidu_model'))  # was: train_parameters['save_model_dir']
        # 3828 = train_parameters["class_dim"] + 1 (character classes plus the CTC blank)
        crnn = CRNN(3828, 1)
        crnn.load_dict(params)
        crnn.eval()
        for file in tqdm(files):
            img = precess_img(file)
            img = fluid.dygraph.to_variable(img).astype('float32')
            if save_static_path is not None:
                out_dygraph, static_layer = TracedLayer.trace(crnn, inputs=[img])
                # save the converted model for static-graph inference
                static_layer.save_inference_model(save_static_path, feed=[0], fetch=[0])
            pred = crnn(img)
            output = utils.greedy_decode(pred.numpy(), blank=train_parameters["class_dim"])
            p_s = "".join([train_parameters['r_label_dict'][c] for c in output[0]])
            result_list.append('{0}\t{1}'.format(os.path.basename(file), p_s))
            break  # note: only the first file is processed
    return result_list
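Hypothetical usage over a test folder; train_parameters must already hold the label dictionaries the function reads:

import os

files = [os.path.join('data/test', f) for f in sorted(os.listdir('data/test'))]
for row in infer(files):
    print(row)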
Example #6
import os
import torch
import cv2
from crnn import CRNN
from tqdm import tqdm

import csv
import numpy as np

model = CRNN()
model.load_state_dict(torch.load('55acc.pt'))
model.eval()
model.to('cuda')

data_dir = "qia2020/test/"
emo = {0: 'hap', 1: 'sur', 2: 'neu', 3: 'fea', 4: 'dis', 5: 'ang', 6: 'sad'}

with open('test_confirm.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['FileID', 'Emotion'])
    for filename in tqdm(sorted(os.listdir(data_dir))):
        if not filename.endswith(".mp4"):
            continue

        f = 'torch_video_3_test/' + filename[:5] + '.pt'

        X = torch.load(f)
        X = X.unsqueeze(0).to('cuda:0')

        with np.load(data_dir + filename[:5] + '.npz') as data:
            T = torch.Tensor(data['word_embed'])
Example #7
def train():
    epoch_num = train_parameters["num_epochs"]
    batch_size = train_parameters["train_batch_size"]

    place = fluid.CUDAPlace(
        0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
    logger.info('train with {}'.format(place))
    with fluid.dygraph.guard(place):
        # load the training data
        file_list = open(train_parameters['train_list']).readlines()
        train_reader = get_loader(
            file_list=file_list,
            input_size=train_parameters['input_size'],
            max_char_per_line=train_parameters['max_char_per_line'],
            mean_color=train_parameters['mean_color'],
            batch_size=train_parameters['train_batch_size'],
            mode='train',
            label_dict=train_parameters['label_dict'],
            place=place)

        batch_num = len(train_reader())

        crnn = CRNN(train_parameters["class_dim"] + 1, batch_size=batch_size)
        total_step = batch_num * epoch_num
        LR = train_parameters['learning_rate']
        lr = fluid.layers.polynomial_decay(LR, total_step, 1e-7, power=0.9)
        # lr = fluid.layers.piecewise_decay([total_step // 3, total_step * 2 // 3], [LR, LR * 0.1, LR * 0.01])
        optimizer = fluid.optimizer.Adam(learning_rate=lr,
                                         parameter_list=crnn.parameters())

        if train_parameters["continue_train"]:
            # load the model from the previous run and resume training
            params_dict, opt_dict = fluid.load_dygraph('{}/crnn_latest'.format(
                train_parameters['save_model_dir']))
            crnn.set_dict(params_dict)
            optimizer.set_dict(opt_dict)
            logger.info("load model from {}".format(
                train_parameters['save_model_dir']))

        current_best = -1
        start_epoch = 0
        for epoch in range(start_epoch, epoch_num):
            crnn.train()
            tic = time.time()
            for batch_id, (img, label, label_len) in enumerate(train_reader()):
                out = crnn(img)

                out_for_loss = fluid.layers.transpose(out, [1, 0, 2])
                input_length = np.array([out.shape[1]] *
                                        out.shape[0]).astype("int64")
                input_length = fluid.dygraph.to_variable(input_length)
                input_length.stop_gradient = True
                loss = fluid.layers.warpctc(
                    input=out_for_loss,
                    label=label.astype(np.int32),
                    input_length=input_length,
                    label_length=label_len,
                    blank=train_parameters["class_dim"],
                    norm_by_times=True)
                avg_loss = fluid.layers.reduce_mean(loss)

                cur_acc_num, cur_all_num = acc_batch(out.numpy(),
                                                     label.numpy())
                if batch_id % 1 == 0:  # log every batch
                    logger.info(
                        "epoch [{}/{}], step [{}/{}], loss: {:.6f}, acc: {:.4f}, lr: {}, time: {:.4f}"
                        .format(epoch, epoch_num, batch_id, batch_num,
                                avg_loss.numpy()[0], cur_acc_num / cur_all_num,
                                optimizer.current_step_lr(),
                                time.time() - tic))
                    tic = time.time()
                avg_loss.backward()
                optimizer.minimize(avg_loss)
                crnn.clear_gradients()

            fluid.save_dygraph(
                crnn.state_dict(),
                '{}/crnn_latest'.format(train_parameters['save_model_dir']))
            fluid.save_dygraph(
                optimizer.state_dict(),
                '{}/crnn_latest'.format(train_parameters['save_model_dir']))
            crnn.eval()
            ratio = eval_model(crnn, place=place)
            if ratio >= current_best:
                fluid.save_dygraph(
                    crnn.state_dict(),
                    '{}/crnn_best'.format(train_parameters['save_model_dir']))
                fluid.save_dygraph(
                    optimizer.state_dict(),
                    '{}/crnn_best'.format(train_parameters['save_model_dir']))
                current_best = ratio
                logger.info("save model to {}, current best acc:{:.2f}".format(
                    train_parameters['save_model_dir'], ratio))
    logger.info("train end")
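The transpose before warpctc is easy to get wrong: the network emits (batch, time, classes) while warpctc expects (time, batch, classes). A standalone shape check, with illustrative sizes assumed:

import numpy as np

batch, time_steps, classes = 32, 80, 3829  # hypothetical; classes = class_dim + 1
out = np.zeros((batch, time_steps, classes), dtype='float32')
out_for_loss = out.transpose(1, 0, 2)  # (T, N, C), the layout warpctc expects
input_length = np.array([time_steps] * batch).astype('int64')
assert out_for_loss.shape == (time_steps, batch, classes)
assert input_length.shape == (batch,)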
Example #8
from PIL import Image
from torchvision import transforms
from crnn import CRNN
import torch
from utils import Converter

print('load input image...')
image = Image.open('demo_1.png').convert('L')
transform = transforms.Compose(
    [transforms.Resize((32, 100)),
     transforms.ToTensor()])
image = transform(image)
image = image.unsqueeze(0)
image = image.cuda()

print('load trained model...')
crnn = CRNN(1, 38, 256)
crnn = crnn.cuda()
crnn.load_state_dict(torch.load('trained_model/crnn.pth'))

crnn.eval()
predicted_label = crnn(image)

_, predicted_label = predicted_label.max(2)
predicted_label = predicted_label.transpose(1, 0).contiguous().view(-1)
converter = Converter('0123456789abcdefghijklmnopqrstuvwxyz*')
predicted_length = [predicted_label.size(0)]
predicted_label = converter.decode(predicted_label,
                                   predicted_length,
                                   raw=False)
print('predicted label: %s' % (predicted_label))
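With raw=False the converter collapses the raw per-frame labels before mapping them to characters. A minimal sketch of that collapse (an assumption modeled on the common strLabelConverter, not necessarily this project's exact code):

def ctc_collapse(indices, alphabet, blank=0):
    # Collapse repeated labels and drop blanks;
    # e.g. with blank=0: [1, 1, 0, 1, 2, 2] -> '001' for alphabet '012...'
    out, prev = [], blank
    for i in indices:
        if i != blank and i != prev:
            out.append(alphabet[i - 1])  # class c maps to alphabet[c - 1]
        prev = i
    return ''.join(out)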