コード例 #1
0
ファイル: model_loader.py プロジェクト: Anmolbansal1/OCR
def load_model(abc, seq_proj=None, backend='resnet18', snapshot=None, cuda=False):
    """Build a ``CRNN`` wrapped in ``nn.DataParallel`` and optionally restore weights.

    Args:
        abc: Forwarded unchanged to ``CRNN`` as ``abc``.
        seq_proj: Forwarded to ``CRNN`` as ``seq_proj``. ``None`` (the
            default) stands for a fresh ``[0, 0]`` list.
        backend: Forwarded to ``CRNN`` as ``backend``.
        snapshot: Optional checkpoint path. When given, it is read with
            ``torch.load`` and the result is handed to ``load_weights``.
        cuda: When true, the wrapped network is moved to the GPU before it
            is returned.

    Returns:
        The ``nn.DataParallel``-wrapped network.
    """
    # Build the default per call: a list literal in the signature would be
    # one shared object, so any downstream mutation would leak across calls.
    if seq_proj is None:
        seq_proj = [0, 0]
    net = CRNN(abc=abc, seq_proj=seq_proj, backend=backend)
    net = nn.DataParallel(net)
    if snapshot is not None:
        # Without a GPU request, remap stored tensors to the CPU so that a
        # snapshot saved on a GPU can still be read on a CPU-only host.
        map_location = None if cuda else 'cpu'
        load_weights(net, torch.load(snapshot, map_location=map_location))
    if cuda:
        net = net.cuda()
    return net
コード例 #2
0
class PytorchOcr():
    """CRNN-based text recognizer restored from a saved state dict.

    The alphabet is built from the code points in ``config.alphabet_v2``.
    The model runs on the GPU whenever ``torch.cuda.is_available()`` is true,
    otherwise on the CPU.
    """

    def __init__(self, model_path):
        """Create the CRNN, load weights from ``model_path`` and enter eval mode.

        Args:
            model_path: Path to a file readable by ``torch.load`` that holds
                the model's state dict. Keys may carry the ``module.`` prefix
                that ``nn.DataParallel`` adds; it is stripped on both the
                GPU and the CPU path.
        """
        alphabet_unicode = config.alphabet_v2
        self.alphabet = ''.join([chr(uni) for uni in alphabet_unicode])
        # One class more than the alphabet size (presumably the CTC blank --
        # confirm against strLabelConverter).
        self.nclass = len(self.alphabet) + 1
        self.model = CRNN(config.imgH, 1, self.nclass, 256)
        self.cuda = torch.cuda.is_available()
        if self.cuda:
            self.model.cuda()
            state_dict = torch.load(model_path)
        else:
            state_dict = torch.load(model_path, map_location='cpu')
        # Previously only the GPU branch removed the DataParallel prefix, so
        # the same checkpoint could load on GPU yet fail on CPU.
        self.model.load_state_dict(self._strip_module_prefix(state_dict))
        self.model.eval()
        self.converter = strLabelConverter(self.alphabet)

    @staticmethod
    def _strip_module_prefix(state_dict):
        """Return ``state_dict`` with a leading ``module.`` removed from keys.

        Only a leading prefix is removed; ``module.`` occurring later in a
        key is left alone. When no key carries the prefix the input mapping
        is returned unchanged (same object).
        """
        prefix = 'module.'
        if not any(key.startswith(prefix) for key in state_dict):
            return state_dict
        return {
            (key[len(prefix):] if key.startswith(prefix) else key): value
            for key, value in state_dict.items()
        }

    def recognize(self, img):
        """Recognize the text in ``img`` and return the decoded result.

        Args:
            img: Image array exposing ``.shape``. A 3-dimensional array is
                converted with ``cv2.COLOR_BGR2GRAY`` first.

        Returns:
            Whatever ``self.converter.decode(..., raw=False)`` returns for
            the per-step argmax of the model output.
        """
        h, w = img.shape[:2]
        if len(img.shape) == 3:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        image = Image.fromarray(img)
        # Scale to height 32 while keeping the original aspect ratio.
        transformer = resizeNormalize((int(w / h * 32), 32))
        image = transformer(image)
        # Prepend a batch dimension of size 1.
        image = image.view(1, *image.size())
        image = Variable(image)

        if self.cuda:
            image = image.cuda()

        preds = self.model(image)

        # Argmax over dim 2 (assumed to be the class axis -- TODO confirm),
        # then flatten into a single index sequence for the decoder.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)

        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        txt = self.converter.decode(preds.data, preds_size.data, raw=False)

        return txt
コード例 #3
0
ファイル: ocr.py プロジェクト: undarmaa/mongolian-nlp
def load_model_from_checkpoint(checkpoint_file_name, use_gpu=False):
    """Restore a CRNN from a checkpoint file and return it in eval mode.

    Args:
        checkpoint_file_name: Path passed to ``torch.load``; the loaded
            object must expose the weights under the ``'state_dict'`` key.
        use_gpu: When true the model is returned on the GPU, otherwise on
            the CPU (and the checkpoint tensors are remapped to the CPU).

    Returns:
        The float-precision CRNN with weights loaded.
    """
    network = CRNN(line_size, 1, len(vocab), 256)

    # On the CPU path, force stored tensors onto the CPU while loading.
    if use_gpu:
        device_map = None
    else:
        device_map = 'cpu'
    restored = torch.load(checkpoint_file_name, map_location=device_map)

    network.load_state_dict(restored['state_dict'])
    network.float()
    network.eval()

    if use_gpu:
        return network.cuda()
    return network.cpu()
コード例 #4
0
ファイル: ocr.py プロジェクト: undarmaa/mongolian-nlp
def ocr(orig_img, lines, checkpoint_file_name, use_gpu=False):
    """Run the CRNN over every segmented line of ``orig_img`` and decode text.

    Args:
        orig_img: Image array, indexed as ``orig_img[y1:y2, x1:x2]``.
        lines: Iterable of ``((x1, y1), (x2, y2))`` corner pairs, one per
            line to recognize.
        checkpoint_file_name: Path passed to ``torch.load``; the weights are
            read from its ``'state_dict'`` entry.
        use_gpu: When true, the model and every input run on the GPU.

    Returns:
        A list of ``(line_img, predicted_text)`` tuples in input order, where
        ``line_img`` is the rotated and resized crop fed to the model.

    Note:
        Gradient tracking is disabled only for the duration of this call
        (via ``torch.no_grad()``); the global autograd mode is left as the
        caller had it.
    """
    model = CRNN(line_size, 1, len(vocab), 256)
    checkpoint = torch.load(checkpoint_file_name,
                            map_location='cpu' if not use_gpu else None)
    model.load_state_dict(checkpoint['state_dict'])
    model.float()
    model.eval()
    model = model.cuda() if use_gpu else model.cpu()

    def to_text(tensor, max_length=None, remove_repetitions=False):
        """Map a 1-D tensor of class indices to text through ``idx2char``.

        ``'B'`` entries are treated as blanks and dropped. With
        ``remove_repetitions`` a character equal to the one decoded at the
        previous position is skipped. Only the first ``max_length``
        positions are considered when ``max_length`` is given.
        """
        sequence = tensor.cpu().detach().numpy()
        if max_length is not None:
            sequence = sequence[:max(max_length, 0)]
        chars = []
        previous = None
        for index in sequence:
            char = idx2char[index]
            repeated = remove_repetitions and char == previous
            if char != 'B' and not repeated:  # ignore blank
                chars.append(char)
            previous = char
        return ''.join(chars)

    result = []
    # Scoped no_grad: inference needs no autograd graph, and the caller's
    # global grad mode must not be changed as a side effect.
    with torch.no_grad():
        for line in lines:
            (x1, y1), (x2, y2) = line
            line_img = image_resize(
                np.array(np.rot90(orig_img[y1:y2, x1:x2])),
                height=line_size)

            # Scale to [0, 1] and add two leading singleton dimensions.
            inputs = torch.from_numpy(line_img /
                                      255).float().unsqueeze(0).unsqueeze(0)
            if use_gpu:
                # The model is on the GPU; the input must be on it as well.
                inputs = inputs.cuda()
            outputs = model(inputs)
            prediction = outputs.softmax(2).max(2)[1]

            # Column 0 is the only sample (assumes output is laid out as
            # (steps, batch, classes) -- TODO confirm against CRNN).
            predicted_text = to_text(prediction[:, 0],
                                     remove_repetitions=True)
            result.append((line_img, predicted_text))

    return result
コード例 #5
0
ファイル: train.py プロジェクト: LiBiying/OCR_XJTU_RPLN
    #    print(k, v.numpy().shape, reduce(mul, v.numpy().shape))
    params_shape.append(reduce(mul, v.numpy().shape))
# Report the sum of the per-tensor element counts gathered in params_shape.
params_total = sum(params_shape)
print('params_total:', params_total)

# Start from freshly initialised weights unless fine-tuning was requested,
# in which case the saved state dict is restored instead.
if not opt.finetune:
    print('create new model')
    net.apply(weights_init)
else:
    print('Loading model from', opt.modeldir + opt.modelname)
    net.load_state_dict(torch.load(opt.modeldir + opt.modelname))

# Wrap for multi-GPU execution only when more than one GPU is configured.
if opt.ngpu > 1:
    net = nn.DataParallel(net, device_ids=range(opt.ngpu))
net.cuda()
criterion = CTCLoss()
criterion = criterion.cuda()

# Optimizer choice: Adadelta takes precedence over RMSprop; Adam is the
# fallback when neither flag is set.
if not (opt.adadelta or opt.rms):
    optimizer = optim.Adam(
        net.parameters(),
        lr=opt.lr,
        betas=(0.5, 0.999),
        weight_decay=0.003,
    )
elif opt.adadelta:
    optimizer = optim.Adadelta(net.parameters(), lr=opt.lr)
else:
    optimizer = optim.RMSprop(net.parameters(), lr=opt.lr)


def val_test():
コード例 #6
0
                                 batch_size=option.batch_size,
                                 shuffle=True)
# Validation data: each sample is resized to (image_h, image_w) and then
# converted to a tensor.
validationset = LMDBDataset(
    option.validationset_path,
    transform=transforms.Compose([
        transforms.Resize((option.image_h, option.image_w)),
        transforms.ToTensor(),
    ]),
)
validationset_dataloader = DataLoader(
    validationset,
    shuffle=True,
    batch_size=option.batch_size,
)

# Model arguments: nc is fixed to 1, and nclass is one more than the
# alphabet size (presumably for the CTC blank -- confirm against CRNN).
nc = 1
nclass = 1 + len(option.alphabet)
crnn = CRNN(nc, nclass, option.nh).cuda()


def weight_init(module):
    """Initialise ``module`` in place according to its class name.

    Classes whose name contains ``Conv`` get weights drawn from a normal
    distribution with mean 0 and std 0.02. Classes whose name contains
    ``BatchNorm`` get weights drawn with mean 1 and std 0.02 and a bias
    filled with 0. Both checks are independent; other modules are untouched.
    """
    layer_type = module.__class__.__name__

    if 'Conv' in layer_type:
        module.weight.data.normal_(0, 0.02)

    if 'BatchNorm' in layer_type:
        module.weight.data.normal_(1, 0.02)
        module.bias.data.fill_(0)


# Run weight_init through crnn.apply so the custom initialisation is
# applied by the model itself.
crnn.apply(weight_init)

# CTC loss, constructed with zero_infinity=True and moved to the GPU.
loss_function = CTCLoss(zero_infinity=True).cuda()
コード例 #7
0
ファイル: train.py プロジェクト: maiduchoang2498/OCR-CRNN
# Directory used for saved models.
parser.add_argument(
    '--savedmodel',
    default="save",
    type=str,
    help="directory to saved model",
)
# Batch size, 64 unless overridden on the command line.
parser.add_argument('--batchsize', default=64, type=int)
# Alphabet string (presumably the characters the recognizer may emit --
# verify against its consumer); defaults to digits plus lowercase letters.
parser.add_argument(
    '--alphabet',
    default='0123456789abcdefghijklmnopqrstuvwxyz',
    type=str,
)
opt = parser.parse_args()

cuda = torch.cuda.is_available()
# Honour the availability check instead of hard-coding 'cuda', so the script
# can also start on a host without a GPU.
device = torch.device('cuda' if cuda else 'cpu')

# Initialize the model and place it on the selected device.
model = CRNN()
model.to(device)


def weights_init(m):
    """Initialise a single layer in place, chosen by its class name.

    * Class name contains ``Conv``: weights drawn from N(0.0, 0.02); the
      bias, when the layer has one, is set to 0.0.
    * Class name contains ``BatchNorm2d``: weights drawn from N(1.0, 0.02)
      and bias set to 0.0.

    Any other module is left untouched. The function looks only at ``m``
    itself, so pass it to ``Module.apply`` to cover a whole network.

    Args:
        m: The module to initialise.
    """
    classname = m.__class__.__name__
    # The init functions modify the tensors in place on whatever device they
    # already live on; no device transfer of the result is needed.
    if "Conv" in classname:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
        if hasattr(m, "bias") and m.bias is not None:
            torch.nn.init.constant_(m.bias.data, 0.0)
    elif "BatchNorm2d" in classname:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
        torch.nn.init.constant_(m.bias.data, 0.0)


# Initialize weights. weights_init only inspects the class name of the object
# it is given, so calling it on the CRNN container itself matches neither
# branch and changes nothing; Module.apply runs it on every submodule.
model.apply(weights_init)