Example #1
    def dataloader(self, alphabet):
        # train_transform = transforms.Compose(
        #     [transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
        #     resizeNormalize(args.imgH)])
        # train_dataset = BaseDataset(args.train_dir, alphabet, transform=train_transform)
        train_dataset = NumDataset(args.train_dir,
                                   alphabet,
                                   transform=resizeNormalize(args.imgH))
        train_dataloader = DataLoader(dataset=train_dataset,
                                      batch_size=args.batch_size,
                                      shuffle=True,
                                      num_workers=args.num_workers,
                                      pin_memory=True)

        if os.path.exists(args.val_dir):
            # val_dataset = BaseDataset(args.val_dir, alphabet, transform=resizeNormalize(args.imgH))
            val_dataset = NumDataset(args.val_dir,
                                     alphabet,
                                     mode='test',
                                     transform=resizeNormalize(args.imgH))
            val_dataloader = DataLoader(dataset=val_dataset,
                                        batch_size=args.batch_size,
                                        shuffle=False,
                                        num_workers=args.num_workers,
                                        pin_memory=True)
        else:
            val_dataloader = None

        return train_dataloader, val_dataloader
Example #2
def predict(image):
    """
    加载crnn模型,做ocr识别
    """
    scale = image.size[1] * 1.0 / 32
    w = image.size[0] / scale
    w = int(w)
    # print "im size:{}, {}".format(image.size, w)
    transformer = dataset.resizeNormalize((w, 32))
    if torch.cuda.is_available() and GPU:
        image = transformer(image).cuda()
    else:
        image = transformer(image).cpu()

    image = image.view(1, *image.size())
    image = Variable(image)
    model.eval()
    preds = model(image)
    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    if len(sim_pred) > 0:
        if sim_pred[0] == u'-':
            sim_pred = sim_pred[1:]

    return sim_pred
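A minimal usage sketch for the predict() helper above. The file name is hypothetical, and the grayscale conversion assumes the model was trained on single-channel input as in the other examples here; model, converter and GPU are the module-level globals the function relies on.

from PIL import Image

# Hypothetical text-line image; predict() rescales it to height 32 while keeping the aspect ratio.
img = Image.open('demo_line.png').convert('L')
print(predict(img))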
Example #3
def batch_test(dirpath):
	alphabet = keys_crnn.alphabet
	#print(len(alphabet))
	#input('\ninput:')
	converter = util.strLabelConverter(alphabet)
	# model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1).cuda()
	model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1)
	path = './samples/model_acc97.pth'
	model.load_state_dict(torch.load(path))
	#print(model)
	paths=glob.glob(os.path.join(dirpath,'*.[jp][pn]g'))
	for i in paths:
		print(i)
		image = Image.open(i).convert('L')
		#print(image.size)
		scale = image.size[1] * 1.0 / 32
		w = image.size[0] / scale
		w = int(w)
		#print("width:" + str(w))
		transformer = dataset.resizeNormalize((w, 32))
		# image = transformer(image).cuda()
		image = transformer(image)
		image = image.view(1, *image.size())
		image = Variable(image)
		model.eval()
		preds = model(image)
		#print(preds.shape)
		_, preds = preds.max(2)
		#print(preds.shape)
		preds = preds.squeeze(1)
		preds = preds.transpose(-1, 0).contiguous().view(-1)
		preds_size = Variable(torch.IntTensor([preds.size(0)]))
		raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
		sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
		print(sim_pred)
Example #4
def data_loader():
    # train
    transform = torchvision.transforms.Compose(
        [ImgAugTransform(), GridDistortion(prob=0.65)])
    train_dataset = dataset.lmdbDataset(root=args.trainroot,
                                        transform=transform)
    assert train_dataset
    if not params.random_sample:
        sampler = dataset.randomSequentialSampler(train_dataset,
                                                  params.batchSize)
    else:
        sampler = None
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=params.batchSize,
        shuffle=(sampler is None),  # shuffle and a sampler are mutually exclusive
        sampler=sampler, num_workers=int(params.workers),
        collate_fn=dataset.alignCollate(imgH=params.imgH, imgW=params.imgW, keep_ratio=params.keep_ratio))

    # val
    transform = torchvision.transforms.Compose(
        [dataset.resizeNormalize((params.imgW, params.imgH))])
    val_dataset = dataset.lmdbDataset(root=args.valroot, transform=transform)
    assert val_dataset
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             shuffle=True,
                                             batch_size=params.batchSize,
                                             num_workers=int(params.workers))

    return train_loader, val_loader
Example #5
def crnn_recognition(imgpth, model, testing_dataset, total_correct_num,
                     total_string_length):
    cropped_image = Image.open(imgpth)

    converter = utils.strLabelConverter(alphabet)

    image = cropped_image.convert('L')

    ##
    w = int(image.size[0] / (280 * 1.0 / 180))
    # w = image.size[0]
    # w = int(image.size[0] / (32 * 1.0 / image.size[1]))
    transformer = dataset.resizeNormalize((w, 32))
    image = transformer(image)
    if torch.cuda.is_available():
        image = image.cuda()
    image = image.view(1, *image.size())
    image = Variable(image)

    model.eval()
    preds = model(image)

    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)

    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    ground_truth = testing_dataset.get(imgpth)
    # character-level credit via normalized Levenshtein similarity (aggregation sketch follows this function)
    correct_num = int(
        len(ground_truth) *
        textdistance.levenshtein.normalized_similarity(ground_truth, sim_pred))
    string_length = len(ground_truth)
    #check = ground_truth == sim_pred
    print('results: {0},  gt: {1}'.format(sim_pred, ground_truth))
    return correct_num, string_length
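A hedged sketch of how the (correct_num, string_length) pairs returned above could be aggregated into a character-level accuracy. The char_accuracy helper and the sample strings are made up for illustration; textdistance.levenshtein.normalized_similarity is the same call used in the example.

import textdistance

def char_accuracy(pairs):
    # pairs: iterable of (correct_num, string_length) tuples from crnn_recognition
    total_correct = sum(c for c, _ in pairs)
    total_length = sum(l for _, l in pairs)
    return total_correct / max(total_length, 1)

# Worked example: "hello" vs "helo" differ by one deletion, so the normalized
# similarity is 1 - 1/5 = 0.8 and int(5 * 0.8) = 4 characters are credited.
correct = int(len("hello") * textdistance.levenshtein.normalized_similarity("hello", "helo"))
print(correct, char_accuracy([(correct, len("hello"))]))  # 4 0.8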
def model_predict(img_path, loadmodel):


    converter = utils.strLabelConverter(alphabet)

    transformer = dataset.resizeNormalize((100, 32))
    image = Image.open(img_path).convert('L')
    image = transformer(image)
    if torch.cuda.is_available():
        image = image.cuda()
    image = image.view(1, *image.size())
    image = Variable(image)

    model.eval()
    preds = model(image)

    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)

    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    print('%-20s => %-20s' % (raw_pred, sim_pred))

    return sim_pred
Example #7
def crnn_single_test(cropped_image, model):

    converter = utils.strLabelConverter(alphabet)

    image = cropped_image.convert('L')

    ##
    w = int(image.size[0] / (280 * 1.0 / 180))
    # w = image.size[0]
    # w = int(image.size[0] / (32 * 1.0 / image.size[1]))
    transformer = dataset.resizeNormalize((w, 32))
    image = transformer(image)
    if torch.cuda.is_available():
        image = image.cuda()
    image = image.view(1, *image.size())
    image = Variable(image)

    model.eval()
    preds = model(image)

    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)

    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    print('results: {0}'.format(sim_pred))
Example #8
def data_loader():
    # train
    train_dataset = dataset.lmdbDataset(root=args.trainroot,
                                        transform=dataset.customResize())
    assert train_dataset
    if not params.random_sample:
        sampler = dataset.randomSequentialSampler(train_dataset,
                                                  params.batchSize)
    else:
        sampler = None
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=params.batchSize,
        shuffle=(sampler is None),  # shuffle and a sampler are mutually exclusive
        sampler=sampler, num_workers=int(params.workers),
        collate_fn=dataset.alignCollate(imgH=params.imgH, imgW=params.imgW))

    # val
    val_dataset = dataset.lmdbDataset(root=args.valroot,
                                      transform=dataset.resizeNormalize(
                                          (params.imgW, params.imgH)))
    assert val_dataset
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             shuffle=True,
                                             batch_size=params.batchSize,
                                             num_workers=int(params.workers))

    return train_loader, val_loader
def crnn_recognition(cropped_image, model):

    converter = utils.strLabelConverter(alphabet)

    image = cropped_image.convert('L')

    ##
    w = int(image.size[0] / (280 * 1.0 / 160))
    transformer = dataset.resizeNormalize((w, 32))
    image = transformer(image)
    if torch.cuda.is_available():
        image = image.cuda()
    image = image.view(1, *image.size())
    image = Variable(image)

    model.eval()
    preds = model(image)

    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)

    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    i = 0
    #print(preds_size.data[0])
    out = ''
    while i < preds_size.data[0]:
        if preds.data[i] != 0:  # compare by value; index 0 is the CTC blank
            out += alphabet[preds.data[i] - 1]
        i += 1
    print(out)
Example #10
def recognize(image_path, alphabet, snapshot, gpu):
    model = crnn.CRNN(32, 1, 37, 256)
    if torch.cuda.is_available():
        model = model.cuda()
    print('loading pretrained model from %s' % snapshot)
    model.load_state_dict(torch.load(snapshot))
    converter = utils.strLabelConverter(alphabet)
    transformer = dataset.resizeNormalize((100, 32))

    image = Image.open(image_path).convert('L')
    image = transformer(image)
    if torch.cuda.is_available():
        image = image.cuda()
    image = image.view(1, *image.size())
    image = Variable(image)

    model.eval()
    preds = model(image)

    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)

    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    print('%-20s => %-20s' % (raw_pred, sim_pred))

    return sim_pred
def crnn_recognition(cropped_image, model):

    converter = utils.strLabelConverter(alphabet)

    image = cropped_image.convert('L')

    ##
    w = int(image.size[0] / (280 * 1.0 / params.imgW))
    transformer = dataset.resizeNormalize((w, 32))
    image = transformer(image)
    #if torch.cuda.is_available():
    #image = image.cuda()
    image = image.view(1, *image.size())
    #image = Variable(image)

    model.eval()
    preds = model(image)
    print("preds first=", preds.size())
    _, preds = preds.max(2)
    print("preds pre=", preds.size())
    preds = preds.transpose(1, 0).contiguous().view(-1)

    print("preds size=", preds.size())
    #preds_size = Variable(torch.IntTensor([preds.size(0)]))
    preds_size = torch.IntTensor([preds.size(0)])
    #raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    #print('%-20s => %-20s' % (raw_pred, sim_pred))
    print('results: {0}'.format(sim_pred))
    def predict(self, image):
        img_w = 32 * image.size[0] // image.size[1]  # keep the original aspect ratio
        transformer = dataset.resizeNormalize((img_w, 32))
        image = transformer(image)

        if torch.cuda.is_available():
            image = image.cuda()

        image = image.view(1, *image.size())
        image = Variable(image)

        if image.size()[-1] < 8:
            return ''

        preds = self(image)
        max_val, preds = preds.max(2)
        preds = preds.view(-1)

        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        raw_pred = self.converter.decode(preds.data, preds_size.data, raw=True)
        sim_pred = self.converter.decode(preds.data,
                                         preds_size.data,
                                         raw=False)

        #sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
        return preds, raw_pred, sim_pred
Example #13
    def __init__(self, args):
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus
        self.args = args
        self.alphabet = alphabetChinese
        nclass = len(self.alphabet) + 1
        nc = 1
        self.net = CRNN(args.imgH, nc, args.nh, nclass)
        self.converter = utils.strLabelConverter(self.alphabet, ignore_case=False)
        self.transformer = resizeNormalize(args.imgH)

        print('loading pretrained model from %s' % args.model_path)
        checkpoint = torch.load(args.model_path)
        if 'model_state_dict' in checkpoint.keys():
            checkpoint = checkpoint['model_state_dict']
        from collections import OrderedDict
        model_dict = OrderedDict()
        for k, v in checkpoint.items():
            if 'module' in k:
                model_dict[k[7:]] = v
            else:
                model_dict[k] = v
        self.net.load_state_dict(model_dict)

        if args.cuda and torch.cuda.is_available():
            print('available gpus:', torch.cuda.device_count())
            # torch.nn.DataParallel has no 'output_dim' argument; the defaults are fine for inference
            self.net = torch.nn.DataParallel(self.net).cuda()
        
        self.net.eval()
def crnn_recognition(cropped_image, model):

    converter = utils.strLabelConverter(alphabet)

    image = cropped_image.convert('L')

    ##
    w = int(image.size[0] / (280 * 1.0 / 160))
    transformer = dataset.resizeNormalize((w, 32))
    image = transformer(image)
    if torch.cuda.is_available():
        image = image.cuda()
    image = image.view(1, *image.size())
    image = Variable(image)

    model.eval()
    preds = model(image)

    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)

    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    f = open('test.txt', 'w')  # 'w' writes text; 'wb' would write a binary file
    f.write('results: {0}'.format(sim_pred))
    f.close()
    print('results: {0}'.format(sim_pred))
Example #15
def crnnOcr(image):
    """
       crnn模型,ocr识别
       @@model,
       @@converter,
       @@im
       @@text_recs:text box

       """
    scale = image.size[1] * 1.0 / 32
    w = image.size[0] / scale
    w = int(w)
    #print "im size:{},{}".format(image.size,w)
    transformer = dataset.resizeNormalize((w, 32))
    if torch.cuda.is_available() and GPU:
        image = transformer(image).cuda()
    else:
        image = transformer(image).cpu()

    image = image.view(1, *image.size())
    image = Variable(image)
    model.eval()
    preds = model(image)
    _, preds = preds.max(2)
    #preds = preds.squeeze(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    #raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    if sim_pred[0] == u'-':
        sim_pred = sim_pred[1:]

    return sim_pred
Example #16
def crnn_recognition(cropped_image, model):

    converter = utils.strLabelConverter(alphabet)
  
    image = cropped_image.convert('L')
    #print("image size=",image.size[0]) #image shape = (w,h)
    ## 
    w = int(image.size[0] / (280 * 1.0 / params.imgW))  # image.size[0] is the width
    #print("w=",w)
    transformer = dataset.resizeNormalize((w, 32))
    image = transformer(image)  # image is now a (C, H, W) tensor
    #print("image resize=",image.shape)
    #if torch.cuda.is_available():
        #image = image.cuda()
    image = image.view(1, *image.size())
    #print("image=",image.shape)
    #image = Variable(image)
    #print("model:",model)

    model.eval()
    preds = model(image)
    #print("preds:",preds)

    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)

    preds_size = torch.IntTensor([preds.size(0)])
    #raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    #print('%-20s => %-20s' % (raw_pred, sim_pred))
    print('result:{0}'.format(sim_pred))
    return ('{0}'.format(sim_pred))
def crnn_recognition(cropped_image, model):

    converter = utils.strLabelConverter(alphabet)

    image = cropped_image.convert('L')

    ## In training, the 280-pixel-wide images were resized to 160 pixels before being fed to the
    ## network, so the test image width is scaled by the same 280/160 ratio here (see the short
    ## sketch after this function).
    w = int(image.size[0] / (280 * 1.0 / 160))
    transformer = dataset.resizeNormalize((w, 32))
    image = transformer(image)
    if torch.cuda.is_available():
        image = image.cuda()
    image = image.view(1, *image.size())
    image = Variable(image)

    model.eval()
    preds = model(image)

    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)

    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    print('results: {0}'.format(sim_pred))
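A minimal sketch of the width arithmetic described in the comment above: training images 280 pixels wide were shrunk to 160 pixels, so a test image is shrunk by the same factor before resizeNormalize. The 420-pixel test width is a made-up value; 280, 160 and the height 32 come from the example.

train_w, resized_w, target_h = 280, 160, 32   # training width, resized training width, CRNN input height
test_w = 420                                  # hypothetical test-image width
scale = train_w * 1.0 / resized_w             # 1.75
w = int(test_w / scale)                       # 240, the width passed to resizeNormalize((w, target_h))
print(w)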
def show_predict(image_directory, filename):
    output_dir = 'static/images/cropped_craft'
    prediction_result = craft.detect_text(image_directory,
                                          output_dir,
                                          crop_type='polly',
                                          export_extra=True,
                                          refiner=False,
                                          cuda=True)
    cropped_dir = output_dir + "/" + filename[:-4] + "_crops"
    transformer = dataset.resizeNormalize((100, 32))
    predicted_text = ""

    for cropped in listdir(cropped_dir):
        img = cropped_dir + "/" + cropped
        image = Image.open(img).convert("L")
        image = transformer(image)

        if torch.cuda.is_available():
            image = image.cuda()

        image = image.view(1, *image.size())
        image = Variable(image)
        model.eval()
        preds = model(image)
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)

        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
        predicted_text = predicted_text + sim_pred[2:len(predicted_text) -
                                                   1] + "\n"

    return predicted_text, output_dir
Example #19
def test(model, data_path, max_iter=100):
    test_dataset = dataset.listDataset(list_file=data_path,
                                       transform=dataset.resizeNormalize(
                                           (100, 32)))

    image = torch.FloatTensor(64, 3, 32, 32)
    text = torch.LongTensor(64 * 5)
    length = torch.IntTensor(64)
    image = Variable(image)
    text = Variable(text)
    length = Variable(length)

    print('Start test')
    # for p in crnn.parameters():
    #     p.requires_grad = False

    length = torch.IntTensor(1)
    length[0] = 7
    length = Variable(length)

    model.eval()
    data_loader = torch.utils.data.DataLoader(test_dataset,
                                              shuffle=True,
                                              batch_size=64,
                                              num_workers=int(2))
    test_iter = iter(data_loader)

    i = 0
    n_correct = 0
    loss_avg = utils.averager()

    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = next(test_iter)  # Python 3 iterator protocol
        i += 1
        cpu_images, cpu_texts = data
        print('cpu_image:', cpu_images.size())
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)

        preds = model(image, length)

        _, preds = preds.max(1)
        preds = preds.view(-1)
        sim_preds = converter.decode(preds.data, length.data)
        for pred, target in zip(sim_preds, cpu_texts):
            target = ''.join(target.split(':'))
            if pred == target:
                n_correct += 1

    for pred, gt in zip(sim_preds, cpu_texts):
        gt = ''.join(gt.split(':'))
        print('%-20s, gt: %-20s' % (pred, gt))

    accuracy = n_correct / float(max_iter * 64)
    print('Test loss: %f, accuracy: %f' % (loss_avg.val(), accuracy))
Example #20
def read_image(path):
    transformer = dataset.resizeNormalize((100, 32))
    image = Image.open(path).convert('L')
    image = transformer(image)
    if torch.cuda.is_available():
        image = image.cuda()
    image = image.view(1, *image.size())
    image = Variable(image)
    return image
Example #21
def initValDataSets():
    index = 0
    list_name = []
    if os.path.exists(val_path + "/data.mdb"):
        one_dataset = dataset.lmdbDataset(root=val_path,
                                          transform=dataset.resizeNormalize(
                                              (100, 32)))

        val_data = {
            "dir": val_path,
            "dataset": one_dataset,
            # "loader": one_loader,
            "index": index
        }
        val_data_list.append(val_data)
        list_name.append(val_path)
    else:
        fs = os.listdir(val_path)
        for one in fs:
            root_path = val_path + "/" + one + "/val"
            if not os.path.exists(root_path) or not os.path.exists(
                    val_path + "/" + one + "/val/data.mdb"):
                if os.path.exists(val_path + "/" + one + "/data.mdb"):
                    root_path = val_path + "/" + one
                else:
                    continue
            # print("添加校验数据集:{}".format(root_path))
            one_dataset = dataset.lmdbDataset(
                root=root_path, transform=dataset.resizeNormalize((100, 32)))

            # one_loader = torch.utils.data.DataLoader(one_dataset, shuffle=True, batch_size=opt.batchSize,
            #                                          num_workers=int(opt.workers))
            val_data = {
                "dir": one,
                "dataset": one_dataset,
                # "loader": one_loader,
                "index": index
            }
            index += 1
            val_data_list.append(val_data)
            list_name.append(one)
    print_msg("加载了{}个验证集:{}".format(len(list_name), list_name))
Example #22
    def process(self, im, text_recs):
        index = 0
        sim_preds = []
        for rec in text_recs:

            if len(rec) > 8:
                top, left, bottom, right, score = rec[0], rec[1], rec[6], rec[
                    7], rec[8]
            else:
                top, left, bottom, right, score = rec
            crop_img = im[int(left):int(right), int(top):int(bottom)]
            # pt1 = (rec[0], rec[1])
            # pt2 = (rec[2], rec[3])
            # pt3 = (rec[6], rec[7])
            # pt4 = (rec[4], rec[5])
            # partImg = dumpRotateImage(im, degrees(atan2(pt2[1] - pt1[1], pt2[0] - pt1[0])), pt1, pt2, pt3, pt4)
            # # mahotas.imsave('%s.jpg'%index, partImg)

            image = Image.fromarray(crop_img).convert('L')
            # height,width,channel=partImg.shape[:3]
            # print(height,width,channel)
            # print(image.size)

            # image = Image.open('./img/t4.jpg').convert('L')
            scale = image.size[1] * 1.0 / 32
            w = image.size[0] / scale
            w = int(w)
            # print(w)

            transformer = dataset.resizeNormalize((w, 32))
            if self.gpuid == '-1':
                image = transformer(image)
            else:
                image = transformer(image).cuda()
            image = image.view(1, *image.size())
            image = Variable(image)
            self.model.eval()
            preds = self.model(image)
            _, preds = preds.max(2)
            # preds = preds.squeeze(2)
            preds = preds.transpose(1, 0).contiguous().view(-1)
            preds_size = Variable(torch.IntTensor([preds.size(0)]))
            raw_pred = self.converter.decode(preds.data,
                                             preds_size.data,
                                             raw=True)
            sim_pred = self.converter.decode(preds.data,
                                             preds_size.data,
                                             raw=False)
            # print('%-20s => %-20s' % (raw_pred, sim_pred))
            # print(index)
            # print(sim_pred)
            sim_preds.append(sim_pred)
            # index = index + 1
        return sim_preds
Example #23
def image_pil_to_logits(oracle, pil_im):
    transformer = dataset.resizeNormalize((imgW, imgH))
    image = transformer(pil_im)
    if torch.cuda.is_available():
        image = image.cuda()

    image = image.view(1, *image.size())
    image = Variable(image)

    preds = oracle(image)
    return preds
def data_loader():
    # val
    val_dataset = dataset.lmdbDataset(root=args.valroot,
                                      transform=dataset.resizeNormalize(
                                          (params.imgW, params.imgH)))
    assert val_dataset
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             shuffle=True,
                                             batch_size=params.batchSize,
                                             num_workers=int(params.workers))

    return val_loader
Example #25
    def crnnRec(self, im, text_recs, use_gpu=True):
        texts = []
        index = 0
        for rec in text_recs:
            pt1 = (rec[0], rec[1])
            pt2 = (rec[2], rec[3])
            pt3 = (rec[6], rec[7])
            pt4 = (rec[4], rec[5])
            partImg = self.dumpRotateImage(
                im, degrees(atan2(pt2[1] - pt1[1], pt2[0] - pt1[0])), pt1, pt2,
                pt3, pt4)
            #mahotas.imsave('%s.jpg'%index, partImg)

            image = Image.fromarray(partImg).convert('L')
            #height,width,channel=partImg.shape[:3]
            #print(height,width,channel)
            #print(image.size)

            #image = Image.open('./img/t4.jpg').convert('L')
            scale = image.size[1] * 1.0 / 32
            w = image.size[0] / scale
            w = int(w)
            #print(w)

            transformer = dataset.resizeNormalize((w, 32))
            image = transformer(image)
            model = self.cpu_model
            if use_gpu and torch.cuda.is_available():
                image = image.cuda()
                model = self.model

            image = image.view(1, *image.size())
            image = Variable(image)
            model.eval()
            print(type(model), type(image))
            preds = model(image)
            _, preds = preds.max(2)
            preds = preds.squeeze(0)
            preds = preds.transpose(1, 0).contiguous().view(-1)
            preds_size = Variable(torch.IntTensor([preds.size(0)]))
            raw_pred = self.converter.decode(preds.data,
                                             preds_size.data,
                                             raw=True)
            sim_pred = self.converter.decode(preds.data,
                                             preds_size.data,
                                             raw=False)
            print('%-20s => %-20s' % (raw_pred, sim_pred))
            #print(index)
            #print(sim_pred)
            index = index + 1
            texts.append(sim_pred)

        return texts
Example #26
def load_img(path):
    
    transformer = dataset.resizeNormalize((100, 32))

    
    result = []   
    for p in path: 
        image = Image.open(p).convert('L')
        image = transformer(image)
        result.append(image)
        
    return torch.stack(result)
Example #27
    def normalize_image(image):
        # Resize, antialias and transpose the image to CHW.
        #n, c, h, w = ModelData.INPUT_SHAPE
        #transformer = resizeNormalize((w, h))
        #image = transformer(image)
        #image_np = (np.asarray(image)-0.5)/0.5
        #image_np_with_batch = np.expand_dims(image_np, 0)
        transformer = dataset.resizeNormalize((100, 32))
        image = transformer(image)
        image = image.view(1, *image.size())

        #image_arr = np.asarray(image.resize((w, h), Image.ANTIALIAS)).transpose([2, 0, 1]).astype(trt.nptype(ModelData.DTYPE)).ravel()
        # Mean/std normalization is already applied by resizeNormalize above.
        return image
Example #28
    def __init__(self, weightfile, gpu_id=0):
        alphabet = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-()图'
        print(alphabet)
        print(len(alphabet))

        nclass = len(alphabet) + 1
        self.__net = crnn.CRNN(32, 1, nclass, 256)
        if torch.cuda.is_available():
            self.__net.cuda(device=gpu_id)
            self.__gpu_id = gpu_id

        self.__net.load_state_dict(torch.load(weightfile))
        self.__transformer = dataset.resizeNormalize((160, 32))
        self.__converter = utils.strLabelConverter(alphabet)
Example #29
def load_model(model_path):
    # net init
    global transformer, model, converter
    print('loading pretrained model from %s' % model_path)
    nclass = len(params.alphabet) + 1
    model = crnn.CRNN(params.imgH, params.nc, nclass, params.nh)
    if torch.cuda.is_available():
        model = model.cuda()
        model = torch.nn.DataParallel(model)
        # load after wrapping, assuming the checkpoint was saved from a DataParallel model
        model.load_state_dict(torch.load(model_path))
    else:
        model.load_state_dict(torch.load(model_path, map_location='cpu'))
    model.eval()
    converter = utils.strLabelConverter(params.alphabet)
    transformer = dataset.resizeNormalize((100, 32))
Example #30
def crnnRec(model, converter, im, text_recs):
    index = 0
    for rec in text_recs:
        pt1 = (rec[0], rec[1])
        pt2 = (rec[2], rec[3])
        pt3 = (rec[6], rec[7])
        pt4 = (rec[4], rec[5])
        partImg = dumpRotateImage(
            im, degrees(atan2(pt2[1] - pt1[1], pt2[0] - pt1[0])), pt1, pt2,
            pt3, pt4)
        if partImg.shape[0] == 0 or partImg.shape[1] == 0:
            return
        #mahotas.imsave('%s.jpg'%index, partImg)
        # plt.imshow(im, cmap='gray')
        # plt.plot(pt1[0], pt1[1], 'bo')
        # plt.plot(pt2[0], pt2[1], 'bo')
        # plt.plot(pt3[0], pt3[1], 'bo')
        # plt.plot(pt4[0], pt4[1], 'bo')
        # plt.show()
        # return

        image = Image.fromarray(partImg).convert('L')
        #height,width,channel=partImg.shape[:3]
        #print(height,width,channel)
        #print(image.size)

        #image = Image.open('./img/t4.jpg').convert('L')
        scale = image.size[1] * 1.0 / 32
        w = image.size[0] / scale
        w = int(w)
        #print(w)

        transformer = dataset.resizeNormalize((w, 32))
        # image = transformer(image).cuda()
        image = transformer(image)
        image = image.view(1, *image.size())
        image = Variable(image, volatile=True)
        model.eval()
        preds = model(image)
        _, preds = preds.max(2)
        preds = preds.squeeze(0).squeeze(0)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
        sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
        #print('%-20s => %-20s' % (raw_pred, sim_pred))
        print(index)
        print(sim_pred)
        index = index + 1
def crnnRec(model,converter,im,text_recs):
   index = 0
   for rec in text_recs:
       pt1 = (rec[0],rec[1])
       pt2 = (rec[2],rec[3])
       pt3 = (rec[6],rec[7])
       pt4 = (rec[4],rec[5])
       partImg = dumpRotateImage(im,degrees(atan2(pt2[1]-pt1[1],pt2[0]-pt1[0])),pt1,pt2,pt3,pt4)
       #mahotas.imsave('%s.jpg'%index, partImg)
       

       image = Image.fromarray(partImg ).convert('L')
       #height,width,channel=partImg.shape[:3]
       #print(height,width,channel)
       #print(image.size) 

       #image = Image.open('./img/t4.jpg').convert('L')
       scale = image.size[1]*1.0 / 32
       w = image.size[0] / scale
       w = int(w)
       #print(w)

       transformer = dataset.resizeNormalize((w, 32))
       image = transformer(image).cuda()
       image = image.view(1, *image.size())
       image = Variable(image)
       model.eval()
       preds = model(image)
       _, preds = preds.max(2)
       #preds = preds.squeeze(2)  # only needed on very old PyTorch where max() kept the reduced dim
       preds = preds.transpose(1, 0).contiguous().view(-1)
       preds_size = Variable(torch.IntTensor([preds.size(0)]))
       raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
       sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
       #print('%-20s => %-20s' % (raw_pred, sim_pred))
       print(index)
       print(sim_pred)
       index = index + 1
Example #32
if torch.cuda.is_available() and not opt.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

train_dataset = dataset.lmdbDataset(root=opt.trainroot)
assert train_dataset
if not opt.random_sample:
    sampler = dataset.randomSequentialSampler(train_dataset, opt.batchSize)
else:
    sampler = None
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=opt.batchSize,
    shuffle=(sampler is None), sampler=sampler,  # shuffle and a sampler are mutually exclusive
    num_workers=int(opt.workers),
    collate_fn=dataset.alignCollate(imgH=opt.imgH, keep_ratio=opt.keep_ratio))
test_dataset = dataset.lmdbDataset(
    root=opt.valroot, transform=dataset.resizeNormalize((100, 32)))

ngpu = int(opt.ngpu)
nh = int(opt.nh)
alphabet = opt.alphabet
nclass = len(alphabet) + 1
nc = 1

converter = utils.strLabelConverter(alphabet)
criterion = CTCLoss()


# custom weights initialization called on crnn
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
Example #33
import torch
from torch.autograd import Variable
from PIL import Image

import dataset
import utils
import models.crnn as crnn


model_path = './data/crnn.pth'
img_path = './data/demo.png'
alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'

model = crnn.CRNN(32, 1, 37, 256).cuda()
print('loading pretrained model from %s' % model_path)
model.load_state_dict(torch.load(model_path))

converter = utils.strLabelConverter(alphabet)

transformer = dataset.resizeNormalize((100, 32))
image = Image.open(img_path).convert('L')
image = transformer(image).cuda()
image = image.view(1, *image.size())
image = Variable(image)

model.eval()
preds = model(image)

_, preds = preds.max(2)
#preds = preds.squeeze(2)  # only needed on very old PyTorch where max() kept the reduced dim
preds = preds.transpose(1, 0).contiguous().view(-1)

preds_size = Variable(torch.IntTensor([preds.size(0)]))
raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
sim_pred = converter.decode(preds.data, preds_size.data, raw=False)