def test(modelpara):
    """Run CRAFT text detection over every image in the global `image_list`
    and save the detection results into `result_folder`.

    Args:
        modelpara: path to the trained CRAFT checkpoint (.pth file).

    Side effects:
        Writes one result file per image via `file_utils.saveResult`.
    """
    net = CRAFT()  # initialize the detector architecture

    print('Loading weights from checkpoint {}'.format(modelpara))
    # FIX: the original only *printed* this message — the actual load call was
    # commented out, so inference ran with randomly initialised weights.
    # `map_location=device` makes the load work on CPU-only machines too.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net.load_state_dict(copyStateDict(torch.load(modelpara, map_location=device)))
    net = net.to(device)

    net.eval()  # inference mode: disables dropout/batch-norm updates

    t = time.time()

    # Run detection on every image in the (module-level) image list.
    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k+1, len(image_list), image_path), end='\r')
        image = imgproc.loadImage(image_path)

        bboxes, polys, score_text = test_net(net, image, args.text_threshold, args.link_threshold, args.low_text, args.cuda, args.poly)
        print("\n bboxes = ", bboxes, "\n poly = ", polys, "\n text = ", score_text, "\n text.shape = ", score_text.shape)

        print("save in " + result_folder)  # fixed missing space after "in"
        # image[:, :, ::-1] converts RGB back to BGR for OpenCV-based saving.
        file_utils.saveResult(image_path, image[:,:,::-1], polys, dirname=result_folder)

    print("elapsed time : {}s".format(time.time() - t))
# ---- Esempio n. 2 ("Example no. 2") — scraper artifact separating unrelated snippets ----
class Ocr:
    """Two-stage OCR pipeline: CRAFT text detection followed by a
    transformer-based recogniser (loaded via ``Cfg``/``build_model`` —
    presumably VietOCR's vgg_transformer; confirm against the project deps).

    Per-field results (send/date/quote/number/header/sign) are accumulated in
    ``multiprocessing.Manager`` lists so that ``process`` can run in child
    processes and still publish results back to the parent.
    """

    def __init__(self):
        super().__init__()
        # One shared (manager-backed) result list per document field.
        manager = Manager()
        self.send = manager.list()
        self.date = manager.list()
        self.quote = manager.list()
        self.number = manager.list()
        self.header = manager.list()
        self.sign = manager.list()
        self.device = torch.device('cpu')

        # Load the CRAFT detector checkpoint; strip the leading "module."
        # prefix that DataParallel training leaves on every key.
        state_dict = torch.load(
            '/home/dung/Project/Python/ocr/craft_mlt_25k.pth')
        start_idx = 1 if list(state_dict.keys())[0].startswith("module") else 0
        new_state_dict = OrderedDict(
            (".".join(key.split(".")[start_idx:]), value)
            for key, value in state_dict.items())

        self.craft = CRAFT()
        self.craft.load_state_dict(new_state_dict)
        self.craft.to(self.device)
        self.craft.eval()
        # Share the weights so forked worker processes reuse the same tensors.
        self.craft.share_memory()

        # Recogniser configuration: CPU inference, greedy (no beam-search) decoding.
        self.config = Cfg.load_config_from_name('vgg_transformer')
        self.config[
            'weights'] = 'https://drive.google.com/uc?id=13327Y1tz1ohsm5YZMyXVMPIOjoOA0OaA'
        self.config['device'] = 'cpu'
        self.config['predictor']['beamsearch'] = False
        self.weights = '/home/dung/Documents/transformerocr.pth'

    def predict(self, model, vocab, seq, key, idx, img):
        """Recognise the text in one cropped line image and store the decoded
        string at ``seq[idx]``.

        Greedy autoregressive decoding: token 1 is the start symbol and the
        loop stops once every sequence has emitted token 2 (end symbol) or
        128 steps have elapsed.

        Args:
            model: recogniser with ``cnn`` and ``transformer`` submodules.
            vocab: vocabulary object with a ``decode`` method.
            seq:   shared list receiving the result.
            key:   field name (unused here; kept for interface parity).
            idx:   slot in ``seq`` to write.
            img:   PIL image of a single text line.
        """
        img = process_input(img, self.config['dataset']['image_height'],
                            self.config['dataset']['image_min_width'],
                            self.config['dataset']['image_max_width'])
        img = img.to(self.config['device'])
        with torch.no_grad():
            src = model.cnn(img)
            memory = model.transformer.forward_encoder(src)
            translated_sentence = [[1] * len(img)]  # start-of-sequence tokens
            max_length = 0
            while max_length <= 128 and not all(
                    np.any(np.asarray(translated_sentence).T == 2, axis=1)):
                tgt_inp = torch.LongTensor(translated_sentence).to(self.device)
                output = model.transformer.forward_decoder(tgt_inp, memory)
                output = output.to('cpu')
                values, indices = torch.topk(output, 5)
                indices = indices[:, -1, 0]  # greedy: best token at last step
                indices = indices.tolist()
                translated_sentence.append(indices)
                max_length += 1
                del output
            translated_sentence = np.asarray(translated_sentence).T
        s = translated_sentence[0].tolist()
        s = vocab.decode(s)
        seq[idx] = s

    def process(self, craft, seq, key, sub_img):
        """Detect text lines in ``sub_img`` with CRAFT, crop each one, and
        recognise them, storing the strings into the shared list ``seq``.

        Args:
            craft:   CRAFT detector (shared-memory model).
            seq:     shared list to fill with recognised strings.
            key:     field name (passed through to ``predict``).
            sub_img: BGR image crop (H, W, 3) for one document field.
        """
        img_resized, target_ratio, size_heatmap = resize_aspect_ratio(
            sub_img, 2560, interpolation=cv2.INTER_LINEAR, mag_ratio=1.)
        ratio_h = ratio_w = 1 / target_ratio

        x = normalizeMeanVariance(img_resized)
        x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
        x = x.unsqueeze(0)  # [c, h, w] to [b, c, h, w]
        x = x.to(self.device)
        y, feature = craft(x)
        score_text = y[0, :, :, 0].cpu().data.numpy()
        score_link = y[0, :, :, 1].cpu().data.numpy()
        boxes, polys = getDetBoxes(score_text,
                                   score_link,
                                   text_threshold=0.7,
                                   link_threshold=0.4,
                                   low_text=0.4,
                                   poly=False)
        boxes = adjustResultCoordinates(boxes, ratio_w, ratio_h)
        polys = adjustResultCoordinates(polys, ratio_w, ratio_h)
        # Fall back to the rectangular box wherever no polygon was produced.
        for k in range(len(polys)):
            if polys[k] is None:
                polys[k] = boxes[k]
        result = [np.array(box).astype(np.int32).reshape((-1)) for box in polys]
        horizontal_list, free_list = group_text_box(result,
                                                    slope_ths=0.8,
                                                    ycenter_ths=0.5,
                                                    height_ths=1,
                                                    width_ths=1,
                                                    add_margin=0.1)
        # Drop detections smaller than min_size along both axes.
        min_size = 20
        if min_size:
            horizontal_list = [
                i for i in horizontal_list
                # FIX: was a hard-coded `> 10`; use min_size like free_list does.
                if max(i[1] - i[0], i[3] - i[2]) > min_size
            ]
            free_list = [
                i for i in free_list
                if max(diff([c[0] for c in i]), diff([c[1]
                                                      for c in i])) > min_size
            ]
        seq[:] = [None] * len(horizontal_list)
        model, vocab = build_model(self.config)
        model.load_state_dict(
            torch.load(self.weights, map_location=torch.device('cpu')))

        for i, ele in enumerate(horizontal_list):
            # Clamp negative coordinates introduced by the margin expansion.
            ele = [0 if v < 0 else v for v in ele]
            img = sub_img[ele[2]:ele[3], ele[0]:ele[1], :]
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = Image.fromarray(img.astype(np.uint8))
            # NOTE: start() immediately followed by join() runs lines serially;
            # the thread only isolates the recogniser call.
            p = threading.Thread(target=self.predict,
                                 args=(model, vocab, seq, key, i, img))
            p.start()
            p.join()

    def forward(self, img, rs):
        """Process each labelled region of ``img`` and return the recognised
        text for all six fields.

        Args:
            img: full document image (H, W, 3).
            rs:  mapping of field name -> (x0, y0, x1, y1) crop box.

        Returns:
            Six plain lists: send, date, quote, number, header, sign.
        """
        # FIX: the original if/elif chain passed self.date for 'quote',
        # 'number', 'header' and 'sign' (copy-paste bug), and left `p`
        # unbound for unknown keys. Dispatch through a mapping instead.
        targets = {
            'send': self.send,
            'date': self.date,
            'quote': self.quote,
            'number': self.number,
            'header': self.header,
            'sign': self.sign,
        }
        for key, v in rs.items():
            x0, y0, x1, y1 = v
            seq = targets.get(key)
            if seq is None:
                continue  # ignore unrecognised field names
            p = mp.Process(target=self.process,
                           args=(
                               self.craft,
                               seq,
                               key,
                               img[y0:y1, x0:x1, :],
                           ))
            p.start()
            p.join()
        return self.send[:], self.date[:], self.quote[:], self.number[:], self.header[:], self.sign[:]
# ---- Esempio n. 3 ("Example no. 3") — scraper artifact separating unrelated snippets ----
    # Synthetic pre-training data (SynthText via the Synth80k wrapper).
    # batch_size=1 with an explicit iterator: batches are pulled manually
    # with next(batch_syn) elsewhere rather than in a for-loop.
    print('Load the synthetic data ...')
    data_loader = Synth80k('D:/Datasets/SynthText')
    train_loader = torch.utils.data.DataLoader(data_loader,
                                               batch_size=1,
                                               shuffle=True,
                                               num_workers=0,
                                               drop_last=True,
                                               pin_memory=True)
    batch_syn = iter(train_loader)

    # Build CRAFT and warm-start from a synthetic-data checkpoint.
    # NOTE(review): checkpoint directory is spelled "weigths" — presumably
    # intentional (matches the on-disk folder); confirm before "fixing".
    print('Prepare the net ...')
    net = CRAFT()
    net.load_state_dict(copyStateDict(
        torch.load('./weigths/synweights/0.pth')))
    net.to(device)
    data_parallel = False
    if torch.cuda.device_count() > 1:
        net = nn.DataParallel(net)
        data_parallel = True
    cudnn.benchmark = False

    # Real data: ICDAR2013 takes the net too — presumably to generate
    # pseudo ground-truth (weak supervision); verify against the dataset class.
    print('Load the real data')
    real_data = ICDAR2013(net, 'D:/Datasets/ICDAR_2013')
    real_data_loader = torch.utils.data.DataLoader(real_data,
                                                   batch_size=5,
                                                   shuffle=True,
                                                   num_workers=0,
                                                   drop_last=True,
                                                   pin_memory=True)
# ---- Esempio n. 4 ("Example no. 4") — scraper artifact separating unrelated snippets ----
# Training setup: dataset, loss, model and optimizer for CRAFT training
# on SynthText. Module-level names below (device, criterion, craft,
# optimizer, loss_avg) are used by train_batch further down.

# FIX: replaced `os.system('mkdir {0}'.format(...))` — shell-dependent and
# unsafe with spaces/metacharacters in the path — with os.makedirs, which
# also creates intermediate directories. exist_ok preserves the old
# "create only if missing" behaviour.
os.makedirs(args.store_sample, exist_ok=True)

dataset = ImageLoader_synthtext(args)
assert dataset
data_loader = torch.utils.data.DataLoader(dataset, args.batch_size, num_workers=4, shuffle=True, collate_fn=collate)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Mean-squared error over the predicted region/affinity heatmaps.
criterion = torch.nn.MSELoss(reduction='mean')
criterion = criterion.to(device)
craft = CRAFT(pretrained=True)

# Optionally resume from a previous checkpoint. strict=False tolerates
# missing/extra keys between checkpoint and model.
if args.go_on != '':
    print('loading pretrained model from %s' % args.pre_model)
    craft.load_state_dict(torch.load(args.pre_model), strict=False)
craft = craft.to(device)

loss_avg = averager()  # running average of the training loss
optimizer = optim.Adam(craft.parameters(), lr=args.lr)

def train_batch(data):
    """Start one training step on a batch of (image, char map, interval map).

    NOTE(review): this snippet is truncated — the loss computation,
    backward pass and optimizer step that should follow the forward
    pass are not visible here.
    """
    div = 10  # unused in the visible portion of this function
    craft.train()  # enable training mode (dropout/batch-norm updates)
    img, char_label, interval_label = data
    img = img.to(device)
    char_label = char_label.to(device)
    interval_label = interval_label.to(device)

    img.requires_grad_()
    optimizer.zero_grad()
    # Forward pass: preds holds the predicted heatmaps; the second output
    # (intermediate features) is discarded.
    preds, _ = craft(img)
# ---- Esempio n. 5 ("Example no. 5") — scraper artifact separating unrelated snippets ----
# Two-model CRAFT training setup on ICDAR2015. These module-level names
# (device, criterion, craft1, craft2, optimizer, loss_avg) are the
# interface used by train_batch1 below.
config={'is_training':True, 'image_path':'/home/lbh/dataset/icdar2015'}
dataset2 = ImageLoader2(config)
assert dataset2
data_loader2 = torch.utils.data.DataLoader(dataset2, args.batch_size2, num_workers=4, shuffle=True, collate_fn=collate2)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
criterion = torch.nn.MSELoss(reduction='mean')
criterion = criterion.to(device)
# Two CRAFT instances; only craft2 is optimized below — presumably a
# teacher/student (weak-supervision) arrangement where craft1 stays
# frozen; confirm against the training loop.
craft1 = CRAFT(pretrained=True)
craft2 = CRAFT(pretrained=True)
if args.go_on != '':
    print('loading pretrained model from %s' % args.pre_model1)
    print('loading pretrained model from %s' % args.pre_model2)
    # strict=False tolerates key mismatches between checkpoint and model.
    craft1.load_state_dict(torch.load(args.pre_model1), strict=False)
    craft2.load_state_dict(torch.load(args.pre_model2), strict=False)
craft1 = craft1.to(device)
craft2 = craft2.to(device)
loss_avg = averager()  # running average of the training loss
# Only craft2's parameters are trained.
optimizer = optim.Adam(craft2.parameters(), lr=args.lr)

def train_batch1(data):
    """Start one training step for craft2 on a batch of
    (image, char map, interval map).

    NOTE(review): this snippet is truncated — it ends after computing
    the character-channel loss; the interval loss, backward pass and
    optimizer step are not visible here.
    """
    craft2.train()  # enable training mode on the student model
    img, char_label, interval_label = data
    img = img.to(device)
    char_label = char_label.to(device)
    interval_label = interval_label.to(device)

    img.requires_grad_()
    optimizer.zero_grad()
    preds, _ = craft2(img)
    # Channel 0 of the prediction is the character-region heatmap.
    cost_char = criterion(preds[:,:,:,0], char_label).sum()
def train(train_img_path, train_gt_path, pths_path, batch_size, lr,
          num_workers, epoch_iter, save_interval):
    """Train a CRAFT model and periodically save checkpoints.

    Args:
        train_img_path: directory of training images.
        train_gt_path:  directory of ground-truth annotations.
        pths_path:      directory where checkpoints are written.
        batch_size:     mini-batch size.
        lr:             initial learning rate for Adam.
        num_workers:    DataLoader worker count.
        epoch_iter:     total number of epochs.
        save_interval:  save a checkpoint every this many epochs.
    """
    # FIX: was assigned to `filenum` but read as `file_num` below (NameError).
    file_num = len(os.listdir(train_img_path))
    trainset = custom_dataset(train_img_path, train_gt_path)
    train_loader = data.DataLoader(trainset, batch_size=batch_size, \
                                   shuffle=True, num_workers=num_workers, drop_last=True)
    criterion = Maploss()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = CRAFT()
    data_parallel = False

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        data_parallel = True

    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=args.weight_decay)
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=[epoch_iter // 2],
                                         gamma=0.1)

    step_index = 0
    for epoch in range(epoch_iter):
        # Additional manual LR decay every 50 epochs, on top of MultiStepLR.
        if epoch % 50 == 0 and epoch != 0:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)

        model.train()
        epoch_loss = 0
        epoch_time = time.time()
        for i, (img, gt_score, gt_geo, ignored_map) in enumerate(train_loader):
            start_time = time.time()
            img, gt_score, gt_geo, ignored_map = img.to(device), gt_score.to(
                device), gt_geo.to(device), ignored_map.to(device)
            pred_score, pred_geo = model(img)
            loss = criterion(gt_score, pred_score, gt_geo, pred_geo,
                             ignored_map)

            epoch_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            print('Epoch is [{}/{}], mini-batch is [{}/{}], time consumption is {:.8f}, batch_loss is {:.8f}'.format(\
                    epoch+1, epoch_iter, i+1, int(file_num/batch_size), time.time()-start_time, loss.item()))

        # FIX: step the scheduler AFTER the epoch's optimizer updates
        # (PyTorch >= 1.1 convention); stepping first skips the initial LR.
        scheduler.step()

        print('epoch_loss is {:.8f}, epoch_time is {:.8f}'.format(
            epoch_loss / int(file_num / batch_size),
            time.time() - epoch_time))
        print(time.asctime(time.localtime(time.time())))
        print('=' * 50)
        # FIX: `interval` was undefined — the parameter is `save_interval`.
        if (epoch + 1) % save_interval == 0:
            # Unwrap DataParallel so the checkpoint keys have no "module." prefix.
            state_dict = model.module.state_dict(
            ) if data_parallel else model.state_dict()
            torch.save(
                state_dict,
                os.path.join(pths_path,
                             'model_epoch_{}.pth'.format(epoch + 1)))