Ejemplo n.º 1
0
def main():
    """Continue training EAST from a saved checkpoint on a hard-coded dataset.

    Builds the dataset and data loader, moves the model to the GPU, restores
    the weights stored in ``./checkpoints_total/model_1440.pth`` and hands
    everything over to ``train``. Finally closes the module-level ``write``
    handle (defined outside this function).
    """
    data_root = '/home/mathu/Documents/express_recognition/data/telephone_txt/result/'
    image_dir = data_root + 'print_pic'
    label_dir = data_root + 'print_txt'
    # Alternative dataset that was used previously (ICDAR 2015):
    #   root  '/home/mathu/Documents/express_recognition/data/icdar2015/'
    #   images 'train2015', labels 'train_label'

    dataset = custom_dset(image_dir, label_dir)
    loader = DataLoader(
        dataset,
        batch_size=16,
        shuffle=True,
        collate_fn=collate_fn,
        num_workers=4,
    )

    # Weights are loaded after the model has been moved to the GPU.
    net = East().cuda()
    net.load_state_dict(torch.load('./checkpoints_total/model_1440.pth'))

    loss_fn = LossFunc()
    # Adam itself runs without weight decay; the value 0 is only forwarded
    # to ``train``.
    adam = torch.optim.Adam(net.parameters(), lr=1e-4)
    lr_sched = lr_scheduler.StepLR(adam, step_size=10000, gamma=0.94)

    train(
        epochs=1500,
        model=net,
        trainloader=loader,
        crit=loss_fn,
        optimizer=adam,
        scheduler=lr_sched,
        save_step=20,
        weight_decay=0,
    )

    write.close()
Ejemplo n.º 2
0
def main():
    """Parse command-line options, build the EAST pipeline and start training.

    The parsed options are stored in the module-level ``args``. When
    ``args.resume`` names an existing file, its contents are loaded into the
    (DataParallel-wrapped) model before training starts; when it is set but
    the file is missing, only a log message is emitted.
    """
    global args
    args = parser.parse_args()
    init_log('global', logging.INFO)
    log = logging.getLogger('global')

    dataset = custom_dset(args.data_img, args.data_txt)
    loader = DataLoader(
        dataset,
        batch_size=args.batch_size,
        shuffle=True,
        collate_fn=collate_fn,
        num_workers=args.workers,
    )
    log.info("==============Build Dataset Done==============")

    net = East(args.pretrained)
    log.info("==============Build Model Done================")
    log.info(net)

    net = torch.nn.DataParallel(net).cuda()

    # Optional resume: strict loading, so every key must match the wrapped model.
    ckpt_path = args.resume
    if ckpt_path and os.path.isfile(ckpt_path):
        log.info("=> loading checkpoint '{}'".format(ckpt_path))
        state = torch.load(ckpt_path)
        net.load_state_dict(state, strict=True)
        log.info("=> loaded checkpoint '{}'".format(ckpt_path))
    elif ckpt_path:
        log.info("=> no checkpoint found at '{}'".format(ckpt_path))

    loss_fn = LossFunc()

    adam = torch.optim.Adam(net.parameters(), lr=args.lr)
    lr_sched = lr_scheduler.StepLR(adam, step_size=10000, gamma=0.94)

    train(
        epochs=args.epochs,
        model=net,
        train_loader=loader,
        crit=loss_fn,
        optimizer=adam,
        scheduler=lr_sched,
        save_step=args.save_freq,
        weight_decay=args.weight_decay,
    )
Ejemplo n.º 3
0
def main():
    """Train EAST from scratch on the dataset under ``./dataset/``.

    Builds the dataset (and prints it), creates the data loader, model, loss,
    optimizer and LR scheduler, then calls ``train``. Finally closes the
    module-level ``write`` handle (defined outside this function).
    """
    data_root = './dataset/'
    image_dir = data_root + 'train2015/'
    label_dir = data_root + 'train_label/'

    dataset = custom_dset(image_dir, label_dir)
    print(dataset)
    loader = DataLoader(
        dataset,
        batch_size=16,
        shuffle=True,
        collate_fn=collate_fn,
        num_workers=4,
    )

    net = East().cuda()

    loss_fn = LossFunc()
    # Adam itself runs without weight decay; the value 0 is only forwarded
    # to ``train``.
    adam = torch.optim.Adam(net.parameters(), lr=1e-4)
    lr_sched = lr_scheduler.StepLR(adam, step_size=10000, gamma=0.94)

    train(
        epochs=1500,
        model=net,
        trainloader=loader,
        crit=loss_fn,
        optimizer=adam,
        scheduler=lr_sched,
        save_step=20,
        weight_decay=0,
    )

    write.close()
Ejemplo n.º 4
0
def model_init(config):
    """Build every object needed for training from a ``config`` mapping.

    Reads the keys ``dataroot``, ``train_batch_size_per_gpu``, ``gpu``,
    ``num_workers``, ``gpu_ids``, ``init_type``, ``lr``, ``resume`` and
    ``checkpoint`` from ``config``.

    :param config: mapping with the keys listed above
    :return: ``(model, criterion, optimizer, scheduler, train_loader,
             start_epoch)``; ``start_epoch`` is whatever ``load_checkpoint``
             returns when resuming from an existing file, otherwise 0
    """
    train_dir = os.path.abspath(os.path.join(config["dataroot"], 'train'))
    img_dir = os.path.join(train_dir, 'img')
    gt_dir = os.path.join(train_dir, 'gt')

    # The effective batch size is the per-GPU size times the number of GPUs.
    total_batch = config["train_batch_size_per_gpu"] * config["gpu"]
    train_loader = DataLoader(
        custom_dset(img_dir, gt_dir),
        batch_size=total_batch,
        shuffle=True,
        collate_fn=collate_fn,
        num_workers=config["num_workers"],
    )

    logging.debug('Data loader created: Batch_size:{}, GPU {}:({})'.format(
        total_batch, config["gpu"], config["gpu_ids"]))

    # Model: wrap for multi-GPU first, then move to the GPU and initialise.
    model = nn.DataParallel(East(), device_ids=config["gpu_ids"]).cuda()
    init_weights(model, init_type=config["init_type"])
    logging.debug("Model initiated, init type: {}".format(config["init_type"]))

    cudnn.benchmark = True
    criterion = LossFunc()
    optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"])
    scheduler = lr_scheduler.StepLR(optimizer, step_size=10000, gamma=0.94)

    # Start from epoch 0 unless resuming from an existing checkpoint file.
    start_epoch = 0
    if config["resume"] and os.path.isfile(config["checkpoint"]):
        start_epoch = load_checkpoint(config, model, optimizer)
    logging.debug("Model is running...")
    return model, criterion, optimizer, scheduler, train_loader, start_epoch
Ejemplo n.º 5
0
def main():
    """Train EAST and periodically evaluate it, checkpointing after each evaluation.

    Settings are read from the module-level ``cfg``. Every
    ``cfg.eval_iteration`` epochs the model is run through ``predict``, the
    results are zipped and scored with ``compute_hmean``, and a checkpoint is
    saved. The checkpoint's ``is_best`` flag is True only when that
    evaluation's hmean beats the best hmean seen so far in this run.
    """
    best_hmean = .0

    # NumPy 2.0 removed ``np.RankWarning`` from the top-level namespace; it
    # now lives in ``np.exceptions``. Support both.
    rank_warning = getattr(np, 'RankWarning', None)
    if rank_warning is None:
        rank_warning = np.exceptions.RankWarning
    warnings.simplefilter('ignore', rank_warning)

    # Prepare the dataset
    print('EAST <==> Prepare <==> DataLoader <==> Begin')
    train_root_path = cfg.dataroot
    train_img = os.path.join(train_root_path, 'img')
    train_gt = os.path.join(train_root_path, 'gt')

    batch_size = cfg.train_batch_size_per_gpu * cfg.gpu
    trainset = custom_dset(train_img, train_gt)
    train_loader = DataLoader(trainset,
                              batch_size=batch_size,
                              shuffle=True,
                              collate_fn=collate_fn,
                              num_workers=cfg.num_workers)
    print('EAST <==> Prepare <==> Batch_size:{} <==> Begin'.format(batch_size))
    print('EAST <==> Prepare <==> DataLoader <==> Done')

    # Model
    print('EAST <==> Prepare <==> Network <==> Begin')
    model = East()
    model = nn.DataParallel(model, device_ids=cfg.gpu_ids)
    model = model.cuda()
    init_weights(model, init_type=cfg.init_type)
    cudnn.benchmark = True

    criterion = LossFunc()
    optimizer = torch.optim.Adam(model.parameters(), lr=cfg.lr)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=10000, gamma=0.94)

    # Init or resume
    if cfg.resume and os.path.isfile(cfg.checkpoint):
        weightpath = os.path.abspath(cfg.checkpoint)
        print(
            "EAST <==> Prepare <==> Loading checkpoint '{}' <==> Begin".format(
                weightpath))
        checkpoint = torch.load(weightpath)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print(
            "EAST <==> Prepare <==> Loading checkpoint '{}' <==> Done".format(
                weightpath))
    else:
        start_epoch = 0
    print('EAST <==> Prepare <==> Network <==> Done')

    for epoch in range(start_epoch, cfg.max_epochs):

        train(train_loader, model, criterion, scheduler, optimizer, epoch)

        if epoch % cfg.eval_iteration != 0:
            continue

        # Create the result files and the images with boxes drawn
        output_txt_dir_path = predict(model, criterion, epoch)

        # Zip the result files
        submit_path = MyZip(output_txt_dir_path, epoch)

        # Submit and compute hmean
        hmean_ = compute_hmean(submit_path)

        # Decide "best" per evaluation and remember the new record. Before,
        # the flag stayed True forever once set and the best score was never
        # updated, so every later checkpoint was marked best.
        is_best = hmean_ > best_hmean
        if is_best:
            best_hmean = hmean_

        state = {
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'is_best': is_best,
        }
        save_checkpoint(state, epoch)
Ejemplo n.º 6
0
                                                  boxes,
                                                  test_size=0.35,
                                                  shuffle=True,
                                                  random_state=2021)
# Wrap the train/validation splits in datasets and batch them.
# Neither loader passes shuffle=..., so batches follow dataset order.
train_dataset = ReceiptDataset(X_train, y_train)
val_dataset = ReceiptDataset(X_val, y_val)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

# Model
EPOCHS = 50
model = East().to(device)
# Continue from previously saved weights rather than from scratch.
model.load_state_dict(torch.load('east1.pt'))
lr = 1e-4
loss_fn = Loss().to(device)
optimizer = Adam(model.parameters(), lr=lr)
# NOTE(review): presumably the best validation loss reached by the run that
# produced 'east1.pt' -- confirm before changing.
best_val_loss = 0.455

# Loss histories; how they are filled is not visible in this excerpt.
train_loss = list()
val_loss = list()

for epoch in range(EPOCHS):
    print('Epoch {}'.format(epoch + 1))

    train_batch_loss = list()
    # Each batch yields the input images plus ground-truth score and geometry.
    for X_batch_train, gt_score, gt_geo in tqdm(train_dataloader):
        # Move the batch to the same device as the model.
        X_batch_train = X_batch_train.to(device)
        gt_score = gt_score.to(device)
        gt_geo = gt_geo.to(device)
        # Forward pass: the model returns a score map and a geometry map.
        pred_score, pred_geo = model(X_batch_train)
Ejemplo n.º 7
0
class EASTPredictor(object):
    """Run a trained EAST text detector on images and write the boxes to disk.

    The constructor restores model and optimizer state from a checkpoint and
    puts the model in eval mode. ``predict_one_file`` / ``predict_one_image``
    then detect text boxes and write ``<stem>_boxes.txt`` and
    ``<stem>_with_box.jpg`` into ``output_path``.
    """

    def __init__(self, lr, weight_path, output_path):
        """Build the model on the GPU and restore it from ``weight_path``.

        :param lr: learning rate for the Adam optimizer that is restored
                   alongside the model
        :param weight_path: checkpoint file containing the keys ``epoch``,
                            ``state_dict`` and ``optimizer``
        :param output_path: directory where result files are written
        """
        self.output_path = output_path
        self.model = East()
        self.model = nn.DataParallel(self.model, device_ids=[0])
        self.model = self.model.cuda()
        init_weights(self.model, init_type='xavier')
        cudnn.benchmark = True

        self.criterion = LossFunc()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
        self.scheduler = lr_scheduler.StepLR(self.optimizer, step_size=10000, gamma=0.94)
        self.weightpath = os.path.abspath(weight_path)
        logging.debug("EAST <==> Prepare <==> Loading checkpoint '{}' <==> Begin".format(self.weightpath))
        checkpoint = torch.load(self.weightpath)

        self.start_epoch = checkpoint['epoch']
        self.model.load_state_dict(checkpoint['state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer'])
        logging.debug("EAST <==> Prepare <==> Loading checkpoint '{}', epoch={} <==> Done".format(self.weightpath, self.start_epoch))
        self.model.eval()

    @staticmethod
    def _snap_to_stride(side, stride=32):
        """Return the network input length for an image side of ``side`` pixels.

        Multiples of ``stride`` pass through unchanged. Any other value maps
        to ``(side // stride - 1) * stride``, clamped to at least ``stride``
        so small images never produce a zero or negative size.
        """
        if side % stride == 0:
            return side
        return max(stride, (side // stride - 1) * stride)

    def resize_image(self, im, max_side_len=2400):
        '''
        resize image to a size multiple of 32 which is required by the network
        :param im: the image to resize, shape (h, w, channels)
        :param max_side_len: accepted for interface compatibility; the current
            implementation does not apply any upper limit
        :return: the resized image and the resize ratio (ratio_h, ratio_w)
        '''
        h, w, _ = im.shape

        resize_h = self._snap_to_stride(h)
        resize_w = self._snap_to_stride(w)

        im = cv2.resize(im, (int(resize_w), int(resize_h)))

        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)

        return im, (ratio_h, ratio_w)

    def detect(self, score_map, geo_map, score_map_thresh=1e-5, box_thresh=1e-8, nms_thres=0.1):
        '''
        restore text boxes from score map and geo map
        :param score_map: score map, either 2-D or 4-D (first batch item, channel 0 is used)
        :param geo_map: geometry map matching score_map
        :param score_map_thresh: threshold for score map
        :param box_thresh: threshold for boxes
        :param nms_thres: threshold for nms
        :return: array of boxes with 9 columns (8 coordinates + score),
                 or None when merging leaves no boxes
        '''
        if len(score_map.shape) == 4:
            score_map = score_map[0, :, :, 0]
            geo_map = geo_map[0, :, :, ]
        # filter the score map
        xy_text = np.argwhere(score_map > score_map_thresh)
        # sort the text boxes via the y axis
        xy_text = xy_text[np.argsort(xy_text[:, 0])]
        # restore; score-map coordinates are scaled by 4 to image coordinates
        text_box_restored = restore_rectangle(xy_text[:, ::-1]*4, geo_map[xy_text[:, 0], xy_text[:, 1], :]) # N*4*2
        logging.debug('{} text boxes before nms'.format(text_box_restored.shape[0]))
        boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
        boxes[:, :8] = text_box_restored.reshape((-1, 8))
        boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
        # nms part
        logging.debug('{} boxes before merging'.format(boxes.shape[0]))
        boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
        if boxes.shape[0] == 0:
            return None
        logging.debug('{} boxes before checking scores'.format(boxes.shape[0]))
        # here we filter some low score boxes by the average score map, this is different from the original paper
        for i, box in enumerate(boxes):
            mask = np.zeros_like(score_map, dtype=np.uint8)
            cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
            boxes[i, 8] = cv2.mean(score_map, mask)[0]
        boxes = boxes[boxes[:, 8] > box_thresh]
        return boxes

    def sort_poly(self, p):
        """Rotate the 4 vertices of ``p`` so the one with the smallest x+y comes first.

        If the first edge is not wider than it is tall, the vertex order is
        additionally reversed around the first vertex.

        :param p: array of shape (4, 2)
        :return: reordered array of shape (4, 2)
        """
        min_axis = np.argmin(np.sum(p, axis=1))
        p = p[[min_axis, (min_axis+1)%4, (min_axis+2)%4, (min_axis+3)%4]]
        if abs(p[0, 0] - p[1, 0]) > abs(p[0, 1] - p[1, 1]):
            return p
        else:
            return p[[0, 3, 2, 1]]

    def predict_one_file(self, img_file):
        """Read ``img_file`` from disk and run ``predict_one_image`` on it.

        :param img_file: path of the image to process
        :return: whatever ``predict_one_image`` returns
        :raises IOError: if the image cannot be read
        """
        im = cv2.imread(img_file)
        if im is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError("Could not read image '{}'".format(img_file))
        # Reverse the channel axis (cv2 loads BGR) before handing the image on.
        return self.predict_one_image(im[:, :, ::-1], img_file)

    def predict_one_image(self, im, img_file):
        """Detect text boxes in ``im`` and write the result files.

        :param im: image array of shape (h, w, channels)
        :param img_file: original file name; only its stem is used to name
                         the output files
        :return: list produced by ``save_boxes``, or None when no boxes were found
        """
        im_resized, (ratio_h, ratio_w) = self.resize_image(im)
        im_resized = im_resized.astype(np.float32)
        im_resized = im_resized.transpose(2, 0, 1)
        im_resized = torch.from_numpy(im_resized)
        im_resized = im_resized.cuda()
        im_resized = im_resized.unsqueeze(0)

        # Inference only: no need to record the autograd graph.
        with torch.no_grad():
            score, geometry = self.model(im_resized)

        score = score.permute(0, 2, 3, 1)
        geometry = geometry.permute(0, 2, 3, 1)
        score = score.detach().cpu().numpy()
        geometry = geometry.detach().cpu().numpy()

        boxes = self.detect(score_map=score, geo_map=geometry)

        letters = None
        if boxes is not None:
            boxes = boxes[:, :8].reshape((-1, 4, 2))
            # Map the coordinates back to the original image size.
            boxes[:, :, 0] /= ratio_w
            boxes[:, :, 1] /= ratio_h
            logging.debug("found {} boxes".format(len(boxes)))
            fstem = pathlib.Path(img_file).stem
            letters = self.save_boxes(os.path.join(self.output_path, fstem + "_boxes.txt"), boxes)
            cv2.imwrite(os.path.join(self.output_path, fstem + "_with_box.jpg"), im[:, :, ::-1])
        else:
            logging.debug("Did not find boxes")

        return letters

    def save_boxes(self, filename, boxes):
        """Write one line of 8 comma-separated coordinates per kept box to ``filename``.

        Boxes whose first or last edge is shorter than 5 pixels are skipped;
        negative coordinates are clamped to 0.

        :param filename: path of the text file to (over)write
        :param boxes: iterable of (4, 2) arrays
        :return: list of tuples ``('', x0, y0, x2, y2)`` built from vertices
                 0 and 2 of every written polygon
        """
        letters = []
        with open(filename, 'w+') as f:
            for box in boxes:
                box = self.sort_poly(box.astype(np.int32))

                if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3] - box[0]) < 5:
                    logging.debug('wrong direction')
                    continue

                # Clamp coordinates that fall outside the image to 0.
                box[box < 0] = 0

                poly = box.copy()

                # Reverse the vertex order when the signed area is positive.
                p_area = polygon_area(poly)
                if p_area > 0:
                    poly = poly[(0, 3, 2, 1), :]

                f.write('{},{},{},{},{},{},{},{}\r\n'
                        .format(poly[0, 0], poly[0, 1], poly[1, 0], poly[1, 1], poly[2, 0], poly[2, 1], poly[3, 0], poly[3, 1],))
                letters.append(('', poly[0, 0], poly[0, 1], poly[2, 0], poly[2, 1]))
        return letters
Ejemplo n.º 8
0
def main():
    """Train EAST together with the AGD model on a set of video clips.

    Every directory under ``./dataset/train/`` whose name starts with
    ``Video_`` is treated as one clip. In each epoch a fresh, unshuffled data
    loader is built per clip and passed to ``train``. Every
    ``cfg.eval_iteration`` epochs the state of both models and both
    optimizers is saved.

    When resuming, two checkpoint layouts are accepted:

    * single-model layout with the keys ``state_dict`` / ``optimizer``
      (only the EAST model and its optimizer are restored);
    * the layout this function saves, with the keys ``model1.state_dict``,
      ``model2.state_dict``, ``optimizer1`` and ``optimizer2``.
    """
    # NumPy 2.0 removed ``np.RankWarning`` from the top-level namespace; it
    # now lives in ``np.exceptions``. Support both.
    rank_warning = getattr(np, 'RankWarning', None)
    if rank_warning is None:
        rank_warning = np.exceptions.RankWarning
    warnings.simplefilter('ignore', rank_warning)

    # Collect the training clips
    video_root_path = os.path.abspath('./dataset/train/')
    video_name_list = sorted(
        [p for p in os.listdir(video_root_path) if p.split('_')[0] == 'Video'])

    # Model
    print('EAST <==> Prepare <==> Network <==> Begin')
    model = East()
    AGD_model = AGD()
    # Only the EAST model is wrapped in DataParallel.
    model = nn.DataParallel(model, device_ids=cfg.gpu_ids)
    model = model.cuda()
    AGD_model = AGD_model.cuda()
    init_weights(model, init_type=cfg.init_type)
    cudnn.benchmark = True

    criterion1 = LossFunc()
    criterion2 = Ass_loss()

    optimizer1 = torch.optim.Adam(model.parameters(), lr=cfg.lr)
    optimizer2 = torch.optim.Adam(AGD_model.parameters(), lr=cfg.lr)
    # The LR schedule is attached to the EAST optimizer only.
    scheduler = lr_scheduler.StepLR(optimizer1, step_size=10000, gamma=0.94)

    # Init or resume
    if cfg.resume and os.path.isfile(cfg.checkpoint):
        weightpath = os.path.abspath(cfg.checkpoint)
        print(
            "EAST <==> Prepare <==> Loading checkpoint '{}' <==> Begin".format(
                weightpath))
        checkpoint = torch.load(weightpath)
        start_epoch = checkpoint['epoch']
        if 'state_dict' in checkpoint:
            # Single-model layout: restore EAST and its optimizer only.
            model.load_state_dict(checkpoint['state_dict'])
            optimizer1.load_state_dict(checkpoint['optimizer'])
        else:
            # Layout saved at the end of this function. Reading it with the
            # single-model keys used to fail with KeyError.
            model.load_state_dict(checkpoint['model1.state_dict'])
            AGD_model.load_state_dict(checkpoint['model2.state_dict'])
            optimizer1.load_state_dict(checkpoint['optimizer1'])
            optimizer2.load_state_dict(checkpoint['optimizer2'])
        print(
            "EAST <==> Prepare <==> Loading checkpoint '{}' <==> Done".format(
                weightpath))
    else:
        start_epoch = 0
    print('EAST <==> Prepare <==> Network <==> Done')

    batch_size = cfg.train_batch_size_per_gpu * cfg.gpu

    # NOTE(review): the loop starts at start_epoch + 1, so a fresh run begins
    # with epoch 1 rather than 0 -- kept as in the original.
    for epoch in range(start_epoch + 1, cfg.max_epochs):
        for video_name in video_name_list:
            print(
                'EAST <==> epoch:{} <==> Prepare <==> DataLoader <==>{} Begin'.
                format(epoch, video_name))
            trainset = custom_dset(os.path.join(video_root_path, video_name))
            # shuffle=False keeps the samples in dataset order;
            # drop_last=True discards a final incomplete batch.
            train_loader = DataLoader(trainset,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      collate_fn=collate_fn,
                                      num_workers=cfg.num_workers,
                                      drop_last=True)
            print('EAST <==> Prepare <==> Batch_size:{} <==> Begin'.format(
                batch_size))
            print(
                'EAST <==> epoch:{} <==> Prepare <==> DataLoader <==>{} Done'.
                format(epoch, video_name))

            train(train_loader, model, AGD_model, criterion1, criterion2,
                  scheduler, optimizer1, optimizer2, epoch)

        if epoch % cfg.eval_iteration == 0:
            state = {
                'epoch': epoch,
                'model1.state_dict': model.state_dict(),
                'model2.state_dict': AGD_model.state_dict(),
                'optimizer1': optimizer1.state_dict(),
                'optimizer2': optimizer2.state_dict()
            }
            save_checkpoint(state, epoch)