def __init__(self, args):
    self.args = args
    self.device = torch.device(args.device)

    # image transform
    input_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(cfg.DATASET.MEAN, cfg.DATASET.STD),
    ])

    # dataset and dataloader
    val_dataset = get_segmentation_dataset(cfg.DATASET.NAME,
                                           split='val',
                                           mode='testval',
                                           transform=input_transform)
    val_sampler = make_data_sampler(val_dataset, False, args.distributed)
    val_batch_sampler = make_batch_data_sampler(val_sampler,
                                                images_per_batch=cfg.TEST.BATCH_SIZE,
                                                drop_last=False)
    self.val_loader = data.DataLoader(dataset=val_dataset,
                                      batch_sampler=val_batch_sampler,
                                      num_workers=cfg.DATASET.WORKERS,
                                      pin_memory=True)
    self.classes = val_dataset.classes

    # create network
    self.model = get_segmentation_model().to(self.device)

    if hasattr(self.model, 'encoder') and hasattr(self.model.encoder, 'named_modules') and \
            cfg.MODEL.BN_EPS_FOR_ENCODER:
        logging.info('set bn custom eps for bn in encoder: {}'.format(cfg.MODEL.BN_EPS_FOR_ENCODER))
        self.set_batch_norm_attr(self.model.encoder.named_modules(), 'eps',
                                 cfg.MODEL.BN_EPS_FOR_ENCODER)

    if args.distributed:
        self.model = nn.parallel.DistributedDataParallel(self.model,
                                                         device_ids=[args.local_rank],
                                                         output_device=args.local_rank,
                                                         find_unused_parameters=True)
    self.model.to(self.device)

    self.metric = SegmentationMetric(val_dataset.num_class, args.distributed)
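# Note: the constructors in this file call self.set_batch_norm_attr(...), which
# is not shown in this section. A minimal sketch of what such a helper plausibly
# does (an assumption, not the verbatim implementation): walk the (name, module)
# pairs and override one attribute on every batch-norm layer.
def set_batch_norm_attr(self, named_modules, attr, value):
    for name, module in named_modules:
        # Override e.g. `eps` on every BatchNorm layer in the encoder.
        if isinstance(module, (nn.BatchNorm2d, nn.SyncBatchNorm)):
            setattr(module, attr, value)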
def __init__(self, args):
    self.args = args
    self.device = torch.device(args.device)

    # calibration / ECE settings
    self.n_bins = 15
    self.ece_folder = "eceData"
    # self.postfix = "foggy_conv13_CityScapes_GPU"
    self.postfix = "foggy_zurich_conv13"
    # self.postfix = "Foggy_1_conv13_PascalVOC_GPU"
    self.temp = 1.5
    # self.useCRF = False
    self.useCRF = True

    self.ece_criterion = metrics.IterativeECELoss()
    self.ece_criterion.make_bins(n_bins=self.n_bins)

    # image transform
    input_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(cfg.DATASET.MEAN, cfg.DATASET.STD),
    ])

    # dataset and dataloader
    val_dataset = get_segmentation_dataset(cfg.DATASET.NAME,
                                           split='val',
                                           mode='testval',
                                           transform=input_transform)
    val_sampler = make_data_sampler(val_dataset, False, args.distributed)
    val_batch_sampler = make_batch_data_sampler(val_sampler,
                                                images_per_batch=cfg.TEST.BATCH_SIZE,
                                                drop_last=False)
    self.val_loader = data.DataLoader(dataset=val_dataset,
                                      batch_sampler=val_batch_sampler,
                                      num_workers=cfg.DATASET.WORKERS,
                                      pin_memory=True)
    self.dataset = val_dataset
    self.classes = val_dataset.classes

    print(args.distributed)
    self.metric = SegmentationMetric(val_dataset.num_class, args.distributed)

    # create network
    self.model = get_segmentation_model().to(self.device)

    if hasattr(self.model, 'encoder') and hasattr(self.model.encoder, 'named_modules') and \
            cfg.MODEL.BN_EPS_FOR_ENCODER:
        logging.info('set bn custom eps for bn in encoder: {}'.format(cfg.MODEL.BN_EPS_FOR_ENCODER))
        self.set_batch_norm_attr(self.model.encoder.named_modules(), 'eps',
                                 cfg.MODEL.BN_EPS_FOR_ENCODER)

    if args.distributed:
        self.model = nn.parallel.DistributedDataParallel(self.model,
                                                         device_ids=[args.local_rank],
                                                         output_device=args.local_rank,
                                                         find_unused_parameters=True)
    self.model.to(self.device)
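# For reference, expected calibration error (ECE) bins predictions by confidence
# and averages |accuracy - mean confidence| over the bins, weighted by bin size.
# A tiny self-contained illustration, independent of the metrics.IterativeECELoss
# implementation used above:
import numpy as np

def simple_ece(confidences, correct, n_bins=15):
    # confidences: per-pixel max softmax probability; correct: 0/1 array.
    edges = np.linspace(0.0, 1.0, n_bins + 1)
    ece = 0.0
    for lo, hi in zip(edges[:-1], edges[1:]):
        in_bin = (confidences > lo) & (confidences <= hi)
        if in_bin.any():
            ece += in_bin.mean() * abs(correct[in_bin].mean() - confidences[in_bin].mean())
    return ece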
def __init__(self, args):
    self.args = args
    self.device = torch.device(args.device)

    # image transform
    input_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(cfg.DATASET.MEAN, cfg.DATASET.STD),
    ])

    self.lr = 7.5
    self.prefix = f"2_img_cce_only_lr={self.lr}"
    # self.prefix = f"overfit_with_bin_fraction_loss=no_bin_weights_ALPHA=0.5_lr={self.lr}"
    self.writer = SummaryWriter(log_dir=f"cce_cityscapes_conv_fcn_logs/{self.prefix}")

    # dataset and dataloader
    val_dataset = get_segmentation_dataset(cfg.DATASET.NAME,
                                           split='val',
                                           mode='testval',
                                           transform=input_transform)
    # val_sampler = make_data_sampler(val_dataset, False, args.distributed)
    self.val_loader = data.DataLoader(dataset=val_dataset,
                                      shuffle=True,
                                      batch_size=cfg.TEST.BATCH_SIZE,
                                      drop_last=True,
                                      num_workers=cfg.DATASET.WORKERS,
                                      pin_memory=True)
    self.dataset = val_dataset
    self.classes = val_dataset.classes
    self.metric = SegmentationMetric(val_dataset.num_class, args.distributed)

    # create networks: the segmentation model plus an FCN head built on poolNet
    self.model = get_segmentation_model().to(self.device)
    self.poolnet = poolNet(len(self.classes)).to(self.device)
    self.fcn = FCNs(self.poolnet, len(self.classes)).to(self.device)

    if hasattr(self.model, 'encoder') and hasattr(self.model.encoder, 'named_modules') and \
            cfg.MODEL.BN_EPS_FOR_ENCODER:
        logging.info('set bn custom eps for bn in encoder: {}'.format(cfg.MODEL.BN_EPS_FOR_ENCODER))
        self.set_batch_norm_attr(self.model.encoder.named_modules(), 'eps',
                                 cfg.MODEL.BN_EPS_FOR_ENCODER)

    if args.distributed:
        self.model = nn.parallel.DistributedDataParallel(self.model,
                                                         device_ids=[args.local_rank],
                                                         output_device=args.local_rank,
                                                         find_unused_parameters=True)
    self.model.to(self.device)
def demo():
    args = parse_args()
    cfg.update_from_file(args.config_file)
    cfg.PHASE = 'test'
    cfg.ROOT_PATH = root_path
    cfg.check_and_freeze()

    default_setup(args)

    # output folder
    output_dir = os.path.join(
        cfg.VISUAL.OUTPUT_DIR,
        'vis_result_{}_{}_{}_{}'.format(cfg.MODEL.MODEL_NAME, cfg.MODEL.BACKBONE,
                                        cfg.DATASET.NAME, cfg.TIME_STAMP))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # image transform
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(cfg.DATASET.MEAN, cfg.DATASET.STD),
    ])

    model = get_segmentation_model().to(args.device)
    model.eval()

    if os.path.isdir(args.input_img):
        img_paths = [os.path.join(args.input_img, x) for x in os.listdir(args.input_img)]
    else:
        img_paths = [args.input_img]
    for img_path in img_paths:
        image = Image.open(img_path).convert('RGB')
        images = transform(image).unsqueeze(0).to(args.device)

        with torch.no_grad():
            output = model(images)

        pred = torch.argmax(output[0], 1).squeeze(0).cpu().data.numpy()
        mask = get_color_pallete(pred, cfg.DATASET.NAME)
        outname = os.path.splitext(os.path.split(img_path)[-1])[0] + '.png'
        mask.save(os.path.join(output_dir, outname))
def __init__(self, args):
    self.args = args
    self.device = torch.device(args.device)

    # image transform
    input_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(cfg.DATASET.MEAN, cfg.DATASET.STD),
    ])

    # dataset and dataloader
    data_kwargs = {
        'transform': input_transform,
        'base_size': cfg.TRAIN.BASE_SIZE,
        'crop_size': cfg.TRAIN.CROP_SIZE
    }
    train_dataset = get_segmentation_dataset(cfg.DATASET.NAME,
                                             split='train',
                                             mode='train',
                                             **data_kwargs)
    val_dataset = get_segmentation_dataset(cfg.DATASET.NAME,
                                           split='val',
                                           mode=cfg.DATASET.MODE,
                                           **data_kwargs)
    self.iters_per_epoch = len(train_dataset) // (args.num_gpus * cfg.TRAIN.BATCH_SIZE)
    self.max_iters = cfg.TRAIN.EPOCHS * self.iters_per_epoch

    train_sampler = make_data_sampler(train_dataset, shuffle=True, distributed=args.distributed)
    train_batch_sampler = make_batch_data_sampler(train_sampler, cfg.TRAIN.BATCH_SIZE,
                                                  self.max_iters, drop_last=True)
    val_sampler = make_data_sampler(val_dataset, False, args.distributed)
    val_batch_sampler = make_batch_data_sampler(val_sampler, cfg.TEST.BATCH_SIZE, drop_last=False)

    self.train_loader = data.DataLoader(dataset=train_dataset,
                                        batch_sampler=train_batch_sampler,
                                        num_workers=cfg.DATASET.WORKERS,
                                        pin_memory=True)
    self.val_loader = data.DataLoader(dataset=val_dataset,
                                      batch_sampler=val_batch_sampler,
                                      num_workers=cfg.DATASET.WORKERS,
                                      pin_memory=True)

    # create network
    self.model = get_segmentation_model().to(self.device)

    # print params and flops
    if get_rank() == 0:
        try:
            show_flops_params(self.model, args.device)
        except Exception as e:
            logging.warning('get flops and params error: {}'.format(e))

    if cfg.MODEL.BN_TYPE not in ['BN']:
        logging.info('Batch norm type is {}, convert_sync_batchnorm is not effective'.format(
            cfg.MODEL.BN_TYPE))
    elif args.distributed and cfg.TRAIN.SYNC_BATCH_NORM:
        self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model)
        logging.info('SyncBatchNorm is effective!')
    else:
        logging.info('Not use SyncBatchNorm!')

    # create criterion
    self.criterion = get_segmentation_loss(cfg.MODEL.MODEL_NAME,
                                           use_ohem=cfg.SOLVER.OHEM,
                                           aux=cfg.SOLVER.AUX,
                                           aux_weight=cfg.SOLVER.AUX_WEIGHT,
                                           ignore_index=cfg.DATASET.IGNORE_INDEX).to(self.device)

    # optimizer, for model just includes encoder, decoder (head and auxlayer)
    self.optimizer = get_optimizer(self.model)

    # lr scheduling
    self.lr_scheduler = get_scheduler(self.optimizer,
                                      max_iters=self.max_iters,
                                      iters_per_epoch=self.iters_per_epoch)

    # resume checkpoint if needed
    self.start_epoch = 0
    if args.resume and os.path.isfile(args.resume):
        name, ext = os.path.splitext(args.resume)
        assert ext in ('.pkl', '.pth'), 'Sorry only .pth and .pkl files supported.'
        logging.info('Resuming training, loading {}...'.format(args.resume))
        resume_state = torch.load(args.resume)
        self.model.load_state_dict(resume_state['state_dict'])
        self.start_epoch = resume_state['epoch']
        logging.info('resume train from epoch: {}'.format(self.start_epoch))
        if resume_state['optimizer'] is not None and resume_state['lr_scheduler'] is not None:
            logging.info('resume optimizer and lr scheduler from resume state..')
            self.optimizer.load_state_dict(resume_state['optimizer'])
            self.lr_scheduler.load_state_dict(resume_state['lr_scheduler'])

    if args.distributed:
        self.model = nn.parallel.DistributedDataParallel(self.model,
                                                         device_ids=[args.local_rank],
                                                         output_device=args.local_rank,
                                                         find_unused_parameters=True)

    # evaluation metrics
    self.metric = SegmentationMetric(train_dataset.num_class, args.distributed)

    self.best_pred = 0.0
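# The resume branch above expects a checkpoint dict with 'state_dict', 'epoch',
# 'optimizer' and 'lr_scheduler' keys. A minimal sketch of the matching save
# side (save_checkpoint here is a hypothetical helper, not part of this file):
def save_checkpoint(trainer, epoch, path):
    state = {
        'state_dict': trainer.model.state_dict(),
        'epoch': epoch,
        'optimizer': trainer.optimizer.state_dict(),
        'lr_scheduler': trainer.lr_scheduler.state_dict(),
    }
    torch.save(state, path)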
def __init__(self, args):
    self.args = args
    self.device = torch.device(args.device)

    # image transform
    input_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(cfg.DATASET.MEAN, cfg.DATASET.STD),
    ])

    # test dataloader
    val_dataset = get_segmentation_dataset(cfg.DATASET.NAME,
                                           split='test',
                                           mode='val',
                                           transform=input_transform,
                                           base_size=cfg.TRAIN.BASE_SIZE)
    # validation dataloader
    # val_dataset = get_segmentation_dataset(cfg.DATASET.NAME,
    #                                        split='validation',
    #                                        mode='val',
    #                                        transform=input_transform,
    #                                        base_size=cfg.TRAIN.BASE_SIZE)
    val_sampler = make_data_sampler(val_dataset, shuffle=False, distributed=args.distributed)
    val_batch_sampler = make_batch_data_sampler(val_sampler,
                                                images_per_batch=cfg.TEST.BATCH_SIZE,
                                                drop_last=False)
    self.val_loader = data.DataLoader(dataset=val_dataset,
                                      batch_sampler=val_batch_sampler,
                                      num_workers=cfg.DATASET.WORKERS,
                                      pin_memory=True)
    logging.info('**** number of val batches: {}. ****'.format(len(self.val_loader)))

    self.classes = val_dataset.classes

    # create network
    self.model = get_segmentation_model().to(self.device)

    if hasattr(self.model, 'encoder') and cfg.MODEL.BN_EPS_FOR_ENCODER:
        logging.info('set bn custom eps for bn in encoder: {}'.format(cfg.MODEL.BN_EPS_FOR_ENCODER))
        self.set_batch_norm_attr(self.model.encoder.named_modules(), 'eps',
                                 cfg.MODEL.BN_EPS_FOR_ENCODER)

    if args.distributed:
        self.model = nn.parallel.DistributedDataParallel(self.model,
                                                         device_ids=[args.local_rank],
                                                         output_device=args.local_rank,
                                                         find_unused_parameters=True)
    self.model.to(self.device)

    num_gpu = args.num_gpus

    # metrics for all, easy, and hard images
    self.metric = SegmentationMetric(val_dataset.num_class, args.distributed, num_gpu)
    self.metric_easy = SegmentationMetric(val_dataset.num_class, args.distributed, num_gpu)
    self.metric_hard = SegmentationMetric(val_dataset.num_class, args.distributed, num_gpu)

    # number of easy and hard images
    self.count_easy = 0
    self.count_hard = 0
def demo():
    args = parse_args()
    cfg.update_from_file(args.config_file)
    cfg.PHASE = 'test'
    cfg.ROOT_PATH = root_path
    cfg.check_and_freeze()

    default_setup(args)

    # output folder
    output_dir = 'demo/trash/IC15'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # image transform
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(cfg.DATASET.MEAN, cfg.DATASET.STD),
    ])

    model = get_segmentation_model().to(args.device)
    model.eval()

    # get img_patch from IC15
    if os.path.exists('/mnt/lustre/share_data/xieenze/xez_space/Text/ICDAR2015/'):
        ic15_root_path = '/mnt/lustre/share_data/xieenze/xez_space/Text/ICDAR2015/'
    else:
        ic15_root_path = '/mnt/lustre/share/xieenze/Text/ICDAR2015/'
    ic15_train_data = ic15_root_path + 'ch4_training_images'
    ic15_train_gt = ic15_root_path + 'ch4_training_localization_transcription_gt'
    assert os.path.exists(ic15_train_data) and os.path.exists(ic15_train_gt)

    patch_imgs = []
    for i in trange(1, 501):
        img_path = 'img_{}.jpg'.format(i)
        img_path = os.path.join(ic15_train_data, img_path)
        gt_path = 'gt_img_{}.txt'.format(i)
        gt_path = os.path.join(ic15_train_gt, gt_path)
        if os.path.exists(gt_path) and os.path.exists(img_path):
            img, boxes = parse_img_gt(img_path, gt_path)
            img = np.array(img)
            if boxes == []:
                continue
            for box in boxes:
                x1, y1, x2, y2 = box
                patch = img[y1:y2 + 1, x1:x2 + 1]
                patch_imgs.append(Image.fromarray(patch))
            # only sample the first 500 patches for now
            if len(patch_imgs) > 500:
                break
        else:
            print(img_path)
    print('total patch images:{}'.format(len(patch_imgs)))

    pool_imgs, pool_masks = [], []
    count = 0
    for image in patch_imgs:
        # image = Image.open(img_path).convert('RGB')
        resized_img = image.resize(cfg.TRAIN.BASE_SIZE)
        resized_img = transform(resized_img).unsqueeze(0).to(args.device)
        with torch.no_grad():
            output = model(resized_img)

        pred = torch.argmax(output[0], 1).squeeze(0).cpu().data.numpy()
        img = np.array(image.resize(cfg.TRAIN.BASE_SIZE))
        mask = np.array(get_color_pallete(pred, cfg.DATASET.NAME))[:, :, None].repeat(3, -1) * 255
        if len(pool_imgs) < 20:
            pool_imgs.append(img)
            pool_masks.append(mask)
        else:
            # flush a grid of 20 image/mask pairs; note the current patch is not
            # added to the fresh pool, matching the original behaviour
            big_img = np.concatenate(pool_imgs, axis=0)
            big_mask = np.concatenate(pool_masks, axis=0)
            big_img_mask = Image.fromarray(np.concatenate([big_img, big_mask], axis=1))
            big_img_mask.save('{}/{}.png'.format(output_dir, count))
            print('{}/{}.png'.format(output_dir, count))
            count += 1
            pool_imgs, pool_masks = [], []
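# parse_img_gt is defined elsewhere; a plausible sketch consistent with how it
# is called above (an assumption about the ICDAR2015 gt format, where each line
# is "x1,y1,x2,y2,x3,y3,x4,y4,transcription"): return the PIL image plus
# axis-aligned boxes derived from the 4-point polygons.
def parse_img_gt(img_path, gt_path):
    img = Image.open(img_path).convert('RGB')
    boxes = []
    with open(gt_path, encoding='utf-8-sig') as f:
        for line in f:
            parts = line.strip().split(',')
            if len(parts) < 9:
                continue
            coords = np.array(parts[:8], dtype='float32').reshape(4, 2).astype('int32')
            # collapse the quadrilateral to an axis-aligned box (x1, y1, x2, y2)
            x1, y1 = coords.min(axis=0)
            x2, y2 = coords.max(axis=0)
            boxes.append([x1, y1, x2, y2])
    return img, boxes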
def demo():
    args = parse_args()
    cfg.update_from_file(args.config_file)
    cfg.PHASE = 'test'
    cfg.ROOT_PATH = root_path
    cfg.check_and_freeze()

    default_setup(args)

    # temp = 1.8
    temp = 1.6
    # usingCRF = False
    usingCRF = True

    # output folder
    output_dir = os.path.join(
        cfg.VISUAL.OUTPUT_DIR,
        'snow_1_conv_9_{}_{}_{}_{}_temp_{}_crf_{}'.format(cfg.MODEL.MODEL_NAME, cfg.MODEL.BACKBONE,
                                                          cfg.DATASET.NAME, cfg.TIME_STAMP, temp,
                                                          usingCRF))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # image transform
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(cfg.DATASET.MEAN, cfg.DATASET.STD),
    ])

    model = get_segmentation_model().to(args.device)
    model.eval()

    if os.path.isdir(args.input_img):
        img_paths = [os.path.join(args.input_img, x) for x in os.listdir(args.input_img)]
    else:
        img_paths = [args.input_img]
    for img_path in img_paths:
        image_pil = Image.open(img_path).convert('RGB')
        image = transform(image_pil).unsqueeze(0).to(args.device)

        with torch.no_grad():
            output = model.evaluate(image).detach()  # output shape is [1, 21, w, h], on cuda
        # import pdb; pdb.set_trace()
        print(img_path)

        if usingCRF:
            non_cali_crf_output = output.clone()
            output /= temp
            pre_crf_pred = torch.argmax(output, 1).squeeze(0).cpu().data.numpy()
            pre_crf_mask = get_color_pallete(pre_crf_pred, cfg.DATASET.NAME)

            raw_image = cv2.imread(img_path, cv2.IMREAD_COLOR).astype(np.float32).transpose(2, 0, 1)
            raw_image = torch.from_numpy(raw_image).to(args.device)
            raw_image = raw_image.unsqueeze(dim=0)

            # output shape is [1, 21, w, h]
            num_classes = output.shape[1]
            crf = GaussCRF(conf=get_default_conf(),
                           shape=image.shape[2:],
                           nclasses=num_classes,
                           use_gpu=True)
            crf = crf.to(args.device)
            assert image.shape == raw_image.shape
            output = crf.forward(output, raw_image)
            # print(output.shape)

            # saving the image
            pred = torch.argmax(output, 1).squeeze(0).cpu().data.numpy()
            mask = get_color_pallete(pred, cfg.DATASET.NAME)
            outname = os.path.splitext(os.path.split(img_path)[-1])[0] + f'_temp_{temp}_crf_{usingCRF}.png'

            # uncalibrated output with CRF
            non_cali_crf_output = crf.forward(non_cali_crf_output, raw_image)
            non_cali_crf_pred = torch.argmax(non_cali_crf_output, 1).squeeze(0).cpu().data.numpy()
            non_cali_crf_mask = get_color_pallete(non_cali_crf_pred, cfg.DATASET.NAME)

            # concatenate horizontally: [calibrated+CRF, uncalibrated+CRF, pre-CRF, rgb]
            dst = Image.new('RGB', (4 * mask.width + 9, mask.height), color="white")
            dst.paste(mask, (0, 0))
            dst.paste(non_cali_crf_mask, (mask.width + 3, 0))
            dst.paste(pre_crf_mask, (2 * mask.width + 6, 0))
            dst.paste(image_pil, (3 * mask.width + 9, 0))
            dst.save(os.path.join(output_dir, outname))
        else:
            pred = torch.argmax(output, 1).squeeze(0).cpu().data.numpy()
            mask = get_color_pallete(pred, cfg.DATASET.NAME)
            # concatenate horizontally: [output, rgb]
            dst = Image.new('RGB', (mask.width + image_pil.width, mask.height))
            dst.paste(mask, (0, 0))
            dst.paste(image_pil, (mask.width, 0))
            outname = os.path.splitext(os.path.split(img_path)[-1])[0] + f'_temp_{temp}_crf_{usingCRF}.png'
            # mask.save(os.path.join(output_dir, outname))
            dst.save(os.path.join(output_dir, outname))
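# A tiny self-contained illustration (values are made up) of what dividing the
# logits by `temp` does above: the argmax, and hence the predicted mask, is
# unchanged, but the softmax confidences are softened, which is what the CRF
# and any calibration metric actually see.
import torch
import torch.nn.functional as F

logits = torch.tensor([4.0, 1.0, 0.5])
for T in (1.0, 1.6):
    print(T, F.softmax(logits / T, dim=0))  # same argmax, flatter distribution for T > 1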
def demo():
    args = parse_args()
    cfg.update_from_file(args.config_file)
    cfg.PHASE = 'test'
    cfg.ROOT_PATH = root_path
    cfg.check_and_freeze()

    default_setup(args)

    temp = 1.8
    # temp = 3
    usingCRF = False
    usingCRF = True

    # output folder
    output_dir = os.path.join(
        cfg.VISUAL.OUTPUT_DIR,
        'vis_result_{}_{}_{}_{}_temp_{}_crf_{}'.format(cfg.MODEL.MODEL_NAME, cfg.MODEL.BACKBONE,
                                                       cfg.DATASET.NAME, cfg.TIME_STAMP, temp,
                                                       usingCRF))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # image transform
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(cfg.DATASET.MEAN, cfg.DATASET.STD),
    ])

    model = get_segmentation_model().to(args.device)
    model.eval()

    if os.path.isdir(args.input_img):
        img_paths = [os.path.join(args.input_img, x) for x in os.listdir(args.input_img)]
    else:
        img_paths = [args.input_img]
    for img_path in img_paths:
        image = Image.open(img_path).convert('RGB')
        images = transform(image).unsqueeze(0).to(args.device)

        with torch.no_grad():
            output = model(images)
        # import pdb; pdb.set_trace()

        _, H, W = images[0].shape
        logit = F.interpolate(output[0], size=(H, W), mode="bilinear", align_corners=True)
        print(img_path, logit.shape)
        logit /= temp
        # output_prob = F.softmax(logit / temp, dim=1)
        # output_prob = output_prob.cpu().numpy()

        if usingCRF:
            # raw_image = cv2.imread(img_path, cv2.IMREAD_COLOR).astype(np.float32)
            # mean_bgr = np.array([103.53, 116.28, 123.675])
            # # do some subtraction
            # raw_image -= mean_bgr
            # # convert to C H W
            # raw_image = raw_image.transpose(2, 0, 1)
            # raw_image = raw_image.astype(np.uint8)
            # raw_image = raw_image.transpose(1, 2, 0)
            # raw_images.append(raw_image)
            # postprocessor = DenseCRF(iter_max=cfg.CRF.ITER_MAX,
            #                          pos_xy_std=cfg.CRF.POS_XY_STD,
            #                          pos_w=cfg.CRF.POS_W,
            #                          bi_xy_std=cfg.CRF.BI_XY_STD,
            #                          bi_rgb_std=cfg.CRF.BI_RGB_STD,
            #                          bi_w=cfg.CRF.BI_W,
            #                          )
            postprocessor = CrfRnn(21)
            raw_image = cv2.imread(img_path, cv2.IMREAD_COLOR).astype(np.float32).transpose(2, 0, 1)
            raw_image = torch.from_numpy(raw_image).unsqueeze(dim=0)
            prob_post = postprocessor(raw_image, logit.cpu().softmax(dim=1))
            print(prob_post.shape)
            pred = np.argmax(prob_post.squeeze(0).detach().numpy(), axis=0)
        else:
            pred = torch.argmax(logit, 1).squeeze(0).cpu().data.numpy()

        mask = get_color_pallete(pred, cfg.DATASET.NAME)
        outname = os.path.splitext(os.path.split(img_path)[-1])[0] + f'_temp_{temp}_crf_{usingCRF}.png'
        mask.save(os.path.join(output_dir, outname))
def demo():
    args = parse_args()
    cfg.update_from_file(args.config_file)
    cfg.PHASE = 'test'
    cfg.ROOT_PATH = root_path
    cfg.check_and_freeze()
    default_setup(args)

    # image transform
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(cfg.DATASET.MEAN, cfg.DATASET.STD),
    ])

    model = get_segmentation_model().to(args.device)
    model.eval()

    # get img_patch from IC15
    ic15_root_path = '/mnt/lustre/share_data/xieenze/xez_space/Text/ICDAR2015/'
    ic15_train_data = ic15_root_path + 'ch4_training_images'
    ic15_train_gt = ic15_root_path + 'ch4_training_localization_transcription_gt'

    '''bbox + pseudo box '''
    ic15_train_gt_bbox = ic15_root_path + 'ch4_training_localization_transcription_gt_bbox'
    ic15_train_gt_pseudobox = ic15_root_path + 'ch4_training_localization_transcription_gt_pseudo'
    os.system('rm -rf {}/*txt'.format(ic15_train_gt_bbox))  # ; os.mkdir(ic15_train_gt_bbox)
    os.system('rm -rf {}/*txt'.format(ic15_train_gt_pseudobox))  # ; os.mkdir(ic15_train_gt_pseudobox)

    num_ic15_imgs = 1000

    # iterate over images
    for i in trange(1, num_ic15_imgs + 1):
        if debug_flag:
            # debug-----------
            if len(theta2s) > 500:
                break
            # debug-----------
        img_path = 'img_{}.jpg'.format(i)
        img_path = os.path.join(ic15_train_data, img_path)
        gt_path = 'gt_img_{}.txt'.format(i)
        gt_path = os.path.join(ic15_train_gt, gt_path)
        if os.path.exists(gt_path) and os.path.exists(img_path):
            img, boxes, ori_box = parse_img_gt(img_path, gt_path)
            img = np.array(img)

            # iterate over boxes
            f_bbox = open(os.path.join(ic15_train_gt_bbox, 'gt_img_{}.txt'.format(i)), 'w')
            f_rbox = open(os.path.join(ic15_train_gt_pseudobox, 'gt_img_{}.txt'.format(i)), 'w')
            seq_bbox, seq_rbox = [], []
            for j, box in enumerate(boxes):
                mask = np.zeros_like(img)[:, :, 0]
                x1, y1, x2, y2, is_ignore = box
                patch = img[y1:y2 + 1, x1:x2 + 1]
                patch = Image.fromarray(patch)
                pred_gt = inference(model, patch, transform)
                mask[y1:y2 + 1, x1:x2 + 1] = pred_gt

                # get bbox and rbox
                _, rbox = get_pseudo_pabel(mask)
                bbox = np.array([[x1, y1], [x2, y1], [x2, y2], [x1, y2]], dtype='int32')
                if debug_flag:
                    if is_ignore == 1:
                        cv2.drawContours(img, [rbox], 0, (0, 0, 255), 1)
                        cv2.drawContours(img, [bbox], 0, (0, 255, 0), 1)
                        cv2.drawContours(img, [ori_box[j]], 0, (255, 0, 0), 1)
                    else:
                        cv2.drawContours(img, [rbox], 0, (0, 255, 255), 1)
                        cv2.drawContours(img, [bbox], 0, (255, 255, 0), 1)
                        cv2.drawContours(img, [ori_box[j]], 0, (255, 0, 255), 1)

                bbox, rbox = list(bbox.reshape(-1)), list(rbox.reshape(-1))
                # the starting vertex has to match the IC15 convention, so shift it (see sketch below)
                # embed()
                bbox = adjust_box_sort(bbox)
                rbox = adjust_box_sort(rbox)
                if debug_flag:
                    # debug-------------------------
                    if is_ignore == 1:
                        pes_r_box = np.array(rbox)[None, ...]
                        ori_r_box = ori_box[j].reshape(-1)
                        theta1 = find_min_rect_angle(pes_r_box) / math.pi * 180
                        theta2 = find_min_rect_angle(ori_r_box) / math.pi * 180
                        theta1s.append(theta1)
                        theta2s.append(theta2)
                        if abs(theta2 - theta1) > 25:
                            print(i)
                        delta_theat.append(theta1 - theta2)
                    # debug-------------------------
                if is_ignore == 1:
                    seq_bbox.append(",".join([str(int(i)) for i in bbox]) + ',aaa,\n')
                    seq_rbox.append(",".join([str(int(i)) for i in rbox]) + ',aaa,\n')
                else:
                    seq_bbox.append(",".join([str(int(i)) for i in bbox]) + ',###,\n')
                    seq_rbox.append(",".join([str(int(i)) for i in rbox]) + ',###,\n')
            f_bbox.writelines(seq_bbox)
            f_rbox.writelines(seq_rbox)
            f_bbox.close()
            f_rbox.close()
            if debug_flag:
                print('debug vis')
                cv2.imwrite('trash/img{}.png'.format(i), img[:, :, [2, 1, 0]])
        else:
            print(img_path)

    if debug_flag:
        # debug-------------------------
        x = [i for i in range(len(theta1s))]
        plt.rcParams['figure.figsize'] = (10.0, 4.0)
        plt.rcParams['savefig.dpi'] = 300  # saved-figure dpi
        plt.rcParams['figure.dpi'] = 300   # display dpi
        # plt.plot(x, theta1s)
        # plt.plot(x, theta2s)
        plt.plot(x, delta_theat)
        plt.title('line chart')
        plt.xlabel('x')
        plt.ylabel('theta')
        plt.savefig('trash/theta.png')
        # debug-------------------------
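# adjust_box_sort above is defined elsewhere. A plausible sketch of what it
# does, given the comment about matching the IC15 starting vertex (an
# assumption, not the verbatim implementation): rotate the four vertices so
# the one closest to the top-left corner comes first.
def adjust_box_sort(box):
    pts = np.array(box, dtype='float32').reshape(4, 2)
    start = int(np.argmin(pts.sum(axis=1)))  # vertex with the smallest x + y
    pts = np.roll(pts, -start, axis=0)
    return list(pts.reshape(-1))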
def demo():
    args = parse_args()
    cfg.update_from_file(args.config_file)
    cfg.PHASE = 'test'
    cfg.ROOT_PATH = root_path
    cfg.check_and_freeze()
    default_setup(args)

    # output folder
    output_dir = '/home/xjc/Desktop/CVPR_SemiText/SemiText/TextBoxSeg/demo/TT_attention'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # image transform
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(cfg.DATASET.MEAN, cfg.DATASET.STD),
    ])

    model = get_segmentation_model().to(args.device)
    model.eval()

    # get img_patch from Total-Text
    if os.path.exists('/home/xjc/Dataset/total-text/'):
        total_root_path = '/home/xjc/Dataset/total-text/'
    else:
        total_root_path = '/home/xjc/Dataset/total-text/'
    total_train_data = total_root_path + 'Images/Train/'
    total_train_gt = total_root_path + 'gt/Train/'
    assert os.path.exists(total_train_data) and os.path.exists(total_train_gt)

    patch_imgs = []
    patch_gt = []
    for i in trange(1, 501):
        img_path = 'img{}.jpg'.format(i)
        img_path = os.path.join(total_train_data, img_path)
        gt_path = 'poly_gt_img{}.mat'.format(i)
        gt_path = os.path.join(total_train_gt, gt_path)
        if os.path.exists(gt_path) and os.path.exists(img_path):
            img, boxes, polygon_list = parse_img_gt(img_path, gt_path)
            img = np.array(img)
            if boxes == []:
                continue
            for bo_idx, box in enumerate(boxes):
                x1, y1, x2, y2 = box
                patch = img[y1:y2 + 1, x1:x2 + 1]
                patch_imgs.append(Image.fromarray(patch))

                gt_image = np.zeros(img.shape[:2], dtype=np.uint8)
                # print(polygon_list[bo_idx])
                cv2.fillPoly(gt_image, [np.array(polygon_list[bo_idx])], 1)
                gt_patch = gt_image[y1:y2 + 1, x1:x2 + 1]
                patch_gt.append(gt_patch)
            # only sample the first 500 patches for now
            if len(patch_imgs) > 500:
                break
        else:
            print(img_path)
    print('total patch images:{}'.format(len(patch_imgs)))

    pool_imgs, pool_masks, pool_gts, dist_imgs, dist_img_pres = [], [], [], [], []
    count = 0
    for idx_image, image in enumerate(patch_imgs):
        # image = Image.open(img_path).convert('RGB')
        gt_path_one = patch_gt[idx_image]
        origin_h, origin_w = image.height, image.width
        if origin_h > origin_w:
            image = image.transpose(Image.ROTATE_90)  # rotate the patch by 90 degrees
            gt_path_one = RotateClockWise90(gt_path_one)

        # cfg.TRAIN.BASE_SIZE
        resized_img = image.resize(cfg.TRAIN.BASE_SIZE)
        # resized_img = scale_image(image)
        resized_img = transform(resized_img).unsqueeze(0).to(args.device)
        with torch.no_grad():
            output, skeleton = model(resized_img)

        pred = torch.argmax(output[0], 1).squeeze(0).cpu().data.numpy()
        skeleton_1 = (skeleton[0][0] * 255).squeeze(0).cpu().data.numpy().astype("int64")
        dist_img = np.array(get_color_pallete(skeleton_1, cfg.DATASET.NAME))[:, :, None].repeat(3, -1)
        dist_img_pre = cv2.resize(dist_img, (128, 96))

        img = np.array(image.resize((128, 96)))

        dis_ = gt_path_one.copy()
        gt_path_one = np.array(get_color_pallete(gt_path_one, cfg.DATASET.NAME))[:, :, None].repeat(3, -1) * 255
        gt_path_one = cv2.resize(gt_path_one, (128, 96))

        dis_ = cv2.resize(dis_, (128, 96))
        dist_img = cv2.distanceTransform(np.array(dis_), cv2.DIST_L1, cv2.DIST_MASK_3)
        # dist_back = cv2.distanceTransform((1 - np.array(dis_)), cv2.DIST_L1, cv2.DIST_MASK_3)
        dist_img = (dist_img / dist_img.max() * 0.5 + 0.5)
        dist_img = np.array(get_color_pallete(dist_img * 255, cfg.DATASET.NAME))[:, :, None].repeat(3, -1)

        mask = np.array(get_color_pallete(pred, cfg.DATASET.NAME))[:, :, None].repeat(3, -1) * 255
        kernel = np.ones((6, 6), np.uint8)
        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
        mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
        # erosion = cv2.erode(img, kernel, iterations=1)
        kernel = np.ones((3, 3), np.uint8)
        mask = cv2.dilate(mask, kernel, iterations=1)
        mask = cv2.resize(mask, (128, 96))

        if len(pool_imgs) < 20:
            pool_imgs.append(img)
            pool_masks.append(mask)
            pool_gts.append(gt_path_one)
            dist_imgs.append(dist_img)
            dist_img_pres.append(dist_img_pre)
        else:
            # flush a grid of 20 rows; the current patch is not added to the
            # fresh pool, matching the original behaviour
            big_img = np.concatenate(pool_imgs, axis=0)
            big_mask = np.concatenate(pool_masks, axis=0)
            pool_gt = np.concatenate(pool_gts, axis=0)
            pool_dist_img_pres = np.concatenate(dist_img_pres, axis=0)
            pool_dist_img = np.concatenate(dist_imgs, axis=0)
            big_img_mask = Image.fromarray(
                np.concatenate([big_img, big_mask, pool_gt, pool_dist_img_pres, pool_dist_img], axis=1))
            big_img_mask.save('{}/{}.png'.format(output_dir, count))
            print('{}/{}.png'.format(output_dir, count))
            count += 1
            pool_imgs, pool_masks, pool_gts, dist_imgs, dist_img_pres = [], [], [], [], []