def main():
    """Predict type/colour attributes for an image directory and export them.

    The function reads the CLI arguments, builds the prediction dataset and
    detector from the config, runs the model over every batch, and merges the
    per-batch outputs with ``align_box_and_type_color``. The merged
    predictions are written as JSON and, when ``args.msgrest`` is set, also
    posted through the object returned by ``post_messages``.
    """
    args = parse_args()
    messenger = post_messages(args.msgrest)
    cfg = mmcv.Config.fromfile(args.cfg)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # build the dataloader
    cfg.pred_dir = args.image_dir
    cfg.data.pred.img_prefix = args.image_dir
    cfg.data.pred.ann_file = os.path.join(args.image_dir,
                                          'ImageSets/Main/test.txt')
    dataset = build_dataset(cfg.data.pred)
    data_loader = build_dataloader(
        dataset,
        imgs_per_gpu=cfg.data.imgs_per_gpu,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    # build the model and restore its weights
    model = build_detector(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
    model.eval()
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        # NOTE(review): `wrap_fg16_model` looks like a typo for
        # `wrap_fp16_model`; the name is kept because its definition/import
        # is not visible here -- confirm against the file's imports.
        wrap_fg16_model(model)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    if 'CLASSES' in checkpoint['meta']:
        model.CLASSES = checkpoint['meta']['CLASSES']
    else:
        model.CLASSES = dataset.CLASSES

    pred = []
    model = MMDataParallel(model, device_ids=[args.gpu])
    prog_bar = mmcv.ProgressBar(len(dataset))
    with torch.no_grad():
        for data in data_loader:
            output_tmp = model(return_loss=True, **data)
            img_metas = data['img_meta'].data[0]
            IDs = [meta['IDs'] for meta in img_metas]
            file_list = [meta['filename'] for meta in img_metas]
            pred = align_box_and_type_color(data, output_tmp, IDs, file_list,
                                            pred)
            # Advance the bar by the real batch size; the last batch may be
            # smaller than the configured one.
            for _ in range(len(img_metas)):
                prog_bar.update()

    if args.output_dir is not None:
        write_dir = args.output_dir
    else:
        write_dir = os.path.join('./work_dirs', 'pred_type_color.pth')
    with open(write_dir, 'w') as f:
        json.dump(pred, f)
    print('file saved at: ', write_dir)

    if args.msgrest:
        print("send http message")
        msg = {}
        msg['type'] = "15"  # distinguish picture
        msg['taskid'] = args.taskid
        msg['index'] = '1'
        msg['label_info'] = pred
        msg['total'] = '1'
        data_json = json.dumps(msg)
        messenger.post(data_json)
def make_lmdb(data_path,
              lmdb_path,
              img_path_list,
              keys,
              batch=5000,
              compress_level=1,
              multiprocessing_read=False,
              n_thread=40):
    """Make lmdb.

    Contents of lmdb. The file structure is:
    example.lmdb
    ├── data.mdb
    ├── lock.mdb
    ├── meta_info.txt

    The data.mdb and lock.mdb are standard lmdb files and you can refer to
    https://lmdb.readthedocs.io/en/release/ for more details.

    The meta_info.txt is a specified txt file to record the meta information
    of our datasets. It will be automatically created when preparing
    datasets by our provided dataset tools.
    Each line in the txt file records 1)image name (with extension),
    2)image shape, and 3)compression level, separated by a white space.

    For example, the meta information could be:
    `000_00000000.png (720,1280,3) 1`, which means:
    1) image name (with extension): 000_00000000.png;
    2) image shape: (720,1280,3);
    3) compression level: 1

    We use the image name without extension as the lmdb key.

    If `multiprocessing_read` is True, it will read all the images to memory
    using multiprocessing. Thus, your server needs to have enough memory.

    Args:
        data_path (str): Data path for reading images.
        lmdb_path (str): Lmdb save path.
        img_path_list (str): Image path list.
        keys (str): Used for lmdb keys.
        batch (int): After processing batch images, lmdb commits.
            Default: 5000.
        compress_level (int): Compress level when encoding images. Default: 1.
        multiprocessing_read (bool): Whether use multiprocessing to read all
            the images to memory. Default: False.
        n_thread (int): For multiprocessing.

    Raises:
        AssertionError: If ``img_path_list`` and ``keys`` differ in length.
        ValueError: If ``lmdb_path`` does not end with ``.lmdb``.
        SystemExit: If ``lmdb_path`` already exists (exit code 1).
    """
    assert len(img_path_list) == len(keys), (
        'img_path_list and keys should have the same length, '
        f'but got {len(img_path_list)} and {len(keys)}')
    print(f'Create lmdb for {data_path}, save to {lmdb_path}...')
    print(f'Total images: {len(img_path_list)}')
    if not lmdb_path.endswith('.lmdb'):
        raise ValueError("lmdb_path must end with '.lmdb'.")
    if osp.exists(lmdb_path):
        print(f'Folder {lmdb_path} already exists. Exit.')
        sys.exit(1)

    if multiprocessing_read:
        # read all the images to memory (multiprocessing)
        dataset = {}  # use dict to keep the order for multiprocessing
        shapes = {}
        print(f'Read images with multiprocessing, #thread: {n_thread} ...')
        prog_bar = mmcv.ProgressBar(len(img_path_list))

        def callback(arg):
            """get the image data and update prog_bar."""
            key, dataset[key], shapes[key] = arg
            prog_bar.update()

        pool = Pool(n_thread)
        for path, key in zip(img_path_list, keys):
            pool.apply_async(
                read_img_worker,
                args=(osp.join(data_path, path), key, compress_level),
                callback=callback)
        pool.close()
        pool.join()
        print(f'Finish reading {len(img_path_list)} images.')

    # create lmdb environment
    # obtain data size for one image; the map size is estimated from it
    img = mmcv.imread(osp.join(data_path, img_path_list[0]), flag='unchanged')
    _, img_byte = cv2.imencode('.png', img,
                               [cv2.IMWRITE_PNG_COMPRESSION, compress_level])
    data_size_per_img = img_byte.nbytes
    print('Data size per image is: ', data_size_per_img)
    data_size = data_size_per_img * len(img_path_list)
    env = lmdb.open(lmdb_path, map_size=data_size * 10)

    # write data to lmdb; the env and the meta file are released even when
    # reading or writing an image fails half-way.
    prog_bar = mmcv.ProgressBar(len(img_path_list))
    try:
        txn = env.begin(write=True)
        with open(osp.join(lmdb_path, 'meta_info.txt'), 'w') as txt_file:
            for idx, (path, key) in enumerate(zip(img_path_list, keys)):
                prog_bar.update()
                key_byte = key.encode('ascii')
                if multiprocessing_read:
                    img_byte = dataset[key]
                    h, w, c = shapes[key]
                else:
                    _, img_byte, img_shape = read_img_worker(
                        osp.join(data_path, path), key, compress_level)
                    h, w, c = img_shape

                txn.put(key_byte, img_byte)
                # write meta information
                txt_file.write(f'{key}.png ({h},{w},{c}) {compress_level}\n')
                if idx % batch == 0:
                    txn.commit()
                    txn = env.begin(write=True)
            txn.commit()
    finally:
        env.close()
    print('\nFinish writing lmdb.')
def extract_inception_features(dataloader,
                               inception,
                               num_samples,
                               inception_style='pytorch'):
    """Run the inception network over a dataloader and gather its features.

    Args:
        dataloader (:obj:`DataLoader`): Source of image batches. Each batch
            is a dict holding the images under ``'real_img'`` or ``'img'``.
        inception (nn.Module): Inception network.
        num_samples (int): Number of feature rows to return.
        inception_style (str): ``"pytorch"`` or ``"stylegan"``; selects how
            the network is called. Defaults to ``"pytorch"``.

    Returns:
        torch.Tensor: The first ``num_samples`` feature rows, on CPU.
    """
    per_batch = dataloader.batch_size
    # number of batches needed to cover `num_samples` (rounded up)
    num_iters = num_samples // per_batch
    num_iters += int(num_iters * per_batch < num_samples)

    # define mmcv progress bar
    pbar = mmcv.ProgressBar(num_iters)

    collected = []
    for step, batch in enumerate(dataloader, start=1):
        # Two dataset flavours are supported: the unconditional dataset
        # stores real images under "real_img", the conditional one under
        # "img".
        images = batch['real_img'] if 'real_img' in batch else batch['img']
        pbar.update()

        # A bare (non-wrapped) network needs the input moved to its device.
        if not is_module_wrapper(inception):
            images = images.to(get_module_device(inception))

        if inception_style == 'stylegan':
            images = (images * 127.5 + 128).clamp(0, 255).to(torch.uint8)
            batch_feature = inception(images, return_features=True)
        else:
            batch_feature = inception(images)[0].view(images.shape[0], -1)
        collected.append(batch_feature.to('cpu'))

        if step >= num_iters:
            break

    # More features than requested may have been gathered; trim the excess.
    features = torch.cat(collected, 0)
    assert features.shape[0] >= num_samples
    features = features[:num_samples]

    # move to a fresh line after the progress bar
    sys.stdout.write('\n')
    return features
def single_test_json(model,
                     data_loader,
                     post_processor,
                     save_json_file,
                     show=True,
                     show_path=None,
                     debug_f=True,
                     gt_ann_path=None):
    """Run the model over a dataset and dump polygon predictions to JSON.

    For every batch the predicted masks are turned into boxes/polygons by
    ``post_processor.process`` and stored under the image name (with
    ``'gt_'`` replaced by ``'res_'``). Optionally the predictions, and the
    ground truth when ``debug_f`` is set, are drawn onto the image and saved.

    :param model: model
    :param data_loader: data_loader
    :param post_processor: use this to generate bbox from mask
    :param save_json_file: the path of json file.
    :param show: whether to draw the results; images are only written when
        ``show_path`` is also given.
    :param show_path: directory receiving the drawn images (created when
        missing).
    :param debug_f: whether to load ``gt_ann_path`` and overlay ground truth.
    :param gt_ann_path: path of the ground-truth JSON; required when
        ``debug_f`` is True.
    :return: None. Results are written to ``save_json_file``.
    """
    # first get the pred, then get the bboxes from masks
    # use post_processor to filter the bboxes and then save in json file.
    # masks are generated by a set of data augmentation functions.
    model.eval()
    # get the img_infos from dataset
    dataset = data_loader.dataset
    img_prefix = dataset.img_prefix
    img_norm_cfg = dataset.img_norm_cfg
    prog_bar = mmcv.ProgressBar(len(dataset))

    # Always bound so the drawing code can test it regardless of `debug_f`.
    eval_gt_annotations = None
    if debug_f:
        assert gt_ann_path is not None and osp.isfile(gt_ann_path), (
            f'gt_ann_path must be an existing file, got {gt_ann_path!r}')
        with open(gt_ann_path, 'r', encoding='utf-8') as f:
            eval_gt_annotations = json.load(f, object_pairs_hook=OrderedDict)

    # Drawing needs a destination; without `show_path` it is skipped instead
    # of failing on `osp.join(None, ...)`.
    visualize = show and show_path is not None
    if visualize:
        os.makedirs(show_path, exist_ok=True)

    imgs_bboxes_results = {}
    for data in data_loader:
        # get the name, height, width from data['img_meta'];
        # for multi-scale test, img_meta contains several img_meta
        with torch.no_grad():
            # can change this to True.
            result = model(return_loss=False, rescale=True, **data)

        # deal with the mask and post processing here.
        # as masks are fit to the original imgs, just reload the original imgs
        if isinstance(result, tuple):
            bbox_result, segm_result = result
        else:
            bbox_result, segm_result = result, None
        img_tensor = data['img'][0]  # for aug test data['img'] is a list.
        img_metas = data['img_meta'][0].data[0]  # datacontainer
        filename = img_metas[0]['filename']
        img_name = osp.splitext(filename)[0]
        # for eval.
        img_name = img_name.replace('gt_', 'res_')
        imgs = tensor2imgs(img_tensor, **img_norm_cfg)
        assert len(imgs) == len(img_metas)
        img_meta_0 = img_metas[0]
        vs_bbox_result = np.vstack(bbox_result)

        if segm_result is None:
            pred_bboxes, pred_bbox_scores = [], []
        else:
            if isinstance(segm_result, tuple):
                segm_scores = segm_result[-1]
                segms = mmcv.concat_list(segm_result[0])
            else:
                # fall back to the bbox confidence (last column)
                segm_scores = np.asarray(vs_bbox_result[:, -1])
                segms = mmcv.concat_list(segm_result)
            pred_bboxes, pred_bbox_scores = post_processor.process(
                segms,
                segm_scores,
                mask_shape=img_meta_0['ori_shape'],
                scale_factor=(1.0, 1.0))

        # save the results.
        single_pred_results = []
        for pred_bbox, pred_bbox_score in zip(pred_bboxes, pred_bbox_scores):
            points = np.asarray(pred_bbox).reshape((-1, 2)).astype(np.int32)
            single_pred_results.append({
                "points": points.tolist(),
                "confidence": float(pred_bbox_score)
            })
        imgs_bboxes_results[img_name] = single_pred_results

        if visualize:
            img = cv2.imread(osp.join(img_prefix, filename))
            for pred in single_pred_results:
                bbox = np.asarray(pred["points"]).reshape(-1, 2).astype(
                    np.int64)
                cv2.drawContours(img, [bbox], -1, (0, 255, 0), 2)
            if debug_f and eval_gt_annotations is not None:
                for gt_anno in eval_gt_annotations[img_name]:
                    gt_bbox = np.asarray(gt_anno["points"]).reshape(
                        -1, 2).astype(np.int64)
                    if gt_anno["illegibility"]:
                        # colour used for ignored (illegible) annotations
                        color = (255, 0, 0)
                    else:
                        # colour used for regular annotations
                        color = (0, 0, 255)
                    cv2.drawContours(img, [gt_bbox], -1, color, 2)
            cv2.imwrite(osp.join(show_path, filename), img)

        batch_size = data['img'][0].size(0)
        for _ in range(batch_size):
            prog_bar.update()

    with open(save_json_file, 'w+', encoding='utf-8') as f:
        json.dump(imgs_bboxes_results, f)
def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    show_score_thr=0.3):
    """Test a model on one GPU, optionally visualising every prediction.

    Args:
        model (nn.Module): Wrapped model; ``model.module.show_result`` is
            used for visualisation.
        data_loader: Loader yielding test batches.
        show (bool): Whether to display the drawn results.
        out_dir (str | None): Directory for the drawn results, if any.
        show_score_thr (float): Score threshold passed to ``show_result``.

    Returns:
        list: Per-image results, with mask results encoded.
    """
    model.eval()
    collected = []
    dataset = data_loader.dataset
    PALETTE = getattr(dataset, 'PALETTE', None)
    prog_bar = mmcv.ProgressBar(len(dataset))

    for data in data_loader:
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)

        batch_size = len(result)
        if show or out_dir:
            first_img = data['img'][0]
            single_tensor = batch_size == 1 and isinstance(
                first_img, torch.Tensor)
            img_tensor = first_img if single_tensor else first_img.data[0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for idx, (img, img_meta) in enumerate(zip(imgs, img_metas)):
                # crop away the padding, then go back to the original size
                h, w, _ = img_meta['img_shape']
                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img[:h, :w, :], (ori_w, ori_h))

                out_file = None
                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])

                model.module.show_result(
                    img_show,
                    result[idx],
                    bbox_color=PALETTE,
                    text_color=PALETTE,
                    mask_color=PALETTE,
                    show=show,
                    out_file=out_file,
                    score_thr=show_score_thr)

        # encode mask results
        head = result[0]
        if isinstance(head, tuple):
            result = [(bboxes, encode_mask_results(masks))
                      for bboxes, masks in result]
        # This logic is only used in panoptic segmentation test.
        elif isinstance(head, dict) and 'ins_results' in head:
            for item in result:
                bboxes, masks = item['ins_results']
                item['ins_results'] = (bboxes, encode_mask_results(masks))

        collected.extend(result)
        for _ in range(batch_size):
            prog_bar.update()
    return collected
def my_make_lmdb(mode, data_path, lmdb_path, batch=5000, compress_level=1):
    """Create lmdb for the Vimeo90K dataset.(do not need training list)

    Contents of lmdb. The file structure is:
    example.lmdb
    ├── data.mdb
    ├── lock.mdb
    ├── meta_info.txt

    The data.mdb and lock.mdb are standard lmdb files and you can refer to
    https://lmdb.readthedocs.io/en/release/ for more details.

    The meta_info.txt is a specified txt file to record the meta information
    of our datasets. It will be automatically created when preparing
    datasets by our provided dataset tools.
    Each line in the txt file records 1)image name (with extension),
    2)image shape, and 3)compression level, separated by a white space.

    For example, the meta information could be:
    `000_00000000.png (720,1280,3) 1`, which means:
    1) image name (with extension): 000_00000000.png;
    2) image shape: (720,1280,3);
    3) compression level: 1

    We use the image name without extension as the lmdb key.

    Every entry of ``data_path`` is expected to be a directory named
    ``<folder>_<sub_folder>`` holding ``frame_00000.png`` ..
    ``frame_00006.png``; frame ``j`` is stored under the key
    ``<folder>_<sub_folder>_<j + 1>``.

    Args:
        mode (str): Dataset mode. 'gt' or 'lq'.
        data_path (str): Data path for reading images.
        lmdb_path (str): Lmdb save path.
        batch (int): After processing batch images, lmdb commits.
            Default: 5000.
        compress_level (int): Compress level when encoding images. Default: 1.

    Raises:
        SystemExit: If ``lmdb_path`` already exists (exit code 1).
        AssertionError: If an image does not have the shape expected for
            ``mode``.
    """
    print(f'Create lmdb for {data_path}, save to {lmdb_path}...')
    if mode == 'gt':
        h_dst, w_dst = 256, 448
    else:
        h_dst, w_dst = 64, 112

    if osp.exists(lmdb_path):
        print(f'Folder {lmdb_path} already exists. Exit.')
        sys.exit(1)

    print('Reading image path list ...')
    # Keep every image path together with its key. Sorting the two lists
    # independently can misalign them, because '/' sorts before digits
    # while '_' sorts after them. Paths are stored relative to `data_path`
    # so it is joined exactly once when reading.
    entries = []
    for clip_dir in os.listdir(data_path):
        folder, sub_folder = clip_dir.split('_')
        for j in range(7):
            entries.append((osp.join(clip_dir, f'frame_0000{j}.png'),
                            f'{folder}_{sub_folder}_{j + 1}'))
    entries.sort()

    if mode == 'gt':  # only read the 4th frame for the gt mode
        print('Only keep the 4th frame for gt mode.')
        entries = [(path, key) for path, key in entries
                   if path.endswith('3.png')]

    all_img_list = [path for path, _ in entries]
    keys = [key for _, key in entries]

    # create lmdb environment
    # obtain data size for one image; the map size is estimated from it
    img = mmcv.imread(osp.join(data_path, all_img_list[0]), flag='unchanged')
    _, img_byte = cv2.imencode('.png', img,
                               [cv2.IMWRITE_PNG_COMPRESSION, compress_level])
    data_size_per_img = img_byte.nbytes
    print('Data size per image is: ', data_size_per_img)
    data_size = data_size_per_img * len(all_img_list)
    env = lmdb.open(lmdb_path, map_size=data_size * 10)

    # write data to lmdb; the env and the meta file are released even when
    # an image fails to load or has the wrong shape.
    pbar = mmcv.ProgressBar(len(all_img_list))
    try:
        txn = env.begin(write=True)
        with open(osp.join(lmdb_path, 'meta_info.txt'), 'w') as txt_file:
            for idx, (path, key) in enumerate(zip(all_img_list, keys)):
                pbar.update()
                key_byte = key.encode('ascii')
                img = mmcv.imread(osp.join(data_path, path), flag='unchanged')
                h, w, c = img.shape
                _, img_byte = cv2.imencode(
                    '.png', img,
                    [cv2.IMWRITE_PNG_COMPRESSION, compress_level])
                assert h == h_dst and w == w_dst and c == 3, (
                    f'Wrong shape ({h, w}), should be ({h_dst, w_dst}).')
                txn.put(key_byte, img_byte)
                # write meta information
                txt_file.write(f'{key}.png ({h},{w},{c}) {compress_level}\n')
                if idx % batch == 0:
                    txn.commit()
                    txn = env.begin(write=True)
            txn.commit()
    finally:
        env.close()
    print('\nFinish writing lmdb.')
def after_train_iter(self, runner):
    """The behavior after each train iteration.

    When the current interval is hit, the hook switches the model to eval
    mode, feeds real images of the target domain and translated (fake)
    images to every metric, writes the metric results into
    ``runner.log_buffer`` and finally puts the model back into train mode.

    Args:
        runner (``mmcv.runner.BaseRunner``): The runner.

    Raises:
        KeyError: If a batch lacks the ``img_<domain>`` key that is needed.
    """
    interval = self.get_current_interval(runner)
    # only evaluate on the configured iterations
    if not self.every_n_iters(runner, interval):
        return

    runner.model.eval()
    # the first domain other than the target is used as translation source
    source_domain = runner.model.module.get_other_domains(
        self.target_domain)[0]
    # feed real images
    max_num_images = max(metric.num_images for metric in self.metrics)
    for metric in self.metrics:
        # skip metrics that already hold enough real samples
        if metric.num_real_feeded >= metric.num_real_need:
            continue
        mmcv.print_log(f'Feed reals to {metric.name} metric.', 'mmgen')
        # feed in real images
        for data in self.dataloader:
            # key for translation model
            if f'img_{self.target_domain}' in data:
                reals = data[f'img_{self.target_domain}']
            # any other batch layout is rejected
            else:
                raise KeyError(
                    'Cannot found key for images in data_dict. ')
            num_feed = metric.feed(reals, 'reals')
            # a non-positive count ends feeding for this metric
            if num_feed <= 0:
                break

    mmcv.print_log(f'Sample {max_num_images} fake images for evaluation',
                   'mmgen')

    rank, ws = get_dist_info()

    # define mmcv progress bar (rank 0 only)
    if rank == 0:
        pbar = mmcv.ProgressBar(max_num_images)

    # feed in fake images
    for data in self.dataloader:
        # key for translation model
        if f'img_{source_domain}' in data:
            with torch.no_grad():
                output_dict = runner.model(
                    data[f'img_{source_domain}'],
                    test_mode=True,
                    target_domain=self.target_domain,
                    **self.sample_kwargs)
            fakes = output_dict['target']
        # key Error
        else:
            raise KeyError('Cannot found key for images in data_dict. ')
        # sampling fake images and directly send them to metrics
        # pbar update number for one proc
        num_update = 0
        for metric in self.metrics:
            # skip metrics that already hold enough fake samples
            if metric.num_fake_feeded >= metric.num_fake_need:
                continue
            num_feed = metric.feed(fakes, 'fakes')
            num_update = max(num_update, num_feed)
            # NOTE(review): this only leaves the metric loop; the dataloader
            # loop keeps running until it is exhausted -- confirm intended.
            if num_feed <= 0:
                break

        if rank == 0:
            if num_update > 0:
                # scaled by the world size taken from get_dist_info()
                pbar.update(num_update * ws)

    runner.log_buffer.clear()
    # a workaround to start a new line after the progress bar
    if rank == 0:
        sys.stdout.write('\n')
        for metric in self.metrics:
            with torch.no_grad():
                metric.summary()
                for name, val in metric._result_dict.items():
                    runner.log_buffer.output[name] = val

                    # record best metric and save the best ckpt
                    if self.save_best_ckpt and name in self.best_metric:
                        self._save_best_ckpt(runner, val, name)

        runner.log_buffer.ready = True
    runner.model.train()

    # clear all current states for next evaluation
    for metric in self.metrics:
        metric.clear()
sequence_info = sequence.sequence_info min_frame_idx = sequence.min_frame_idx max_frame_idx = sequence.max_frame_idx image_size = sequence.image_size print(f'[{i + 1}/{num_seqs}] Processing sequence {sequence_name} ...') if args.save_video: # create video writer fourcc = cv2.VideoWriter_fourcc(*'MJPG') save_video_path = os.path.join(cfg.output_dir, f'{sequence_name}.avi') writer = cv2.VideoWriter(save_video_path, fourcc, 20, (image_size[1], image_size[0])) videotracker.init_tracker() prog_bar = mmcv.ProgressBar(max_frame_idx) for frame_idx in range(min_frame_idx, max_frame_idx + 1): frame_info = sequence_info[frame_idx] videotracker.step(frame_info) prog_bar.update() results = videotracker.get_results() print('\n') output_file = os.path.join(cfg.output_dir, f'{sequence_name}.txt') with open(output_file, 'w') as f: for frame_idx, frame_result in results.items(): if args.save_video: img = cv2.imread(sequence_info[frame_idx].filename) for row in frame_result: print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' %
def main():
    """Ensemble two detectors with weighted boxes fusion and dump COCO JSON.

    Both models are run on every test image, their predictions are converted
    by ``post_predictions`` and merged with ``weighted_boxes_fusion``. The
    fused boxes are scaled by the original image size and appended to a
    COCO-style result list that is written to ``args.jsonfile``.
    """
    args = parse_args()

    config_file1 = './swa/swa_cascade_rcnn_r50_rfp_sac_iou_alldata-v3_e15/swa_cascade_rcnn_r50_rfp_sac_iou_alldata-v3_e15.py'
    # NOTE(review): in the collapsed source this assignment sits behind a
    # `#`, yet the name is required by `load_checkpoint` below, so it is
    # kept as a live assignment -- confirm the intended checkpoint.
    checkpoint_file1 = './swa/swa_cascade_rcnn_r50_rfp_sac_iou_alldata-v3_e15/swa_model_12.pth'
    config_file2 = './swa/cascade_rcnn_r50_rfp_sac_iou_ls_alldata-v3_e15.py'
    checkpoint_file2 = './swa/epoch_15.pth'
    device = 'cuda:0'

    cfg1 = Config.fromfile(config_file1)
    cfg2 = Config.fromfile(config_file2)

    # build model
    # model1
    model1 = build_detector(cfg1.model, test_cfg=cfg1.get('test_cfg'))
    load_checkpoint(model1, checkpoint_file1, map_location=device)
    # model2
    model2 = build_detector(cfg2.model, test_cfg=cfg2.get('test_cfg'))
    load_checkpoint(model2, checkpoint_file2, map_location=device)

    # map every test file name to its COCO image id
    with open(cfg1.data.test.ann_file) as ann_fp:
        test_json_raw = json.load(ann_fp)
    name2imgid = {
        imageinfo['file_name']: imageinfo['id']
        for imageinfo in test_json_raw['images']
    }

    wrap_fp16_model(model1)  # use fp16 to speed up inference
    wrap_fp16_model(model2)

    # build the dataloader
    # aug_test does not support batch_size > 1
    samples_per_gpu = cfg1.data.test.pop('samples_per_gpu', 1)
    dataset = build_dataset(cfg1.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=samples_per_gpu,
        workers_per_gpu=4,
        dist=False,
        shuffle=False)

    # Original author's note: inference fails without this wrapper.
    model1 = MMDataParallel(model1, device_ids=[0])
    model2 = MMDataParallel(model2, device_ids=[0])
    model1.eval()
    model2.eval()

    json_results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for data in data_loader:
        with torch.no_grad():
            result1 = model1(return_loss=False, rescale=True, **data)
            result2 = model2(return_loss=False, rescale=True, **data)
        batch_size = len(result1)
        assert len(result1) == len(result2)

        result1 = result1[0]  # only one image is fed at a time
        result2 = result2[0]
        img_metas = data['img_metas'][0].data[0]
        img_shape = img_metas[0]['ori_shape']
        bboxes, scores, labels = post_predictions(result1, img_shape)
        e_bboxes, e_scores, e_labels = post_predictions(result2, img_shape)

        bboxes, scores, labels = weighted_boxes_fusion(
            [bboxes, e_bboxes], [scores, e_scores], [labels, e_labels],
            weights=[1, 1],
            iou_thr=0.6,
            skip_box_thr=0.0001,
            conf_type='max')

        for box, score, label in zip(bboxes, scores, labels):
            xmin, ymin, xmax, ymax = box.tolist()
            # scale x by the width and y by the height of the original image
            xmin = round(float(xmin) * img_shape[1], 2)
            ymin = round(float(ymin) * img_shape[0], 2)
            xmax = round(float(xmax) * img_shape[1], 2)
            ymax = round(float(ymax) * img_shape[0], 2)
            # A separate name keeps the batch variable `data` intact.
            record = dict()
            record['image_id'] = name2imgid[img_metas[0]['ori_filename']]
            record['bbox'] = [xmin, ymin, xmax - xmin, ymax - ymin]
            record['score'] = float(score)
            record['category_id'] = label + 1
            json_results.append(record)

        for _ in range(batch_size):
            prog_bar.update()

    mmcv.dump(json_results, args.jsonfile)
def multi_gpu_test(model,
                   data_loader,
                   tmpdir=None,
                   gpu_collect=False,
                   bbox_head=None):
    """Test model with multiple gpus.

    This method tests model with multiple gpus and collects the results
    under two different modes: gpu and cpu modes. By setting
    'gpu_collect=True' it encodes results to gpu tensors and use gpu
    communication for results collection. On cpu mode it saves the results on
    different gpus to 'tmpdir' and collects them by the rank 0 worker.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.
        tmpdir (str): Path of directory to save the temporary results from
            different gpus under cpu mode.
        gpu_collect (bool): Option to use either gpu or cpu to collect
            results.
        bbox_head (object | None): Head config exposing ``type`` and
            ``task``. When its type is ``'LSHead'`` the results are
            post-processed according to ``task``; ``None`` (the default)
            selects the plain mask-encoding path.

    Returns:
        list: The prediction results.
    """
    model.eval()
    results = []
    dataset = data_loader.dataset
    rank, world_size = get_dist_info()
    if rank == 0:
        prog_bar = mmcv.ProgressBar(len(dataset))
    time.sleep(2)  # This line can prevent deadlock problem in some cases.

    # The default `bbox_head=None` must not be dereferenced.
    is_ls_head = bbox_head is not None and bbox_head.type == 'LSHead'

    for data in data_loader:
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)

        if is_ls_head:
            if bbox_head.task == 'bbox':
                # drop the trailing entry, then keep the first one
                result.pop(-1)
                result = result[0]
            elif bbox_head.task == 'segm':
                bbox_results, poly_results = result
                img_metas = data['img_metas'][0].data[0]
                ori_h, ori_w = img_metas[0]['ori_shape'][:-1]
                encoded_poly_results = encode_poly_results(
                    poly_results, ori_h, ori_w)
                result = bbox_results, encoded_poly_results
        elif isinstance(result, tuple):
            bbox_results, mask_results = result
            encoded_mask_results = encode_mask_results(mask_results)
            result = bbox_results, encoded_mask_results
        results.append(result)

        if rank == 0:
            batch_size = len(data['img_metas'][0].data)
            for _ in range(batch_size * world_size):
                prog_bar.update()

    # collect results from all ranks
    if gpu_collect:
        results = collect_results_gpu(results, len(dataset))
    else:
        results = collect_results_cpu(results, len(dataset), tmpdir)
    return results
def __init__(self,
             release_version,
             imageset,
             save_vis=False,
             show_pseudomask=False,
             encode='centerness',
             heatmap_rate=0.5,
             factor=4,
             method='min_area',
             multi_processing=False,
             num_processor=4):
    """Set up paths, anchor heat-map images and the COCO index.

    Args:
        release_version (str): Dataset release used to build the paths.
        imageset (str): Image set name used to build the paths.
        save_vis (bool): Stored on the instance as ``save_vis``.
        show_pseudomask (bool): Stored as ``show_pseudomask``.
        encode (str): One of ``'centerness'``, ``'gaussian'`` or
            ``'ellipse'``; selects the output directory name.
        heatmap_rate (float): Scaled by 255 to get the threshold handed to
            the image generators.
        factor (int): Factor handed to ``generate_centerness_image``.
        method (str): Stored on the instance as ``method``.
        multi_processing (bool): Stored as ``multi_processing``.
        num_processor (int): Size of the worker pool that is created.
    """
    # plain settings
    self.release_version = release_version
    self.imageset = imageset
    self.encode = encode
    self.factor = factor
    self.method = method
    self.save_dir_names = dict(
        centerness='reverse_centerness_seg',
        gaussian='reverse_gaussian_seg',
        ellipse='reverse_ellipse_seg')

    # dataset locations
    version_root = f'./data/{core_dataset}/{self.release_version}'
    self.imgDir = f'{version_root}/coco/{self.imageset}/'
    self.annFile = '{}/coco/annotations/{}.json'.format(
        version_root, "_".join(ann_file_name))

    self.save_vis = save_vis
    self.show_pseudomask = show_pseudomask

    # output locations
    imageset_root = f'{version_root}/{self.imageset}'
    self.save_path = f'{imageset_root}/{self.save_dir_names[self.encode]}'
    self.save_vis_path = f'{imageset_root}/pseudomask_vis'
    for out_dir in (self.save_path, self.save_vis_path):
        mmcv.mkdir_or_exist(out_dir)

    # inverted 512x512 anchor images, one per encoding
    self.heatmap_rate = heatmap_rate
    threshold = int(self.heatmap_rate * 255)
    self.gaussian_image = 255 - generate_gaussian_image(
        512, 512, 2.5, threshold=threshold)
    self.centerness_image = 255 - generate_centerness_image(
        512, 512, factor=self.factor, threshold=threshold)
    self.ellipse_image = 255 - generate_ellipse_image(
        512, 512, threshold=threshold)
    self.anchor_image = dict(
        centerness=self.centerness_image,
        gaussian=self.gaussian_image,
        ellipse=self.ellipse_image)

    # COCO annotations and the ids to iterate over
    self.coco = COCO(self.annFile)
    self.catIds = self.coco.getCatIds(catNms=[''])
    self.imgIds = self.coco.getImgIds(catIds=self.catIds)

    self.progress_bar = mmcv.ProgressBar(len(self.imgIds))
    self.multi_processing = multi_processing
    self.pool = Pool(num_processor)
def single_gpu_test(model,
                    data_loader,
                    bbox_head=None,
                    show=False,
                    out_dir=None,
                    show_score_thr=0.3):
    """Test a model on one GPU, with optional LSHead post-processing.

    Args:
        model (nn.Module): Wrapped model; ``model.module.show_result`` is
            used for visualisation.
        data_loader: Loader yielding test batches.
        bbox_head (object | None): Head config exposing ``type`` and
            ``task``. When its type is ``'LSHead'`` the results are
            post-processed according to ``task``; ``None`` (the default)
            selects the plain mask-encoding path.
        show (bool): Whether to display results; also forwarded to the
            model call.
        out_dir (str | None): Directory for drawn results; also forwarded
            to the model call.
        show_score_thr (float): Score threshold passed to ``show_result``.

    Returns:
        list: One result entry per batch.
    """
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))

    # The default `bbox_head=None` must not be dereferenced.
    is_ls_head = bbox_head is not None and bbox_head.type == 'LSHead'

    for data in data_loader:
        with torch.no_grad():
            result = model(
                return_loss=False,
                rescale=True,
                show=show,
                out_dir=out_dir,
                **data)

        if show or out_dir:
            img_tensor = data['img'][0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for img, img_meta in zip(imgs, img_metas):
                # crop away the padding, then go back to the original size
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]
                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None

                model.module.show_result(
                    img_show,
                    result,
                    show=show,
                    out_file=out_file,
                    score_thr=show_score_thr)

        if is_ls_head:
            if bbox_head.task == 'bbox':
                # drop the trailing entry, then keep the first one
                result.pop(-1)
                result = result[0]
            elif bbox_head.task == 'segm':
                bbox_results, poly_results = result
                img_metas = data['img_metas'][0].data[0]
                ori_h, ori_w = img_metas[0]['ori_shape'][:-1]
                encoded_poly_results = encode_poly_results(
                    poly_results, ori_h, ori_w)
                result = bbox_results, encoded_poly_results
        elif isinstance(result, tuple):
            bbox_results, mask_results = result
            encoded_mask_results = encode_mask_results(mask_results)
            result = bbox_results, encoded_mask_results
        results.append(result)

        batch_size = len(data['img_metas'][0].data)
        for _ in range(batch_size):
            prog_bar.update()
    return results
def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    show_score_thr=0.3):
    """Test a model on one GPU, encoding masks of 2-, 3- or 4-tuple results.

    Args:
        model (nn.Module): Wrapped model; ``model.module.show_result`` is
            used for visualisation.
        data_loader: Loader yielding test batches.
        show (bool): Whether to display the drawn results.
        out_dir (str | None): Directory for the drawn results, if any.
        show_score_thr (float): Score threshold passed to ``show_result``.

    Returns:
        list: One result entry per batch.
    """
    model.eval()
    collected = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))

    for data in data_loader:
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)

        if show or out_dir:
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(data['img'][0],
                               **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for img, img_meta in zip(imgs, img_metas):
                # crop away the padding, then go back to the original size
                h, w, _ = img_meta['img_shape']
                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img[:h, :w, :], (ori_w, ori_h))

                out_file = None
                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])

                model.module.show_result(
                    img_show,
                    result,
                    show=show,
                    out_file=out_file,
                    score_thr=show_score_thr)

        # encode mask results; the tuple length tells the outputs apart
        if isinstance(result, tuple):
            num_outputs = len(result)
            if num_outputs == 2:
                # Mask R-CNN
                bboxes, masks = result
                result = bboxes, encode_mask_results(masks)
            elif num_outputs == 3:
                # Mask R-CNN + Offset
                bboxes, masks, offsets = result
                if masks is not None:
                    result = bboxes, encode_mask_results(masks), offsets
                else:
                    # only pred offset
                    result = bboxes, offsets
            elif num_outputs == 4:
                # Mask R-CNN + Offset + Height
                bboxes, masks, offsets, heights = result
                result = (bboxes, encode_mask_results(masks), offsets,
                          heights)
        collected.append(result)

        for _ in range(len(data['img_metas'][0].data)):
            prog_bar.update()
    return collected
def update_bn_stats(model, data_loader, num_iters=200, logger=None):
    """Recompute and update the batch norm stats to make them more precise.

    During training both BN stats and the weight are changing after every
    iteration, so the running average can not precisely reflect the actual
    stats of the current model.
    In this function, the BN stats are recomputed with fixed weights, to make
    the running average more precise. Specifically, it computes the true
    average of per-batch mean/variance instead of the running average.

    Args:
        model (nn.Module): The model whose bn stats will be recomputed.
        data_loader (iterator): The DataLoader iterator.
        num_iters (int): number of iterations to compute the stats.
        logger (:obj:`logging.Logger` | None): Logger for logging.
            Default: None.

    Raises:
        AssertionError: If the loader has fewer than ``num_iters`` batches,
            or stops before ``num_iters`` iterations were run.
    """
    model.train()

    assert len(data_loader) >= num_iters, (
        f'length of dataloader {len(data_loader)} must be greater than '
        f'iteration number {num_iters}')

    # Forward through the (possibly wrapped) module, but inspect the layers
    # of the bare model.
    if is_parallel_module(model):
        parallel_module = model
        model = model.module
    else:
        parallel_module = model

    # Finds all the bn layers with training=True.
    bn_layers = [
        m for m in model.modules() if m.training and isinstance(m, _BatchNorm)
    ]

    if not bn_layers:
        print_log('No BN found in model', logger=logger, level=logging.WARNING)
        return
    print_log(f'{len(bn_layers)} BN found', logger=logger)

    # Finds all the other norm layers with training=True.
    for m in model.modules():
        if m.training and isinstance(m, (_InstanceNorm, GroupNorm)):
            print_log(
                'IN/GN stats will be updated like training.',
                logger=logger,
                level=logging.WARNING)

    # In order to make the running stats only reflect the current batch, the
    # momentum is disabled.
    # bn.running_mean = (1 - momentum) * bn.running_mean + momentum *
    # batch_mean
    # Setting the momentum to 1.0 to compute the stats without momentum.
    momentum_actual = [bn.momentum for bn in bn_layers]  # pyre-ignore
    for bn in bn_layers:
        bn.momentum = 1.0

    # Note that running_var actually means "running average of variance"
    running_mean = [torch.zeros_like(bn.running_mean) for bn in bn_layers]
    running_var = [torch.zeros_like(bn.running_var) for bn in bn_layers]

    finish_before_loader = False
    # Only `num_iters` batches are consumed, so the bar is sized to that.
    prog_bar = mmcv.ProgressBar(num_iters)
    try:
        for ind, data in enumerate(data_loader):
            with torch.no_grad():
                parallel_module(**data, return_loss=False)
            prog_bar.update()
            for i, bn in enumerate(bn_layers):
                # Accumulates the bn stats as an incremental mean.
                running_mean[i] += (bn.running_mean -
                                    running_mean[i]) / (ind + 1)
                running_var[i] += (bn.running_var -
                                   running_var[i]) / (ind + 1)

            if (ind + 1) >= num_iters:
                finish_before_loader = True
                break
    finally:
        # Never leave the layers with momentum 1.0, even when a forward
        # pass raises.
        for bn, momentum in zip(bn_layers, momentum_actual):
            bn.momentum = momentum

    assert finish_before_loader, 'Dataloader stopped before ' \
                                 f'iteration {num_iters}'

    for i, bn in enumerate(bn_layers):
        # Sets the precise bn stats.
        bn.running_mean = running_mean[i]
        bn.running_var = running_var[i]
def _write_video_from_frames(out_dir, img_meta, fps):
    """Assemble the saved frames of one video into ``out_video.mp4``.

    The video directory is derived from ``img_meta['ori_filename']``, which
    is expected to look like ``<video prefix>/<frame index>.<ext>``.
    """
    img_prefix, img_name = img_meta['ori_filename'].rsplit('/', 1)
    img_idx, img_type = img_name.split('.')
    filename_tmpl = '{:0' + str(len(img_idx)) + 'd}.' + img_type
    img_dir = f'{out_dir}/{img_prefix}'
    # Only consider frame images: a video left over from a previous run
    # lives in the same directory and has no integer frame index.
    img_names = sorted(
        name for name in os.listdir(img_dir)
        if name.endswith('.' + img_type))
    start_frame_id = int(img_names[0].split('.')[0])
    end_frame_id = int(img_names[-1].split('.')[0])
    mmcv.frames2video(
        img_dir,
        f'{img_dir}/out_video.mp4',
        fps=fps,
        fourcc='mp4v',
        filename_tmpl=filename_tmpl,
        start=start_frame_id,
        end=end_frame_id,
        show_progress=False)


def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    fps=3,
                    show_score_thr=0.3):
    """Test model with single gpu.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.
        show (bool, optional): If True, visualize the prediction results.
            Defaults to False.
        out_dir (str, optional): Path of directory to save the
            visualization results. Defaults to None.
        fps (int, optional): FPS of the output video.
            Defaults to 3.
        show_score_thr (float, optional): The score threshold of visualization
            (Only used in VID for now). Defaults to 0.3.

    Returns:
        dict[str, list]: The prediction results.
    """
    model.eval()
    results = defaultdict(list)
    dataset = data_loader.dataset
    prev_img_meta = None
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)

        batch_size = data['img'][0].size(0)
        if show or out_dir:
            assert batch_size == 1, 'Only support batch_size=1 when testing.'
            img_tensor = data['img'][0]
            img_meta = data['img_metas'][0].data[0][0]
            img = tensor2imgs(img_tensor, **img_meta['img_norm_cfg'])[0]

            # Crop the padding away and go back to the original resolution.
            h, w, _ = img_meta['img_shape']
            img_show = img[:h, :w, :]

            ori_h, ori_w = img_meta['ori_shape'][:-1]
            img_show = mmcv.imresize(img_show, (ori_w, ori_h))

            if out_dir:
                out_file = osp.join(out_dir, img_meta['ori_filename'])
            else:
                out_file = None

            model.module.show_result(
                img_show,
                result,
                show=show,
                out_file=out_file,
                score_thr=show_score_thr)

            if out_dir:
                # frame_id == 0 means the model starts processing a new
                # video, therefore the previous video (if any) is complete.
                if prev_img_meta is not None and img_meta['frame_id'] == 0:
                    _write_video_from_frames(out_dir, prev_img_meta, fps)
                # The last sample closes the current video. ``enumerate``
                # stops at ``len(dataset) - 1`` (batch size is 1 here), so
                # comparing against ``len(dataset)`` would never match.
                if i == len(dataset) - 1:
                    _write_video_from_frames(out_dir, img_meta, fps)
            prev_img_meta = img_meta

        for key in result:
            if 'mask' in key:
                result[key] = encode_mask_results(result[key])

        for k, v in result.items():
            results[k].append(v)

        for _ in range(batch_size):
            prog_bar.update()

    return results
def single_gpu_test(model, data_loader, show=False):
    """Run single-GPU inference while timing every forward pass.

    Besides collecting the results, the function prints a windowed average
    latency every 100 iterations and, at the end, an overall average from
    which the larger of the two ``time_torch2trt`` values imported from
    mmdet is subtracted. The overall average is appended to a fixed log
    file.

    Args:
        model (nn.Module): Model to be tested; it is called with
            ``return_loss=False`` and ``rescale=not show``.
        data_loader: Data loader whose batches are passed to the model.
        show (bool): If True, ``model.module.show_result`` is called for
            every batch. Defaults to False.

    Returns:
        list: One model output per batch.
    """
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    window_time = 0.0
    window_count = 0
    total_elapsed_time = 0.0
    training_examples = 0.0
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            t1_start = perf_counter()
            result = model(return_loss=False, rescale=not show, **data)
            # Wait for the GPU so the measured span covers the whole pass.
            torch.cuda.synchronize()
            t1_stop = perf_counter()
        step_time = t1_stop - t1_start
        # Accumulate the total on every iteration so a trailing partial
        # window is not lost.
        total_elapsed_time += step_time
        window_time += step_time
        window_count += 1
        training_examples = training_examples + 1
        if i % 100 == 0:
            # Divide by the number of samples actually in the window (the
            # very first window holds a single sample).
            print("Average Elapsed time", window_time / window_count)
            window_time = 0.0
            window_count = 0
        results.append(result)

        if show:
            model.module.show_result(data, result)

        batch_size = data['img'][0].size(0)
        for _ in range(batch_size):
            prog_bar.update()

    from mmdet.models.detectors.single_stage import time_torch2trt
    init_time_single = time_torch2trt
    from mmdet.models.detectors.two_stage import time_torch2trt
    init_time_two = time_torch2trt
    init_time = max(init_time_single, init_time_two)
    print("\n Total ELAPSED TIME", total_elapsed_time)
    print("\n Training examples", training_examples)
    print("Time taken to initialize torch2trt", init_time)
    if training_examples:
        avg_elapsed_time = (total_elapsed_time - init_time) / training_examples
    else:
        # Empty loader: nothing was timed.
        avg_elapsed_time = 0.0
    print("\n Average elapsed time", avg_elapsed_time)
    try:
        with open(
                "/home/nsathish/Efficient_object_detection/mmdetection/results/TensorRT_FP16-1080.txt",
                "a") as myfile:
            myfile.write("Average elapsed time:" + str(avg_elapsed_time) +
                         "\n")
    except OSError as err:
        # The log location is machine specific; failing to write it must
        # not throw away the inference results.
        print("Could not write timing log:", err)
    return results
def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    efficient_test=False,
                    opacity=0.5):
    """Run inference on a single GPU and gather the predictions.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (utils.data.Dataloader): Pytorch data loader.
        show (bool): Whether to display results during inference.
            Default: False.
        out_dir (str, optional): If given, painted results are written
            below this directory.
        efficient_test (bool): Whether to replace every result with the
            value returned by ``np2tmp`` (temporary numpy files) in order
            to save CPU memory during evaluation. Default: False.
        opacity (float): Opacity of the painted segmentation map, in
            (0, 1]. Default: 0.5.

    Returns:
        list: The prediction results.
    """
    model.eval()
    dataset = data_loader.dataset
    progress = mmcv.ProgressBar(len(dataset))
    collected = []

    for data in data_loader:
        with torch.no_grad():
            result = model(return_loss=False, **data)

        if show or out_dir:
            img_tensor = data['img'][0]
            metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **metas[0]['img_norm_cfg'])
            assert len(imgs) == len(metas)

            for img, meta in zip(imgs, metas):
                # Drop the padding, then restore the original size.
                height, width, _ = meta['img_shape']
                ori_h, ori_w = meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img[:height, :width, :],
                                         (ori_w, ori_h))

                out_file = None
                if out_dir:
                    out_file = osp.join(out_dir, meta['ori_filename'])

                model.module.show_result(
                    img_show,
                    result,
                    palette=dataset.PALETTE,
                    show=show,
                    out_file=out_file,
                    opacity=opacity)

        if not isinstance(result, list):
            if efficient_test:
                result = np2tmp(result)
            collected.append(result)
        else:
            if efficient_test:
                result = [np2tmp(item) for item in result]
            collected.extend(result)

        # One progress tick per element of the (possibly converted) result.
        for _ in range(len(result)):
            progress.update()

    return collected
def main():
    """Run the spatio-temporal action detection demo on one video.

    The steps are: extract and resize the frames, pick the centre frame of
    every clip, run human detection on those frames, run the action
    detector on each clip, then render the predictions into an output
    video and remove the temporary frame directory.
    """
    args = parse_args()

    frame_paths, original_frames = frame_extraction(args.video)
    num_frame = len(frame_paths)
    h, w, _ = original_frames[0].shape

    # resize frames to shortside 256
    # (``np.inf``: the ``np.Inf`` alias no longer exists in NumPy 2.0)
    new_w, new_h = mmcv.rescale_size((w, h), (256, np.inf))
    frames = [mmcv.imresize(img, (new_w, new_h)) for img in original_frames]
    w_ratio, h_ratio = new_w / w, new_h / h

    # Get clip_len, frame_interval and calculate center index of each clip
    config = mmcv.Config.fromfile(args.config)
    config.merge_from_dict(args.cfg_options)
    val_pipeline = config.data.val.pipeline

    sampler = [x for x in val_pipeline if x['type'] == 'SampleAVAFrames'][0]
    clip_len, frame_interval = sampler['clip_len'], sampler['frame_interval']
    window_size = clip_len * frame_interval
    assert clip_len % 2 == 0, 'We would like to have an even clip_len'
    # Note that it's 1 based here
    timestamps = np.arange(window_size // 2, num_frame + 1 - window_size // 2,
                           args.predict_stepsize)

    # Load label_map
    label_map = load_label_map(args.label_map)
    try:
        if config['data']['train']['custom_classes'] is not None:
            label_map = {
                idx + 1: label_map[cls]
                for idx, cls in enumerate(config['data']['train']
                                          ['custom_classes'])
            }
    except KeyError:
        pass

    # Get Human detection results
    center_frames = [frame_paths[ind - 1] for ind in timestamps]
    human_detections = detection_inference(args, center_frames)
    for i, det in enumerate(human_detections):
        # Scale the boxes to the resized frames.
        det[:, 0:4:2] *= w_ratio
        det[:, 1:4:2] *= h_ratio
        human_detections[i] = torch.from_numpy(det[:, :4]).to(args.device)

    # Get img_norm_cfg
    img_norm_cfg = config['img_norm_cfg']
    if 'to_rgb' not in img_norm_cfg and 'to_bgr' in img_norm_cfg:
        to_bgr = img_norm_cfg.pop('to_bgr')
        img_norm_cfg['to_rgb'] = to_bgr
    img_norm_cfg['mean'] = np.array(img_norm_cfg['mean'])
    img_norm_cfg['std'] = np.array(img_norm_cfg['std'])

    # Build STDET model
    try:
        # In our spatiotemporal detection demo, different actions should have
        # the same number of bboxes.
        config['model']['test_cfg']['rcnn']['action_thr'] = .0
    except KeyError:
        pass

    config.model.backbone.pretrained = None
    model = build_detector(config.model, test_cfg=config.get('test_cfg'))

    load_checkpoint(model, args.checkpoint, map_location=args.device)
    model.to(args.device)
    model.eval()

    predictions = []

    print('Performing SpatioTemporal Action Detection for each clip')
    assert len(timestamps) == len(human_detections)
    prog_bar = mmcv.ProgressBar(len(timestamps))
    for timestamp, proposal in zip(timestamps, human_detections):
        if proposal.shape[0] == 0:
            predictions.append(None)
            # Clips without proposals still count as processed.
            prog_bar.update()
            continue

        start_frame = timestamp - (clip_len // 2 - 1) * frame_interval
        frame_inds = start_frame + np.arange(0, window_size, frame_interval)
        frame_inds = list(frame_inds - 1)
        imgs = [frames[ind].astype(np.float32) for ind in frame_inds]
        for img in imgs:
            # In-place normalisation.
            mmcv.imnormalize_(img, **img_norm_cfg)
        # THWC -> CTHW -> 1CTHW
        input_array = np.stack(imgs).transpose((3, 0, 1, 2))[np.newaxis]
        input_tensor = torch.from_numpy(input_array).to(args.device)

        with torch.no_grad():
            result = model(
                return_loss=False,
                img=[input_tensor],
                img_metas=[[dict(img_shape=(new_h, new_w))]],
                proposals=[[proposal]])
            result = result[0]
            # One (initially empty) list of (label, score) per proposal.
            prediction = [[] for _ in range(proposal.shape[0])]
            # Perform action score thr
            for i in range(len(result)):
                if i + 1 not in label_map:
                    continue
                for j in range(proposal.shape[0]):
                    if result[i][j, 4] > args.action_score_thr:
                        prediction[j].append((label_map[i + 1], result[i][j,
                                                                          4]))
            predictions.append(prediction)
        prog_bar.update()

    results = []
    for human_detection, prediction in zip(human_detections, predictions):
        results.append(pack_result(human_detection, prediction, new_h, new_w))

    def dense_timestamps(timestamps, n):
        """Make it nx frames."""
        if len(timestamps) > 1:
            old_frame_interval = (timestamps[1] - timestamps[0])
        else:
            # ``timestamps`` is an arange with this step; use it directly
            # when there are not two entries to take a difference from.
            old_frame_interval = args.predict_stepsize
        start = timestamps[0] - old_frame_interval / n * (n - 1) / 2
        new_frame_inds = np.arange(
            len(timestamps) * n) * old_frame_interval / n + start
        # ``np.int`` was only an alias of the builtin and has been removed
        # from NumPy.
        return new_frame_inds.astype(int)

    dense_n = int(args.predict_stepsize / args.output_stepsize)
    frames = [
        cv2.imread(frame_paths[i - 1])
        for i in dense_timestamps(timestamps, dense_n)
    ]
    print('Performing visualization')
    vis_frames = visualize(frames, results)
    vid = mpy.ImageSequenceClip([x[:, :, ::-1] for x in vis_frames],
                                fps=args.output_fps)
    vid.write_videofile(args.out_filename)

    # Remove the directory the extracted frames were written to.
    tmp_frame_dir = osp.dirname(frame_paths[0])
    shutil.rmtree(tmp_frame_dir)
def after_train_iter(self, runner):
    """Evaluate the metrics after a train iteration when it is due.

    Real images are fed to the metrics first (as long as some metric still
    needs them), then fake images sampled from the model; finally every
    metric is summarised into the runner's log buffer.

    Args:
        runner (``mmcv.runner.BaseRunner``): The runner.
    """
    interval = self.get_current_interval(runner)
    if not self.every_n_iters(runner, interval):
        return

    runner.model.eval()

    batch_size = self.dataloader.batch_size
    rank, ws = get_dist_info()
    total_batch_size = batch_size * ws
    is_main = rank == 0

    # ---- real images -----------------------------------------------------
    max_real_num_images = max(metric.num_images - metric.num_real_feeded
                              for metric in self.metrics)
    need_reals = max_real_num_images > 0

    # progress bar lives on the main process only
    if is_main and need_reals:
        mmcv.print_log(
            f'Sample {max_real_num_images} real images for evaluation',
            'mmgen')
        pbar = mmcv.ProgressBar(max_real_num_images)

    if need_reals:
        for data in self.dataloader:
            if 'real_img' in data:
                reals = data['real_img']
            elif 'img' in data:
                # key for conditional GAN
                reals = data['img']
            else:
                raise KeyError('Cannot found key for images in data_dict. '
                               'Only support `real_img` for unconditional '
                               'datasets and `img` for conditional '
                               'datasets.')

            channels = reals.shape[1]
            if channels not in [1, 3]:
                raise RuntimeError('real images should have one or three '
                                   'channels in the first, '
                                   'not % d' % reals.shape[1])
            if channels == 1:
                # expand single-channel images to three channels
                reals = reals.repeat(1, 3, 1, 1)

            # largest amount reported by any metric, never below zero
            num_feed = max(
                [0] + [metric.feed(reals, 'reals') for metric in self.metrics])
            if num_feed <= 0:
                break

            if is_main:
                pbar.update(num_feed)

    # ---- fake images -----------------------------------------------------
    max_num_images = max(metric.num_images for metric in self.metrics)
    if is_main:
        mmcv.print_log(
            f'Sample {max_num_images} fake images for evaluation', 'mmgen')
        pbar = mmcv.ProgressBar(max_num_images)

    # sample fakes and hand them straight to every metric
    for _ in range(0, max_num_images, total_batch_size):
        with torch.no_grad():
            fakes = runner.model(
                None,
                num_batches=batch_size,
                return_loss=False,
                **self.sample_kwargs)

            for metric in self.metrics:
                metric.feed(fakes, 'fakes')

        if is_main:
            pbar.update(total_batch_size)

    runner.log_buffer.clear()
    # a dirty walkround to change the line at the end of pbar
    if is_main:
        sys.stdout.write('\n')

    for metric in self.metrics:
        with torch.no_grad():
            metric.summary()
            for name, val in metric._result_dict.items():
                runner.log_buffer.output[name] = val
                # keep track of the best metric and save its checkpoint
                if self.save_best_ckpt and name in self.best_metric:
                    self._save_best_ckpt(runner, val, name)

    runner.log_buffer.ready = True
    runner.model.train()

    # reset the metric states for the next evaluation
    for metric in self.metrics:
        metric.clear()
def single_selsa_gpu_test(model,
                          data_loader,
                          all_frame_interval=21,
                          show=False,
                          rank=0,
                          world_size=1):
    """Run sliding-window video detection with a feature buffer.

    Backbone features of consecutive frames are kept in deques of at most
    ``all_frame_interval`` entries. ``dataset.key_frame_flag`` (read after
    each batch was fetched) selects what happens with the current frame:

    * ``0``: new buffers are created and filled with copies of this
      frame's feature.
    * ``2``: the feature is appended; once the buffer already held
      ``all_frame_interval - 1`` entries, a detection is run for the
      frame at the centre of the buffer.
    * ``1``: the buffer is padded with this frame's feature and a
      detection is run after every padded entry.

    Args:
        model (nn.Module): Model called with ``backbone_feat=True`` for
            feature extraction and ``forward_feat=True`` for detection.
        data_loader: Loader over ``data_loader.dataset``.
        all_frame_interval (int): Maximum buffer length. Defaults to 21.
        show (bool): Detections are rescaled iff this is False.
        rank (int): The progress bar is only driven when this is 0.
        world_size (int): Multiplier for the progress bar step.

    Returns:
        list: Detection results, stored at index ``frame id - 1``;
        entries that were never written stay ``None``.

    Raises:
        NotImplementedError: If ``dataset.video_shuffle`` is falsy when a
            detection has to be stored.
    """
    model.eval()
    dataset = data_loader.dataset
    if rank == 0:
        prog_bar = mmcv.ProgressBar(len(dataset))
    num_images = dataset.size

    # Per video: either a lookup of frame ids or the id of the first frame.
    video_frames_strts = [
        x['unique_ids'] if 'unique_ids' in x else x['frame_id']
        for x in dataset.img_infos
    ]

    center = (all_frame_interval - 1) // 2
    half_window = (all_frame_interval + 1) / 2

    batch_size = None
    video_idx = -1
    frame_idx = 0
    # ``np.int`` was only an alias of the builtin and has been removed
    # from NumPy.
    frame_ids = np.zeros(num_images, dtype=int)
    all_bboxes = [None] * num_images

    def detect_center_frame(feats, offsets, metas):
        """Detect on the buffered window and store the centre frame result."""
        with torch.no_grad():
            result = model(
                x=feats,
                img=None,
                img_meta=metas,
                forward_feat=True,
                return_loss=False,
                rescale=not show)
        if not dataset.video_shuffle:
            # The original ``assert "<message>"`` could never fail and let
            # the result be stored at a meaningless index.
            raise NotImplementedError(
                'Unshuffled video validation not implemented')
        start = video_frames_strts[video_idx]
        offset = offsets[center]
        if not isinstance(start, int):
            frame_ids[frame_idx] = start[offset]
        else:
            frame_ids[frame_idx] = start + offset
        all_bboxes[frame_ids[frame_idx] - 1] = result
        if rank == 0:
            for _ in range(batch_size * world_size):
                prog_bar.update()

    for data in data_loader:
        if not batch_size:
            batch_size = len(data['img_meta'].data[0])
        img_meta = data['img_meta'].data[0][0]
        frame_offset = img_meta['frame_offset']
        key_frame_flag = dataset.key_frame_flag
        seg_len = img_meta['seg_len']

        if key_frame_flag == 0:
            # New buffers, pre-filled with copies of the current frame.
            feat_list = deque(maxlen=all_frame_interval)
            frame_offset_list = deque(maxlen=all_frame_interval)
            img_meta_list = deque(maxlen=all_frame_interval)
            video_idx += 1
            with torch.no_grad():
                cur_feat = model(backbone_feat=True, **data)
            while len(feat_list) < half_window:
                feat_list.append(cur_feat[0])
                frame_offset_list.append(frame_offset)
                img_meta_list.append(img_meta)

        elif key_frame_flag == 2:
            # Detection only starts once the buffer was nearly full before
            # this frame was added.
            window_ready = len(feat_list) >= all_frame_interval - 1
            with torch.no_grad():
                feat = model(backbone_feat=True, **data)
            feat_list.append(feat[0])
            frame_offset_list.append(frame_offset)
            img_meta_list.append(img_meta)
            if window_ready:
                detect_center_frame(feat_list, frame_offset_list,
                                    img_meta_list)
                frame_idx += batch_size

        elif key_frame_flag == 1:
            with torch.no_grad():
                feat = model(backbone_feat=True, **data)
            while len(feat_list) < all_frame_interval - 1:
                feat_list.append(feat[0])
                frame_offset_list.append(frame_offset)
                img_meta_list.append(img_meta)
            # NOTE(review): reconstructed as one detection per padded
            # entry (detection inside this loop) - confirm against the
            # original layout.
            end_counter = 0
            while end_counter < min(seg_len, half_window):
                feat_list.append(feat[0])
                frame_offset_list.append(frame_offset)
                img_meta_list.append(img_meta)
                end_counter += 1
                detect_center_frame(feat_list, frame_offset_list,
                                    img_meta_list)
                frame_idx += batch_size

    return all_bboxes
# Numerical gradient check of the CARAFE module, then of the naive variant.
test = gradcheck(CARAFE(5, 4, 2), (feat, mask), atol=1e-4, eps=1e-4)
print(test)

print('Gradcheck for carafe naive...')
test = gradcheck(CARAFENAIVE(5, 4, 2), (feat, mask), atol=1e-4, eps=1e-4)
print(test)

# Fresh inputs for the timing runs below.
feat = torch.randn(
    2, 1024, 100, 100, requires_grad=True, device='cuda:0').float()
mask = torch.randn(
    2, 25, 200, 200, requires_grad=True, device='cuda:0').sigmoid().float()
loop_num = 500

# Accumulate forward / backward wall time of ``carafe`` over all loops.
time_forward = 0
time_backward = 0
bar = mmcv.ProgressBar(loop_num)
timer = mmcv.Timer()
for i in range(loop_num):
    x = carafe(feat.clone(), mask.clone(), 5, 1, 2)
    # wait for the GPU before reading the timer
    torch.cuda.synchronize()
    time_forward = time_forward + timer.since_last_check()

    x.sum().backward(retain_graph=True)
    torch.cuda.synchronize()
    time_backward = time_backward + timer.since_last_check()

    bar.update()

print('\nCARAFE time forward: {} ms/iter | time backward: {} ms/iter'.format(
    (time_forward + 1e-3) * 1e3 / loop_num,
    (time_backward + 1e-3) * 1e3 / loop_num))

# Accumulators for the timing of the naive implementation.
time_naive_forward = 0
time_naive_backward = 0
def multi_hnl_gpu_test(model,
                       data_loader,
                       all_frame_interval,
                       tmpdir=None,
                       gpu_collect=False,
                       show=False):
    """Run sliding-window video detection on every rank and collect results.

    Each rank processes the videos assigned to it through
    ``dataset.global_video_size_list`` and keeps backbone features of
    consecutive frames in deques of at most ``all_frame_interval``
    entries. ``dataset.key_frame_flag`` (read after each batch was
    fetched) selects what happens with the current frame:

    * ``0``: new buffers are created and filled with copies of this
      frame's feature.
    * ``2``: the feature is appended; once the buffer already held
      ``all_frame_interval - 1`` entries, a detection is run for the
      frame at the centre of the buffer.
    * ``1``: the buffer is padded with this frame's feature and a
      detection is run after every padded entry.

    Args:
        model (nn.Module): Model called with ``backbone_feat=True`` for
            feature extraction and ``forward_feat=True`` for detection.
        data_loader: Loader over ``data_loader.dataset``.
        all_frame_interval (int): Maximum buffer length.
        tmpdir (str | None): Passed to ``collect_selsa_results_cpu``.
        gpu_collect (bool): Not supported; must be False.
        show (bool): Detections are rescaled iff this is False.

    Returns:
        The value returned by ``collect_selsa_results_cpu``.

    Raises:
        NotImplementedError: If ``gpu_collect`` is True, or if
            ``dataset.video_shuffle`` is falsy when a detection has to be
            stored.
    """
    if gpu_collect:
        # Fail before doing any work; previously this path ran the whole
        # inference and then died on an unbound ``results``.
        raise NotImplementedError('gpu_collect is not implemented yet!')

    model.eval()
    dataset = data_loader.dataset
    rank, world_size = get_dist_info()
    if rank == 0:
        prog_bar = mmcv.ProgressBar(len(dataset))
    dataset.is_dataset_global = ['I am going to be mad!!!']
    num_images = dataset.local_frame_size_list[rank]

    # Slice of ``img_infos`` holding the videos of this rank.
    video_sizes = dataset.global_video_size_list
    rank_video_infos = dataset.img_infos[sum(video_sizes[:rank]):sum(
        video_sizes[:rank + 1])]
    if rank_video_infos:
        assert rank_video_infos[0][
            'frame_id'] == 1, "Wrong frame_id of first video in local rank {}".format(
                rank)

    # Per video: either a lookup of frame ids or the id of the first frame.
    video_frames_strts = [
        x['unique_ids'] if 'unique_ids' in x else x['frame_id']
        for x in rank_video_infos
    ]

    center = (all_frame_interval - 1) // 2
    half_window = (all_frame_interval + 1) / 2

    batch_size = None
    video_idx = -1
    frame_idx = 0
    # ``np.int`` was only an alias of the builtin and has been removed
    # from NumPy.
    frame_ids = np.zeros(num_images, dtype=int)
    all_bboxes = [None] * num_images
    print("rank: {} len: {} size: {}".format(rank, len(data_loader),
                                             num_images))

    def detect_center_frame(feats, offsets, metas, context=''):
        """Detect on the buffered window and store the centre frame result."""
        with torch.no_grad():
            result = model(
                x=feats,
                img=None,
                img_meta=metas,
                forward_feat=True,
                return_loss=False,
                rescale=not show)
        if not dataset.video_shuffle:
            # The original ``assert "<message>"`` could never fail and let
            # the result be stored at a meaningless index.
            raise NotImplementedError(
                'Unshuffled video validation not implemented')
        start = video_frames_strts[video_idx]
        offset = offsets[center]
        if not isinstance(start, int):
            frame_ids[frame_idx] = start[offset]
        else:
            frame_ids[frame_idx] = start + offset
        try:
            all_bboxes[frame_ids[frame_idx] - 1] = result
        except IndexError:
            # Best effort as before: report the offending index, carry on.
            print("rank: {}, frame_idx: {}, frame_ids[frame_idx]:{}{}".format(
                rank, frame_idx, frame_ids[frame_idx], context))
        if rank == 0:
            for _ in range(batch_size * world_size):
                prog_bar.update()

    for data in data_loader:
        if not batch_size:
            batch_size = len(data['img_meta'].data[0])
        img_meta = data['img_meta'].data[0][0]
        frame_offset = img_meta['frame_offset']
        key_frame_flag = dataset.key_frame_flag
        seg_len = img_meta['seg_len']

        if key_frame_flag == 0:
            # New buffers, pre-filled with copies of the current frame.
            feat_list = deque(maxlen=all_frame_interval)
            frame_offset_list = deque(maxlen=all_frame_interval)
            img_meta_list = deque(maxlen=all_frame_interval)
            video_idx += 1
            with torch.no_grad():
                cur_feat = model(backbone_feat=True, **data)
            while len(feat_list) < half_window:
                feat_list.append(cur_feat[0])
                frame_offset_list.append(frame_offset)
                img_meta_list.append(img_meta)

        elif key_frame_flag == 2:
            # Detection only starts once the buffer was nearly full before
            # this frame was added.
            window_ready = len(feat_list) >= all_frame_interval - 1
            with torch.no_grad():
                feat = model(backbone_feat=True, **data)
            feat_list.append(feat[0])
            frame_offset_list.append(frame_offset)
            img_meta_list.append(img_meta)
            if window_ready:
                detect_center_frame(feat_list, frame_offset_list,
                                    img_meta_list)
                frame_idx += batch_size

        elif key_frame_flag == 1:
            with torch.no_grad():
                feat = model(backbone_feat=True, **data)
            while len(feat_list) < all_frame_interval - 1:
                feat_list.append(feat[0])
                frame_offset_list.append(frame_offset)
                img_meta_list.append(img_meta)
            # NOTE(review): reconstructed as one detection per padded
            # entry (detection inside this loop) - confirm against the
            # original layout.
            end_counter = 0
            while end_counter < min(seg_len, half_window):
                feat_list.append(feat[0])
                frame_offset_list.append(frame_offset)
                img_meta_list.append(img_meta)
                end_counter += 1
                detect_center_frame(
                    feat_list,
                    frame_offset_list,
                    img_meta_list,
                    context=', end_counter: {}'.format(end_counter))
                frame_idx += batch_size

    results = collect_selsa_results_cpu(all_bboxes, len(dataset), tmpdir)
    return results
def show_results():
    """Annotate every frame of the input video and write the output video.

    Frames are read from ``video_path``; sampled frames are pushed to
    ``frame_queue`` and ``inference()`` is polled once per frame. The most
    recent predictions above ``threshold`` (or a waiting message while
    none exist yet) are drawn on each frame before it is written to
    ``out_file``.
    """
    cap = cv2.VideoCapture(video_path)
    num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    msg = 'Preparing action recognition ...'
    text_info = {}
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    frame_size = (frame_width, frame_height)

    ind = 0
    video_writer = cv2.VideoWriter(out_file, fourcc, fps, frame_size)
    prog_bar = mmcv.ProgressBar(num_frames)
    backup_frames = []

    try:
        while ind < num_frames:
            ind += 1
            prog_bar.update()
            grabbed, frame = cap.read()
            if not grabbed:
                # The reported frame count can exceed what is decodable;
                # stop instead of failing on a missing frame.
                break
            # Keep a channel-reversed copy for the recognizer.
            backup_frames.append(np.array(frame)[:, :, ::-1])
            if ind == sample_length:
                # provide a quick show at the beginning
                frame_queue.extend(backup_frames)
                backup_frames = []
            elif ((len(backup_frames) == input_step and ind > sample_length)
                  or ind == num_frames):
                # pick a frame from the backup
                # when the backup is full or reach the last frame
                chosen_frame = random.choice(backup_frames)
                backup_frames = []
                frame_queue.append(chosen_frame)

            has_scores, scores = inference()
            if has_scores:
                num_selected_labels = min(len(label), 5)
                scores_tuples = tuple(zip(label, scores))
                scores_sorted = sorted(
                    scores_tuples, key=itemgetter(1), reverse=True)
                results = scores_sorted[:num_selected_labels]
                result_queue.append(results)

            if len(result_queue) != 0:
                # New predictions replace the overlay text.
                text_info = {}
                results = result_queue.popleft()
                for i, result in enumerate(results):
                    selected_label, score = result
                    if score < threshold:
                        break
                    location = (0, 40 + i * 20)
                    text = selected_label + ': ' + str(round(score, 2))
                    text_info[location] = text
                    cv2.putText(frame, text, location, FONTFACE, FONTSCALE,
                                FONTCOLOR, THICKNESS, LINETYPE)
            elif len(text_info):
                # No new predictions: keep showing the previous ones.
                for location, text in text_info.items():
                    cv2.putText(frame, text, location, FONTFACE, FONTSCALE,
                                FONTCOLOR, THICKNESS, LINETYPE)
            else:
                cv2.putText(frame, msg, (0, 40), FONTFACE, FONTSCALE,
                            MSGCOLOR, THICKNESS, LINETYPE)
            video_writer.write(frame)
    finally:
        # Release the writer as well, so the output file is finalised.
        video_writer.release()
        cap.release()
        cv2.destroyAllWindows()
def main():
    """Run the tracking demo on a video file or a folder of images.

    Every frame is passed to ``inference_mot`` and the track results are
    shown and/or written out. When ``--output`` ends with ``.mp4`` the
    painted frames go to a temporary directory and are assembled into a
    video at the end; otherwise ``--output`` is used as a folder.
    """
    parser = ArgumentParser()
    parser.add_argument('config', help='config file')
    parser.add_argument('--input', help='input video file or folder')
    parser.add_argument(
        '--output', help='output video file (mp4 format) or folder')
    parser.add_argument('--checkpoint', help='checkpoint file')
    parser.add_argument(
        '--device', default='cuda:0', help='device used for inference')
    parser.add_argument(
        '--show',
        action='store_true',
        help='whether show the results on the fly')
    parser.add_argument(
        '--backend',
        choices=['cv2', 'plt'],
        default='cv2',
        help='the backend to visualize the results')
    parser.add_argument('--fps', help='FPS of the output video')
    args = parser.parse_args()
    assert args.output or args.show

    # load images
    if osp.isdir(args.input):
        imgs = sorted(os.listdir(args.input))
        IN_VIDEO = False
    else:
        imgs = mmcv.VideoReader(args.input)
        IN_VIDEO = True

    # define output
    # Defaults for the ``--show`` only case, so the checks below never hit
    # an unbound name.
    OUT_VIDEO = False
    out_dir = None
    out_path = None
    if args.output is not None:
        if args.output.endswith('.mp4'):
            OUT_VIDEO = True
            out_dir = tempfile.TemporaryDirectory()
            out_path = out_dir.name
            _out = args.output.rsplit('/', 1)
            if len(_out) > 1:
                os.makedirs(_out[0], exist_ok=True)
        else:
            OUT_VIDEO = False
            out_path = args.output
            os.makedirs(out_path, exist_ok=True)

    fps = args.fps
    if args.show or OUT_VIDEO:
        if fps is None and IN_VIDEO:
            fps = imgs.fps
        if not fps:
            raise ValueError('Please set the FPS for the output video.')
        fps = int(fps)

    # build the model from a config file and a checkpoint file
    model = init_model(args.config, args.checkpoint, device=args.device)

    prog_bar = mmcv.ProgressBar(len(imgs))
    # test and show/save the images
    for i, img in enumerate(imgs):
        if isinstance(img, str):
            img = osp.join(args.input, img)
        result = inference_mot(model, img, frame_id=i)
        result = result['track_results']
        if args.output is not None:
            if IN_VIDEO or OUT_VIDEO:
                out_file = osp.join(out_path, f'{i:06d}.jpg')
            else:
                out_file = osp.join(out_path, img.rsplit('/', 1)[-1])
        else:
            out_file = None
        model.show_result(
            img,
            result,
            show=args.show,
            wait_time=int(1000. / fps) if fps else 0,
            out_file=out_file,
            backend=args.backend)
        prog_bar.update()

    if OUT_VIDEO:
        print(f'making the output video at {args.output} with a FPS of {fps}')
        mmcv.frames2video(out_path, args.output, fps=fps)
        out_dir.cleanup()
import numpy as np
import pandas as pd
import mmcv

# Count, per character, how often a ground-truth character is missing from
# the corresponding prediction, and save the counts sorted by frequency.
gt_dir = './to_lmdb/val_label_new.txt'
pred_dir = './history/test_out_lr_0.00010_batchSize_1_time_0310105604_/epoch_0_step_0_data.txt'
stat_csv = './False_record/stat.csv'

# Number of line pairs read from the two files.
NUM_SAMPLES = 31402

d = {}
prog_bar = mmcv.ProgressBar(NUM_SAMPLES)
# ``with`` closes both files even if a line cannot be parsed.
with open(gt_dir, 'r') as f1, open(pred_dir, 'r') as f2:
    for i in range(NUM_SAMPLES):
        # Each line is split on single spaces; the second field is the text.
        pred = f2.readline().split(' ')[1].strip()
        gt = f1.readline().split(' ')[1].strip()
        for p in gt:
            if p not in pred:
                d[p] = d.get(p, 0) + 1
        prog_bar.update()

# Build the table in one go: ``DataFrame.append`` is gone in pandas 2.0 and
# copied the whole frame on every call.
p = pd.DataFrame(list(d.items()), columns=['word', 'num'])
print(p)

# Most frequently missed characters first.
ind = np.argsort(p['num'].values, )
p = p.loc[ind[::-1], :]
p = p.reset_index(drop=True)
print(p)
p.to_csv(stat_csv)
def after_train_iter(self, runner):
    """The behavior after each train iteration.

    Every ``self.interval`` iterations the metrics are fed with real
    images from ``self.dataloader`` and with fake images sampled from the
    model, then summarised into the runner's log buffer.

    Args:
        runner (``mmcv.runner.BaseRunner``): The runner.
    """
    if not self.every_n_iters(runner, self.interval):
        return

    runner.model.eval()

    # sample fake images
    max_num_images = max(metric.num_images for metric in self.metrics)
    for metric in self.metrics:
        mmcv.print_log(f'Feed reals to {metric.name} metric.', 'mmgen')
        # feed in real images
        for data in self.dataloader:
            reals = data['real_img']
            num_feed = metric.feed(reals, 'reals')
            if num_feed <= 0:
                break

    mmcv.print_log(f'Sample {max_num_images} fake images for evaluation',
                   'mmgen')
    batch_size = self.dataloader.batch_size

    rank, ws = get_dist_info()
    total_batch_size = batch_size * ws

    # define mmcv progress bar
    if rank == 0:
        pbar = mmcv.ProgressBar(max_num_images)

    # sampling fake images and directly send them to metrics
    for _ in range(0, max_num_images, total_batch_size):
        with torch.no_grad():
            fakes = runner.model(
                None,
                num_batches=batch_size,
                return_loss=False,
                **self.sample_kwargs)

            for metric in self.metrics:
                # Feed every metric on every batch. Stopping at the first
                # metric that reports nothing left would leave the metrics
                # after it without the fakes they still need.
                metric.feed(fakes, 'fakes')

        if rank == 0:
            pbar.update(total_batch_size)

    runner.log_buffer.clear()
    # a dirty walkround to change the line at the end of pbar
    if rank == 0:
        sys.stdout.write('\n')
    for metric in self.metrics:
        metric.summary()
        for name, val in metric._result_dict.items():
            runner.log_buffer.output[name] = val
    runner.log_buffer.ready = True
    runner.model.train()

    # clear all current states for next evaluation
    for metric in self.metrics:
        metric.clear()
def after_train_epoch(self, runner):
    """Evaluate the dataset after a training epoch when it is due.

    Every rank handles the samples ``rank, rank + world_size, ...``.
    Ranks other than 0 dump their partial results to a temporary pickle in
    ``runner.work_dir``; rank 0 merges those files into its own results
    and calls ``self.evaluate``.
    """
    if not self.every_n_epochs(runner, self.interval):
        return

    num_samples = len(self.dataset)
    runner.logger.info("Start evaluation on {} dataset({} images).".format(
        self.dataset.name, num_samples))

    runner.model.eval()

    is_main = runner.rank == 0
    # only the main process owns a progress bar
    prog_bar = mmcv.ProgressBar(len(self.dataset)) if is_main else None

    results = [None] * len(self.dataset)
    for idx in range(runner.rank, len(self.dataset), runner.world_size):
        data = self.dataset[idx]
        data_gpu = scatter(
            collate([data], samples_per_gpu=1),
            [torch.cuda.current_device()])[0]

        # forward pass
        with torch.no_grad():
            result, _ = runner.model(data_gpu)
            disps = result['disps']
            costs = result['costs']

        ori_size = data_gpu['original_size']
        disps = remove_padding(disps, ori_size)

        # ground truth disparity maps: missing ones become None, present
        # ones get their padding removed
        for key in ('leftDisp', 'rightDisp'):
            gt_disp = data_gpu[key] if key in data_gpu else None
            if gt_disp is not None:
                gt_disp = remove_padding(gt_disp, ori_size)
            data_gpu[key] = gt_disp

        # evaluation
        whole_error_dict = disp_evaluation(self.cfg, disps,
                                           data_gpu['leftDisp'],
                                           data_gpu['rightDisp'])

        result = {
            'Disparity': disps,
            'GroundTruth': data_gpu['leftDisp'],
            'Error': whole_error_dict,
        }

        eval_cfg = self.cfg.model.eval
        if eval_cfg.is_cost_return:
            if eval_cfg.is_cost_to_cpu:
                costs = [cost.cpu() for cost in costs]
            result['Cost'] = costs

        # move everything off the GPU: keeping images there makes the cuda
        # cache explode as the evaluation advances
        result = to_cpu(result)

        # only the light-weight entries are kept per sample
        filter_result = dict()
        filter_result['Error'] = result['Error']
        if 'Confidence' in result:
            filter_result['Confidence'] = self.process_conf(
                result, bins_number=100)

        results[idx] = filter_result

        batch_size = runner.world_size
        if is_main:
            for _ in range(batch_size):
                prog_bar.update()

    if not is_main:
        # hand the partial results over to rank 0
        tmp_file = osp.join(runner.work_dir,
                            "temp_{}.pkl".format(runner.rank))
        mmcv.dump(results, tmp_file)
        dist.barrier()
    else:
        print('\n')
        dist.barrier()
        for i in range(1, min(runner.world_size, len(self.dataset))):
            tmp_file = osp.join(runner.work_dir, "temp_{}.pkl".format(i))
            tmp_results = mmcv.load(tmp_file)
            for idx in range(i, len(results), runner.world_size):
                results[idx] = tmp_results[idx]
            os.remove(tmp_file)
        self.evaluate(runner, results)

    dist.barrier()
    torch.cuda.empty_cache()
def after_train_epoch(self, runner):
    """Run the detector on ``self.dataset`` and export per-image results.

    Nothing happens unless ``self.every_n_epochs(runner, self.interval)``
    is true. Each rank processes the samples ``rank, rank + world_size,
    ...`` and writes every detection result to a text file whose path is
    derived from the image path. According to the original author's notes
    an external Matlab script reads those files to compute the miss rate.
    Afterwards every rank calls ``self.evaluate(runner, results)`` with
    its own (partial) result list and then ``self._barrier``.

    Args:
        runner: Training runner. This hook reads its ``model``, ``rank``
            and ``world_size``.
    """
    if not self.every_n_epochs(runner, self.interval):
        return

    runner.model.eval()
    num_samples = len(self.dataset)
    results = [None] * num_samples

    # Only rank 0 owns a progress bar. The previous code created it on
    # rank 0 but updated it on every rank, which raised NameError on all
    # other ranks as soon as they processed their first image.
    prog_bar = mmcv.ProgressBar(num_samples) if runner.rank == 0 else None

    for idx in range(runner.rank, num_samples, runner.world_size):
        data = self.dataset[idx]
        data_gpu = scatter(
            collate([data], samples_per_gpu=1),
            [torch.cuda.current_device()])[0]

        # compute output
        with torch.no_grad():
            result = runner.model(
                return_loss=False, rescale=True, **data_gpu)
        results[idx] = result

        # Miss-rate evaluation pipeline (added by Yuan):
        #   detection result -> text file -> Matlab script
        # The text file lives next to the image tree: 'images' becomes
        # 'res', a '/visible/' path component is dropped and the image
        # extension is replaced by '.txt'.
        img_path = self.dataset.img_infos[idx]['filename']
        res_path = img_path.replace('images', 'res')
        res_path = res_path.replace('/visible/', '/')
        res_path = res_path.replace('.jpg', '.txt')
        res_path = res_path.replace('.png', '.txt')

        # Per the original notes: Faster R-CNN returns a list with one
        # entry per object class (pedestrian detection has a single
        # class, hence index 0), while RPN returns one class-independent
        # array.
        detections = result[0] if isinstance(result, list) else result

        # np.savetxt creates the file or truncates an existing one, so no
        # prior os.remove / os.mknod is needed (os.mknod is not available
        # on every platform).
        np.savetxt(res_path, detections)

        # One step of the loop covers ``world_size`` samples overall.
        if prog_bar is not None:
            for _ in range(runner.world_size):
                prog_bar.update()

    # The former gather step (non-zero ranks dumping temp_<rank>.pkl for
    # rank 0 to merge before evaluating) was disabled by the original
    # author in favour of the text-file based evaluation above.
    self.evaluate(runner, results)
    self._barrier(runner.rank, runner.world_size)
def show_results(model, data, label, args):
    """Run action recognition over a video and export the predictions.

    Frames are read one by one from ``args.video_path``. The first
    ``args.sample_length`` frames seed the frame queue; after that, each
    time ``args.input_step`` frames have been buffered (or the last frame
    is reached) one randomly chosen buffered frame is appended to the
    queue. ``inference`` is called after every decoded frame and, when it
    reports a result, the highest-scoring labels (at most five) are kept
    as the current prediction.

    If ``args.out_file`` ends with ``.json`` the predictions are collected
    and dumped as JSON; otherwise they are passed, together with the
    frame and a video writer, to ``show_results_video``.

    Args:
        model: Recognizer passed through to ``inference``.
        data: Input template passed through to ``inference``.
        label: Sequence of label names, zipped with the returned scores.
        args: Parsed options; this function reads ``video_path``,
            ``out_file``, ``sample_length``, ``input_step``,
            ``threshold``, ``label_color`` and ``msg_color``.
    """
    frame_queue = deque(maxlen=args.sample_length)
    result_queue = deque(maxlen=1)

    cap = cv2.VideoCapture(args.video_path)
    num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    msg = 'Preparing action recognition ...'
    text_info = {}
    out_json = {}
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    frame_size = (frame_width, frame_height)

    ind = 0
    # The output mode never changes, so decide it once.
    json_output = args.out_file.endswith('.json')
    video_writer = None if json_output \
        else cv2.VideoWriter(args.out_file, fourcc, fps, frame_size)
    prog_bar = mmcv.ProgressBar(num_frames)
    backup_frames = []

    try:
        while ind < num_frames:
            ind += 1
            prog_bar.update()
            _, frame = cap.read()
            if frame is None:
                # Drop frames that could not be decoded.
                continue
            # Reverse the channel axis (OpenCV decodes frames as BGR).
            backup_frames.append(np.array(frame)[:, :, ::-1])
            if ind == args.sample_length:
                # Provide a quick show at the beginning.
                frame_queue.extend(backup_frames)
                backup_frames = []
            elif ((len(backup_frames) == args.input_step
                   and ind > args.sample_length) or ind == num_frames):
                # Pick one frame from the backup when the backup is full
                # or the last frame has been reached.
                chosen_frame = random.choice(backup_frames)
                backup_frames = []
                frame_queue.append(chosen_frame)

            ret, scores = inference(model, data, args, frame_queue)

            if ret:
                num_selected_labels = min(len(label), 5)
                scores_sorted = sorted(
                    zip(label, scores), key=itemgetter(1), reverse=True)
                result_queue.append(scores_sorted[:num_selected_labels])

            if json_output:
                text_info, out_json = get_results_json(
                    result_queue, text_info, args.threshold, msg, ind,
                    out_json)
            else:
                text_info = show_results_video(
                    result_queue, text_info, args.threshold, msg, frame,
                    video_writer, args.label_color, args.msg_color)
    finally:
        # Release the capture and, previously missing, the writer, so
        # the output video is finalised even if inference fails midway.
        cap.release()
        if video_writer is not None:
            video_writer.release()
        cv2.destroyAllWindows()

    if json_output:
        with open(args.out_file, 'w') as js:
            json.dump(out_json, js)
def make_lmdb(mode, data_path, lmdb_path, batch=5000, compress_level=1):
    """Create lmdb for the REDS dataset.

    Contents of lmdb. The file structure is:

    ::

        example.lmdb
        ├── data.mdb
        ├── lock.mdb
        ├── meta_info.txt

    The data.mdb and lock.mdb are standard lmdb files and you can refer to
    https://lmdb.readthedocs.io/en/release/ for more details.

    The meta_info.txt is a specified txt file to record the meta information
    of our datasets. It will be automatically created when preparing
    datasets by our provided dataset tools.
    Each line in the txt file records 1) image name (with extension),
    2) image shape, and 3) compression level, separated by a white space.

    For example, the meta information could be:
    `000_00000000.png (720,1280,3) 1`, which means:
    1) image name (with extension): 000_00000000.png;
    2) image shape: (720,1280,3);
    3) compression level: 1

    We use the image name without extension as the lmdb key.

    Args:
        mode (str): REDS dataset mode. Choices: ['train_sharp', 'train_blur',
            'train_blur_comp', 'train_sharp_bicubic', 'train_blur_bicubic'].
            They are used to identify different reds dataset for different
            tasks. Specifically:
            'train_sharp': GT frames;
            'train_blur': Blur frames for deblur task.
            'train_blur_comp': Blur and compressed frames for deblur and
                compression task.
            'train_sharp_bicubic': Bicubic downsampled sharp frames for SR
                task.
            'train_blur_bicubic': Bicubic downsampled blur frames for SR task.
        data_path (str): Data path for reading images.
        lmdb_path (str): Lmdb save path.
        batch (int): After processing batch images, lmdb commits.
            Default: 5000.
        compress_level (int): Compress level when encoding images. Default: 1.

    Raises:
        SystemExit: If ``lmdb_path`` already exists (exit code 1).
        AssertionError: If an image does not have the shape expected for
            ``mode``.
    """
    print(f'Create lmdb for {data_path}, save to {lmdb_path}...')

    # Full-resolution modes are 720x1280; the bicubic modes are 180x320.
    if mode in ['train_sharp', 'train_blur', 'train_blur_comp']:
        h_dst, w_dst = 720, 1280
    else:
        h_dst, w_dst = 180, 320

    if osp.exists(lmdb_path):
        print(f'Folder {lmdb_path} already exists. Exit.')
        sys.exit(1)

    print('Reading image path list ...')
    img_path_list = sorted(
        mmcv.scandir(data_path, suffix='png', recursive=True))
    keys = []
    for img_path in img_path_list:
        parts = img_path.split('/')
        folder = parts[-2]
        img_name = parts[-1].split('.png')[0]
        keys.append(folder + '_' + img_name)  # example: 000_00000000

    png_params = [cv2.IMWRITE_PNG_COMPRESSION, compress_level]

    # Estimate the map size from the encoded size of the first image.
    img = mmcv.imread(osp.join(data_path, img_path_list[0]), flag='unchanged')
    _, img_byte = cv2.imencode('.png', img, png_params)
    data_size_per_img = img_byte.nbytes
    print('Data size per image is: ', data_size_per_img)
    data_size = data_size_per_img * len(img_path_list)
    env = lmdb.open(lmdb_path, map_size=data_size * 10)

    # The environment and the meta file are closed even when an image
    # fails to load or validate, instead of being leaked.
    try:
        pbar = mmcv.ProgressBar(len(img_path_list))
        txn = env.begin(write=True)
        with open(osp.join(lmdb_path, 'meta_info.txt'), 'w') as txt_file:
            for idx, (path, key) in enumerate(zip(img_path_list, keys)):
                pbar.update()
                key_byte = key.encode('ascii')
                img = mmcv.imread(osp.join(data_path, path), flag='unchanged')
                h, w, c = img.shape
                # Validate before paying for the PNG encode.
                assert h == h_dst and w == w_dst and c == 3, (
                    f'Wrong shape ({h}, {w}, {c}), '
                    f'should be ({h_dst}, {w_dst}, 3).')
                _, img_byte = cv2.imencode('.png', img, png_params)
                txn.put(key_byte, img_byte)
                # write meta information
                txt_file.write(f'{key}.png ({h},{w},{c}) {compress_level}\n')
                # Commit after every full batch of images, as documented
                # (the old ``idx % batch`` test committed after image 0).
                if (idx + 1) % batch == 0:
                    txn.commit()
                    txn = env.begin(write=True)
        txn.commit()
    finally:
        env.close()
    print('\nFinish writing lmdb.')