def __init__(self, dataset, samples_per_gpu=1, num_replicas=None, rank=None):
    _rank, _num_replicas = get_dist_info()
    if num_replicas is None:
        num_replicas = _num_replicas
    if rank is None:
        rank = _rank
    self.dataset = dataset
    self.samples_per_gpu = samples_per_gpu
    self.num_replicas = num_replicas
    self.rank = rank
    self.epoch = 0

    assert hasattr(self.dataset, 'flag')
    self.flag = self.dataset.flag
    self.group_sizes = np.bincount(self.flag)

    self.num_samples = 0
    for i, j in enumerate(self.group_sizes):
        self.num_samples += int(
            math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu /
                      self.num_replicas)) * self.samples_per_gpu
    self.total_size = self.num_samples * self.num_replicas

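# Illustrative example (not from the original file): with group_sizes = [10, 7],
# samples_per_gpu = 2 and num_replicas = 4, each replica draws
# ceil(10 / 2 / 4) * 2 = 4 samples from the first group and
# ceil(7 / 2 / 4) * 2 = 2 from the second, so num_samples = 6 per replica and
# total_size = 6 * 4 = 24, i.e. the padded length distributed across replicas.
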
def get_root_logger(log_file=None, log_level=logging.INFO):
    """Get the root logger.

    The logger will be initialized if it has not been initialized. By default
    a StreamHandler will be added. If `log_file` is specified, a FileHandler
    will also be added. The name of the root logger is the top-level package
    name, e.g., "SOHO".

    :param log_file: path of a log file; if given, a FileHandler is attached
        on rank 0.
    :param log_level: logging level used on rank 0; non-zero ranks are set to
        ERROR so that only rank 0 emits regular logs.
    :return: the root logger.
    """
    logger = logging.getLogger(__name__.split('.')[0])
    # the root logger may already have handlers from a previous call
    if logger.hasHandlers():
        return logger

    format_str = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(format=format_str, level=log_level)
    rank, _ = get_dist_info()
    if rank != 0:
        logger.setLevel('ERROR')
    elif log_file is not None:
        file_handler = logging.FileHandler(log_file, 'w')
        file_handler.setFormatter(logging.Formatter(format_str))
        file_handler.setLevel(log_level)
        logger.addHandler(file_handler)

    return logger

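# Usage sketch (illustrative; the file path is hypothetical):
#   logger = get_root_logger(log_file='work_dirs/run1/train.log',
#                            log_level=logging.INFO)
#   logger.info('visible on rank 0')   # non-zero ranks only emit ERROR and above
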
def collect_results_gpu(result_part, size):
    rank, world_size = get_dist_info()
    # dump result part to tensor with pickle
    part_tensor = torch.tensor(bytearray(pickle.dumps(result_part)),
                               dtype=torch.uint8,
                               device='cuda')
    # gather all result part tensor shape
    shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
    shape_list = [shape_tensor.clone() for _ in range(world_size)]
    dist.all_gather(shape_list, shape_tensor)
    # padding result part tensor to max length
    shape_max = torch.tensor(shape_list).max()
    part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
    part_send[:shape_tensor[0]] = part_tensor
    part_recv_list = [
        part_tensor.new_zeros(shape_max) for _ in range(world_size)
    ]
    # gather all result part
    dist.all_gather(part_recv_list, part_send)

    if rank == 0:
        part_list = []
        for recv, shape in zip(part_recv_list, shape_list):
            part_list.append(
                pickle.loads(recv[:shape[0]].cpu().numpy().tobytes()))
        # sort the results
        ordered_results = []
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        return ordered_results

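# Usage sketch (illustrative): every rank passes its own partial result list;
# only rank 0 gets back the merged, de-padded list, the other ranks get None.
#   merged = collect_results_gpu(results, len(dataset))
#   if merged is not None:  # rank 0 only
#       ...
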
def collect_results(result_part, size, tmpdir=None):
    # merge this rank's per-batch dicts into a single dict of numpy arrays
    results_out = {}
    for k in result_part[0].keys():
        results_out[k] = np.concatenate(
            [batch[k].numpy() for batch in result_part], axis=0)

    rank, world_size = get_dist_info()
    # create a tmp dir if it is not specified
    if tmpdir is None:
        MAX_LEN = 512  # 32 is whitespace
        dir_tensor = torch.full((MAX_LEN, ),
                                32,
                                dtype=torch.uint8,
                                device='cuda')
        if rank == 0:
            tmpdir = tempfile.mkdtemp()
            tmpdir = torch.tensor(bytearray(tmpdir.encode()),
                                  dtype=torch.uint8,
                                  device='cuda')
            dir_tensor[:len(tmpdir)] = tmpdir
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
    else:
        commons.mkdir_or_exist(tmpdir)
    # dump the part result to the dir
    commons.dump(results_out, os.path.join(tmpdir, 'part_{}.pkl'.format(rank)))
    dist.barrier()
    # collect all parts
    if rank != 0:
        return None
    else:
        # load results of all parts from tmp dir
        part_list = []
        for i in range(world_size):
            part_file = os.path.join(tmpdir, 'part_{}.pkl'.format(i))
            part_list.append(commons.load(part_file))
        # sort the results
        ordered_results = defaultdict(list)
        out_dict = defaultdict(list)
        for res in part_list:
            for k in part_list[0].keys():
                out_dict[k].append(res[k])
        for k in part_list[0].keys():
            for res in zip(*(out_dict[k])):
                ordered_results[k].extend(list(res))
            # the dataloader may pad some samples
            ordered_results[k] = ordered_results[k][:size]
        # remove tmp dir
        shutil.rmtree(tmpdir)
        return ordered_results

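# Expected input (illustrative): `result_part` is this rank's list of per-batch
# dicts of CPU tensors that share the same keys, e.g.
#   [{'ids': tensor([...]), 'pred': tensor([[...]])}, ...]
# The tensors are concatenated per key, written to tmpdir, and rank 0 returns a
# dict of interleaved, de-padded result lists keyed the same way.
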
def build_dataloader(dataset,
                     imgs_per_gpu,
                     workers_per_gpu,
                     num_gpus=1,
                     dist=True,
                     **kwargs):
    shuffle = kwargs.get('shuffle', True)
    if dist:
        rank, world_size = get_dist_info()
        if shuffle:
            sampler = DistributedGroupSampler(dataset, imgs_per_gpu,
                                              world_size, rank)
        else:
            sampler = DistributedSampler(dataset,
                                         world_size,
                                         rank,
                                         shuffle=False)
        batch_size = imgs_per_gpu
        num_workers = workers_per_gpu
    else:
        sampler = GroupSampler(dataset, imgs_per_gpu) if shuffle else None
        batch_size = num_gpus * imgs_per_gpu
        num_workers = num_gpus * workers_per_gpu

    data_loader = DataLoader(dataset,
                             batch_size=batch_size,
                             sampler=sampler,
                             num_workers=num_workers,
                             collate_fn=partial(collate,
                                                samples_per_gpu=imgs_per_gpu),
                             pin_memory=True,
                             **kwargs)
    # modify container
    # data_loader = DataLoader(
    #     dataset,
    #     batch_size=batch_size,
    #     sampler=sampler,
    #     num_workers=num_workers,
    #     collate_fn=trim_collate,
    #     pin_memory=False,
    #     **kwargs)
    return data_loader

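# Usage sketch (illustrative; the dataset objects and numbers are placeholders):
#   train_loader = build_dataloader(train_dataset,
#                                   imgs_per_gpu=2,
#                                   workers_per_gpu=2,
#                                   dist=True)           # DistributedGroupSampler
#   test_loader = build_dataloader(test_dataset,
#                                  imgs_per_gpu=1,
#                                  workers_per_gpu=2,
#                                  dist=True,
#                                  shuffle=False)        # plain DistributedSampler
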
def multi_gpu_test(model, data_loader, tmpdir=None):
    model.eval()
    results = []
    dataset = data_loader.dataset
    rank, world = get_dist_info()
    if rank == 0:
        prog_bar = commons.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(mode='test', **data)
        results.append(result)

        if rank == 0:
            bs = len(data['img'])
            for _ in range(bs * world):
                prog_bar.update()

    results = collect_results(results, len(dataset), tmpdir)
    return results

def collect_results_cpu(result_part, size, tmpdir=None):
    rank, world_size = get_dist_info()
    # create a tmp dir if it is not specified
    if tmpdir is None:
        MAX_LEN = 512  # 32 is whitespace
        dir_tensor = torch.full((MAX_LEN, ),
                                32,
                                dtype=torch.uint8,
                                device='cuda')
        if rank == 0:
            tmpdir = tempfile.mkdtemp()
            tmpdir = torch.tensor(bytearray(tmpdir.encode()),
                                  dtype=torch.uint8,
                                  device='cuda')
            dir_tensor[:len(tmpdir)] = tmpdir
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
    else:
        commons.mkdir_or_exist(tmpdir)
    # dump the part result to the dir
    commons.dump(result_part, os.path.join(tmpdir, f'part_{rank}.pkl'))
    dist.barrier()
    # collect all parts
    if rank != 0:
        return None
    else:
        # load results of all parts from tmp dir
        part_list = []
        for i in range(world_size):
            part_file = os.path.join(tmpdir, f'part_{i}.pkl')
            part_list.append(commons.load(part_file))
        # sort the results
        ordered_results = []
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        # remove tmp dir
        shutil.rmtree(tmpdir)
        return ordered_results

def main():
    args = parse_args()

    cfg = commons.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # init the distributed env first, since the steps below depend on the dist info
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(dataset,
                                   imgs_per_gpu=1,
                                   workers_per_gpu=cfg.data.workers_per_gpu,
                                   dist=distributed,
                                   shuffle=False)

    # build the model and load checkpoint
    model = build_model(cfg.model)
    check_item = args.checkpoint[0]
    checkpoint = load_checkpoint(
        model,
        os.path.join(cfg.work_dir, 'epoch_' + str(check_item) + '.pth'),
        map_location='cpu')
    label2ans = dataset.label2ans

    gpu_id = dist.get_rank() % torch.cuda.device_count()
    torch.cuda.set_device(gpu_id)
    model = model.cuda()
    if cfg.fp_16.enable:
        model = amp.initialize(model,
                               opt_level=cfg.fp_16.opt_level,
                               loss_scale=cfg.fp_16.loss_scale,
                               max_loss_scale=cfg.fp_16.max_loss_scale)
        print('**** Initializing mixed precision done. ****')
    model = MMDistributedDataParallel(
        model,
        device_ids=[torch.cuda.current_device()],
        broadcast_buffers=False,
    )
    outputs = multi_gpu_test(model, data_loader, args.tmpdir)

    rank, _ = get_dist_info()
    if rank == 0:
        output_path = os.path.join(cfg.work_dir, "test_results")
        commons.mkdir_or_exist(output_path)
        out_list = []
        with open("outputs.pkl", 'wb') as f:
            pickle.dump(outputs, f)
        ids = outputs["ids"]
        preds = outputs["pred"]
        for id, pred in zip(ids, preds):
            q_id = dataset.q_id_list[int(id)]
            pred_index = np.argmax(pred, axis=0)
            answer = dataset.label2ans[pred_index]
            out_list.append({'question_id': q_id, 'answer': answer})
        print('\nwriting results to {}'.format(output_path))
        commons.dump(
            out_list,
            os.path.join(output_path,
                         "test_submit_{0}.json".format(str(check_item))))
        os.system("rm -rf outputs.pkl")