Example #1
    def __init__(self,
                 dataset,
                 samples_per_gpu=1,
                 num_replicas=None,
                 rank=None):
        _rank, _num_replicas = get_dist_info()
        if num_replicas is None:
            num_replicas = _num_replicas
        if rank is None:
            rank = _rank
        self.dataset = dataset
        self.samples_per_gpu = samples_per_gpu
        self.num_replicas = num_replicas
        self.rank = rank
        self.epoch = 0

        # the dataset must expose a `flag` array that assigns each sample to a group
        assert hasattr(self.dataset, 'flag')
        self.flag = self.dataset.flag
        self.group_sizes = np.bincount(self.flag)

        # pad each group so the number of samples drawn from it is divisible
        # by samples_per_gpu * num_replicas
        self.num_samples = 0
        for size in self.group_sizes:
            self.num_samples += int(
                math.ceil(size * 1.0 / self.samples_per_gpu /
                          self.num_replicas)) * self.samples_per_gpu
        self.total_size = self.num_samples * self.num_replicas
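
For intuition, here is a small standalone sketch (with made-up numbers, not taken from the original code) of how the padded per-replica sample count above works out:

import math

# hypothetical setup: two groups of 10 and 7 samples,
# samples_per_gpu = 2, num_replicas = 4
group_sizes = [10, 7]
samples_per_gpu, num_replicas = 2, 4

num_samples = 0
for size in group_sizes:
    # each group is padded up to a multiple of samples_per_gpu * num_replicas
    num_samples += int(
        math.ceil(size / samples_per_gpu / num_replicas)) * samples_per_gpu

print(num_samples)                 # 6 samples drawn by each replica (4 + 2)
print(num_samples * num_replicas)  # 24, the total_size across all replicas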
Example #2
def get_root_logger(log_file=None, log_level=logging.INFO):
    """Get the root logger.

    The logger will be initialized if it has not been initialized. By default a
    StreamHandler will be added. If `log_file` is specified, a FileHandler will
    also be added. The name of the root logger is the top-level package name,
    e.g., "SOHO".

    :param log_file: path of the log file; if None, only console logging is used.
    :param log_level: logging level used on rank 0; non-zero ranks log only errors.
    :return: the root logger instance.
    """
    logger = logging.getLogger(__name__.split('.')[0])

    if logger.hasHandlers():
        return logger

    format_str = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(format=format_str, level=log_level)
    rank, _ = get_dist_info()
    if rank != 0:
        logger.setLevel('ERROR')
    elif log_file is not None:
        file_handler = logging.FileHandler(log_file, 'w')
        file_handler.setFormatter(logging.Formatter(format_str))
        file_handler.setLevel(log_level)
        logger.addHandler(file_handler)

    return logger
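
A minimal usage sketch (the log path and message are illustrative, not from the original code, and the target directory must already exist):

import logging

# hypothetical call site: create the logger once at startup
logger = get_root_logger(log_file='work_dir/train.log', log_level=logging.INFO)
logger.info('started')  # non-zero ranks are raised to ERROR, so only rank 0 emits this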
Example #3
def collect_results_gpu(result_part, size):
    rank, world_size = get_dist_info()
    # dump result part to tensor with pickle
    part_tensor = torch.tensor(bytearray(pickle.dumps(result_part)),
                               dtype=torch.uint8,
                               device='cuda')
    # gather all result part tensor shape
    shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
    shape_list = [shape_tensor.clone() for _ in range(world_size)]
    dist.all_gather(shape_list, shape_tensor)
    # padding result part tensor to max length
    shape_max = torch.tensor(shape_list).max()
    part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
    part_send[:shape_tensor[0]] = part_tensor
    part_recv_list = [
        part_tensor.new_zeros(shape_max) for _ in range(world_size)
    ]
    # gather all result part
    dist.all_gather(part_recv_list, part_send)

    if rank == 0:
        part_list = []
        for recv, shape in zip(part_recv_list, shape_list):
            part_list.append(
                pickle.loads(recv[:shape[0]].cpu().numpy().tobytes()))
        # sort the results
        ordered_results = []
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        return ordered_results
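
The zip(*part_list) step interleaves the per-rank lists, which restores dataset order because a distributed sampler hands out samples rank-strided (sample i goes to rank i % world_size). A standalone illustration with made-up values:

# hypothetical per-rank results for world_size = 3 and 8 samples;
# rank r received samples r, r + 3, r + 6, ...; rank 2 was padded by wrapping around
part_list = [
    ['s0', 's3', 's6'],   # rank 0
    ['s1', 's4', 's7'],   # rank 1
    ['s2', 's5', 's0'],   # rank 2 (last entry is padding)
]
ordered_results = []
for res in zip(*part_list):
    ordered_results.extend(list(res))
ordered_results = ordered_results[:8]  # drop the padding added by the dataloader
print(ordered_results)  # ['s0', 's1', 's2', 's3', 's4', 's5', 's6', 's7']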
Example #4
def collect_results(result_part, size, tmpdir=None):
    results_out = {}
    for k in result_part[0].keys():
        results_out[k] = np.concatenate(
            [batch[k].numpy() for batch in result_part], axis=0)
    rank, world_size = get_dist_info()
    # create a tmp dir if it is not specified
    if tmpdir is None:
        MAX_LEN = 512
        # 32 is the ASCII code for a space, used to pad the path
        dir_tensor = torch.full((MAX_LEN, ),
                                32,
                                dtype=torch.uint8,
                                device='cuda')
        if rank == 0:
            tmpdir = tempfile.mkdtemp()
            tmpdir = torch.tensor(bytearray(tmpdir.encode()),
                                  dtype=torch.uint8,
                                  device='cuda')
            dir_tensor[:len(tmpdir)] = tmpdir
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
    else:
        commons.mkdir_or_exist(tmpdir)
    # dump the part result to the dir
    commons.dump(results_out, os.path.join(tmpdir, 'part_{}.pkl'.format(rank)))
    dist.barrier()
    # collect all parts
    if rank != 0:
        return None
    else:
        # load results of all parts from tmp dir
        part_list = []
        for i in range(world_size):
            part_file = os.path.join(tmpdir, 'part_{}.pkl'.format(i))
            part_list.append(commons.load(part_file))
        # sort the results
        ordered_results = defaultdict(list)
        out_dict = defaultdict(list)
        for res in part_list:
            for k in part_list[0].keys():
                out_dict[k].append(res[k])

        for k in part_list[0].keys():
            for res in zip(*(out_dict[k])):
                ordered_results[k].extend(list(res))
            # the dataloader may pad some samples
            ordered_results[k] = ordered_results[k][:size]
        # remove tmp dir
        shutil.rmtree(tmpdir)
        return ordered_results
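
The temporary-directory handshake encodes the path created on rank 0 into a fixed-length uint8 tensor padded with spaces, broadcasts it, and decodes it back on every rank. The encode/pad/decode round trip in isolation (no process group involved; the path is made up):

import torch

MAX_LEN = 512
tmpdir = '/tmp/tmpabc123'  # hypothetical directory created on rank 0
dir_tensor = torch.full((MAX_LEN, ), 32, dtype=torch.uint8)  # 32 == ord(' ')
encoded = torch.tensor(bytearray(tmpdir.encode()), dtype=torch.uint8)
dir_tensor[:len(encoded)] = encoded
# dist.broadcast(dir_tensor, 0) would ship this tensor to the other ranks here
decoded = dir_tensor.numpy().tobytes().decode().rstrip()
assert decoded == tmpdir  # the trailing space padding is stripped back off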
Example #5
def build_dataloader(dataset,
                     imgs_per_gpu,
                     workers_per_gpu,
                     num_gpus=1,
                     dist=True,
                     **kwargs):
    # pop `shuffle` so it is not forwarded to DataLoader together with a sampler
    shuffle = kwargs.pop('shuffle', True)
    if dist:
        rank, world_size = get_dist_info()
        if shuffle:
            sampler = DistributedGroupSampler(dataset, imgs_per_gpu,
                                              world_size, rank)
        else:
            sampler = DistributedSampler(dataset,
                                         world_size,
                                         rank,
                                         shuffle=False)
        batch_size = imgs_per_gpu
        num_workers = workers_per_gpu
    else:
        sampler = GroupSampler(dataset, imgs_per_gpu) if shuffle else None
        batch_size = num_gpus * imgs_per_gpu
        num_workers = num_gpus * workers_per_gpu

    data_loader = DataLoader(dataset,
                             batch_size=batch_size,
                             sampler=sampler,
                             num_workers=num_workers,
                             collate_fn=partial(collate,
                                                samples_per_gpu=imgs_per_gpu),
                             pin_memory=True,
                             **kwargs)
    # modify container
    # data_loader = DataLoader(
    #     dataset,
    #     batch_size=batch_size,
    #     sampler=sampler,
    #     num_workers=num_workers,
    #     collate_fn=trim_collate,
    #     pin_memory=False,
    #     **kwargs)

    return data_loader
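
A hedged usage sketch of the non-distributed path (the `dataset` object and the argument values are assumptions, not from the original code):

# hypothetical call: single GPU, no distributed training, sequential order;
# `dataset` is assumed to be an already-built dataset object
loader = build_dataloader(dataset,
                          imgs_per_gpu=2,
                          workers_per_gpu=2,
                          num_gpus=1,
                          dist=False,
                          shuffle=False)
for batch in loader:
    pass  # each batch holds imgs_per_gpu samples collated by `collate`

Note the design choice in the code above: the distributed branch keeps batch_size at imgs_per_gpu because each process drives a single GPU, while the non-distributed branch multiplies batch size and workers by num_gpus so one process can feed several GPUs.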
Example #6
def multi_gpu_test(model, data_loader, tmpdir=None):
    model.eval()
    results = []
    dataset = data_loader.dataset
    rank, world = get_dist_info()
    if rank == 0:
        prog_bar = commons.ProgressBar(len(dataset))

    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(mode='test', **data)
        results.append(result)

        if rank == 0:
            # approximate overall progress: each iteration processes
            # roughly `bs` samples on every one of the `world` ranks
            bs = len(data['img'])
            for _ in range(bs * world):
                prog_bar.update()
    results = collect_results(results, len(dataset), tmpdir)

    return results
Example #7
def collect_results_cpu(result_part, size, tmpdir=None):
    rank, world_size = get_dist_info()
    # create a tmp dir if it is not specified
    if tmpdir is None:
        MAX_LEN = 512
        # 32 is the ASCII code for a space, used to pad the path
        dir_tensor = torch.full((MAX_LEN, ),
                                32,
                                dtype=torch.uint8,
                                device='cuda')
        if rank == 0:
            tmpdir = tempfile.mkdtemp()
            tmpdir = torch.tensor(bytearray(tmpdir.encode()),
                                  dtype=torch.uint8,
                                  device='cuda')
            dir_tensor[:len(tmpdir)] = tmpdir
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
    else:
        commons.mkdir_or_exist(tmpdir)
    # dump the part result to the dir
    commons.dump(result_part, os.path.join(tmpdir, f'part_{rank}.pkl'))
    dist.barrier()
    # collect all parts
    if rank != 0:
        return None
    else:
        # load results of all parts from tmp dir
        part_list = []
        for i in range(world_size):
            part_file = os.path.join(tmpdir, f'part_{i}.pkl')
            part_list.append(commons.load(part_file))
        # sort the results
        ordered_results = []
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        # remove tmp dir
        shutil.rmtree(tmpdir)
        return ordered_results
Example #8
def main():
    args = parse_args()
    cfg = commons.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # init distributed env first, since later steps depend on the dist info
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(dataset,
                                   imgs_per_gpu=1,
                                   workers_per_gpu=cfg.data.workers_per_gpu,
                                   dist=distributed,
                                   shuffle=False)
    # build the model and load checkpoint
    model = build_model(cfg.model)
    check_item = args.checkpoint[0]
    checkpoint = load_checkpoint(model,
                                 os.path.join(
                                     cfg.work_dir,
                                     'epoch_' + str(check_item) + '.pth'),
                                 map_location='cpu')
    label2ans = dataset.label2ans

    gpu_id = dist.get_rank() % torch.cuda.device_count()
    torch.cuda.set_device(gpu_id)
    model = model.cuda()
    if cfg.fp_16.enable:
        model = amp.initialize(model,
                               opt_level=cfg.fp_16.opt_level,
                               loss_scale=cfg.fp_16.loss_scale,
                               max_loss_scale=cfg.fp_16.max_loss_scale)
        print('**** Initializing mixed precision done. ****')
    model = MMDistributedDataParallel(
        model,
        device_ids=[torch.cuda.current_device()],
        broadcast_buffers=False,
    )
    outputs = multi_gpu_test(model, data_loader, args.tmpdir)

    rank, _ = get_dist_info()
    if rank == 0:
        output_path = os.path.join(cfg.work_dir, "test_results")
        commons.mkdir_or_exist(output_path)
        out_list = []
        with open("outputs.pkl", 'wb') as f:
            pickle.dump(outputs, f)

        ids = outputs["ids"]
        preds = outputs["pred"]

        for idx, pred in zip(ids, preds):
            q_id = dataset.q_id_list[int(idx)]
            pred_index = np.argmax(pred, axis=0)
            answer = dataset.label2ans[pred_index]
            out_list.append({'question_id': q_id, 'answer': answer})

        print('\nwriting results to {}'.format(output_path))
        commons.dump(
            out_list,
            os.path.join(output_path,
                         "test_submit_{0}.json".format(str(check_item))))
        os.system("rm -rf outputs.pkl")