def reduce_loss_dict(loss_dict):
    """Average a dictionary of loss tensors across processes onto rank 0.

    When fewer than two processes are running, ``loss_dict`` is returned
    untouched. Otherwise the values are stacked in sorted-key order,
    reduced onto rank 0 and, on that rank only, divided by the world size.

    Returns:
        A dict with the same keys as ``loss_dict``. Only rank 0 applies the
        division by the world size, so only rank 0 holds averaged values.
    """
    num_procs = get_world_size()
    if num_procs < 2:
        return loss_dict

    with torch.no_grad():
        # Keys are sorted before stacking -- presumably so that every
        # process stacks its losses in the same order.
        names = sorted(loss_dict.keys())
        stacked = torch.stack([loss_dict[name] for name in names], dim=0)
        torch.distributed.reduce(stacked, dst=0)
        if torch.distributed.get_rank() == 0:
            # Only the destination rank receives the accumulated result,
            # so the division is applied there alone.
            stacked /= num_procs
        averaged = dict(zip(names, stacked))
    return averaged
def __init__(self, cfg):
    """Build the image-level and box-level augmenters described by ``cfg``.

    ``cfg.AUTOAUG.LIST`` is consumed as a flat sequence of numbers:

    * entries 0-3: zoom-out probability (multiplied by 0.05), zoom-out
      level, zoom-in probability (multiplied by 0.05), zoom-in level;
    * then six entries per sub-policy: color op index, probability
      (multiplied by 0.1), level, followed by the same triple for the
      geometric op. Op indices wrap around the number of available ops;
    * then six trailing entries: three ``'area'`` values and three
      ``'prob'`` values for the scale ratios.

    Also stores the per-process batch size and a worker count of at least
    one on the instance.
    """
    search_params = cfg.AUTOAUG.LIST
    subpolicy_count = cfg.AUTOAUG.NUM_SUBPOLICIES
    max_iters = cfg.SOLVER.MAX_ITER
    scale_splits = cfg.AUTOAUG.SCALE_SPLITS
    box_prob = cfg.AUTOAUG.BOX_PROB

    # Image-level zoom augmentations come from the first four parameters.
    img_params = search_params[:4]
    self.img_augs = Img_augs(img_augs_dict={
        'zoom_out': {'prob': img_params[0] * 0.05, 'level': img_params[1]},
        'zoom_in': {'prob': img_params[2] * 0.05, 'level': img_params[3]},
    })

    # Everything after the first four parameters drives the box-level augs.
    box_params = search_params[4:]
    color_ops = list(color_aug_func.keys())
    geometric_ops = list(geometric_aug_func.keys())

    def _sub_policy(offset):
        # One sub-policy = (color op, prob, level) + (geometric op, prob, level).
        color_step = (
            color_ops[box_params[offset + 0] % len(color_ops)],
            box_params[offset + 1] * 0.1,
            box_params[offset + 2],
        )
        geometric_step = (
            geometric_ops[box_params[offset + 3] % len(geometric_ops)],
            box_params[offset + 4] * 0.1,
            box_params[offset + 5],
        )
        return [color_step, geometric_step]

    policies = [_sub_policy(index * 6) for index in range(subpolicy_count)]

    # The scale-ratio parameters sit right after the last sub-policy.
    tail = subpolicy_count * 6
    scale_ratios = {
        'area': [box_params[tail + shift] for shift in range(0, 3)],
        'prob': [box_params[tail + shift] for shift in range(3, 6)],
    }

    self.box_augs = Box_augs(
        box_augs_dict={'policies': policies, 'scale_ratios': scale_ratios},
        max_iters=max_iters,
        scale_splits=scale_splits,
        box_prob=box_prob,
    )

    self.max_iters = max_iters
    self.count = 0

    world_size = get_world_size()
    self.batch_size = cfg.SOLVER.IMS_PER_BATCH // world_size

    # A configured worker count of zero is bumped to one.
    workers = cfg.DATALOADER.NUM_WORKERS
    self.num_workers = workers + 1 if workers == 0 else workers
def make_data_loader(cfg, is_train=True, is_distributed=False, start_iter=0):
    """Create the data loader(s) for training or testing.

    Args:
        cfg: Configuration node; the SOLVER, TEST, DATALOADER, DATASETS and
            PATHS_CATALOG entries are read.
        is_train: Selects the training datasets and batch size when true,
            the test ones otherwise.
        is_distributed: Forwarded to the sampler factory; in test mode it
            also decides whether the data is shuffled.
        start_iter: Iteration to resume from. Forced to 0 in test mode.

    Returns:
        A single ``DataLoader`` in training mode (exactly one dataset is
        asserted), otherwise a list with one ``DataLoader`` per dataset.
    """
    world_size = get_world_size()

    if is_train:
        images_per_batch = cfg.SOLVER.IMS_PER_BATCH
        not_divisible = (
            "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number "
            "of GPUs ({}) used."
        )
    else:
        images_per_batch = cfg.TEST.IMS_PER_BATCH
        not_divisible = (
            "TEST.IMS_PER_BATCH ({}) must be divisible by the number "
            "of GPUs ({}) used."
        )
    assert images_per_batch % world_size == 0, not_divisible.format(
        images_per_batch, world_size)
    images_per_gpu = images_per_batch // world_size

    if is_train:
        shuffle = True
        num_iters = cfg.SOLVER.MAX_ITER
    else:
        shuffle = True if is_distributed else False
        num_iters = None
        start_iter = 0

    if images_per_gpu > 1:
        logging.getLogger(__name__).warning(
            "When using more than one image per GPU you may encounter "
            "an out-of-memory (OOM) error if your GPU does not have "
            "sufficient memory. If this happens, you can reduce "
            "SOLVER.IMS_PER_BATCH (for training) or "
            "TEST.IMS_PER_BATCH (for inference). For training, you must "
            "also adjust the learning rate and schedule length according "
            "to the linear scaling rule. See for example: "
            "https://github.com/facebookresearch/Detectron/blob/master/configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml#L14"
        )

    # Images with a similar aspect ratio are grouped together. Only two
    # groups are used here (width / height > 1 and the rest), although the
    # grouping code supports more general strategies.
    if cfg.DATALOADER.ASPECT_RATIO_GROUPING:
        aspect_grouping = [1]
    else:
        aspect_grouping = []

    catalog_module = import_file("fcos_core.config.paths_catalog",
                                 cfg.PATHS_CATALOG, True)
    dataset_catalog = catalog_module.DatasetCatalog
    dataset_names = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST

    # With test-time bbox augmentation enabled, no transforms are attached
    # to the dataset here; they are applied later.
    bbox_aug_at_test = not is_train and cfg.TEST.BBOX_AUG.ENABLED
    if bbox_aug_at_test:
        transforms = None
    else:
        transforms = build_transforms(cfg, is_train)
    datasets = build_dataset(dataset_names, transforms, dataset_catalog,
                             is_train)

    def _loader_for(dataset):
        # Assemble sampler, batch sampler and collator for one dataset.
        sampler = make_data_sampler(dataset, shuffle, is_distributed)
        batch_sampler = make_batch_data_sampler(
            dataset, sampler, aspect_grouping, images_per_gpu, num_iters,
            start_iter)
        if bbox_aug_at_test:
            collator = BBoxAugCollator()
        else:
            collator = BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY)
        return torch.utils.data.DataLoader(
            dataset,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=batch_sampler,
            collate_fn=collator,
        )

    data_loaders = [_loader_for(dataset) for dataset in datasets]

    if not is_train:
        return data_loaders
    # During training a single (possibly concatenated) data loader is returned.
    assert len(data_loaders) == 1
    return data_loaders[0]
def make_data_loader(cfg, is_train=True, is_distributed=False, start_iter=0):
    """Create the data loader(s) for training or testing.

    Args:
        cfg: Configuration node; the SOLVER, TEST, DATALOADER, DATASETS and
            PATHS_CATALOG entries are read.
        is_train: Selects the training datasets and batch size when true,
            the test ones otherwise.
        is_distributed: Forwarded to the sampler factory; in test mode it
            also decides whether the data is shuffled.
        start_iter: Iteration to resume from. Forced to 0 in test mode.

    Returns:
        A single ``DataLoader`` in training mode, otherwise a list with one
        ``DataLoader`` per dataset.

    Raises:
        AssertionError: If the configured batch size is not divisible by
            the number of GPUs, or if training yields anything other than
            exactly one data loader.
    """
    logger = logging.getLogger(__name__)

    # Number of participating GPUs / processes.
    num_gpus = get_world_size()
    if is_train:
        # Total images per batch across all GPUs (e.g. 16).
        images_per_batch = cfg.SOLVER.IMS_PER_BATCH
        # The batch must split evenly across the GPUs.
        assert images_per_batch % num_gpus == 0, (
            "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number "
            "of GPUs ({}) used.".format(images_per_batch, num_gpus)
        )
        images_per_gpu = images_per_batch // num_gpus
        shuffle = True
        # Upper bound on the number of training iterations (e.g. 40000).
        num_iters = cfg.SOLVER.MAX_ITER
    else:
        images_per_batch = cfg.TEST.IMS_PER_BATCH
        assert images_per_batch % num_gpus == 0, (
            "TEST.IMS_PER_BATCH ({}) must be divisible by the number "
            "of GPUs ({}) used.".format(images_per_batch, num_gpus)
        )
        images_per_gpu = images_per_batch // num_gpus
        shuffle = False if not is_distributed else True
        num_iters = None
        start_iter = 0

    if images_per_gpu > 1:
        # Warn about possible out-of-memory problems.
        logger.warning(
            "When using more than one image per GPU you may encounter "
            "an out-of-memory (OOM) error if your GPU does not have "
            "sufficient memory. If this happens, you can reduce "
            "SOLVER.IMS_PER_BATCH (for training) or "
            "TEST.IMS_PER_BATCH (for inference). For training, you must "
            "also adjust the learning rate and schedule length according "
            "to the linear scaling rule. See for example: "
            "https://github.com/facebookresearch/Detectron/blob/master/configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml#L14"
        )

    # Group images which have a similar aspect ratio. In this case, we only
    # group in two cases: those with width / height > 1, and the other way
    # around, but the code supports more general grouping strategies.
    aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else []

    # cfg.PATHS_CATALOG is the path of the script that describes the
    # datasets; it is loaded as the module "fcos_core.config.paths_catalog".
    paths_catalog = import_file("fcos_core.config.paths_catalog",
                                cfg.PATHS_CATALOG, True)
    # The DatasetCatalog class itself (not an instance) is handed to
    # build_dataset.
    DatasetCatalog = paths_catalog.DatasetCatalog

    # The dataset names differ between training and testing, for example
    #   train: ("coco_2014_train", "coco_2014_valminusminival")
    #   test:  ("coco_2014_minival",)
    dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST
    # Logged at debug level instead of printed, so that library code does
    # not write to stdout on every call (and on every rank).
    logger.debug("Datasets to load: %s", dataset_list)

    # Transforms applied to the input images.
    transforms = build_transforms(cfg, is_train)
    datasets = build_dataset(dataset_list, transforms, DatasetCatalog,
                             is_train)

    data_loaders = []
    for dataset in datasets:
        sampler = make_data_sampler(dataset, shuffle, is_distributed)
        batch_sampler = make_batch_data_sampler(
            dataset, sampler, aspect_grouping, images_per_gpu, num_iters,
            start_iter)
        collator = BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY)
        num_workers = cfg.DATALOADER.NUM_WORKERS
        data_loader = torch.utils.data.DataLoader(
            dataset,
            num_workers=num_workers,
            batch_sampler=batch_sampler,
            collate_fn=collator,
        )
        data_loaders.append(data_loader)

    if is_train:
        # During training, a single (possibly concatenated) data_loader is
        # returned.
        assert len(data_loaders) == 1
        return data_loaders[0]
    return data_loaders