Example #1
    def __init__(self, cfg):
        """
        Args:
            cfg (CfgNode):
        """
        super().__init__()
        logger = logging.getLogger("detectron2")
        if not logger.isEnabledFor(
                logging.INFO):  # setup_logger is not called for d2
            setup_logger()
        cfg = DefaultTrainer.auto_scale_workers(cfg, comm.get_world_size())

        # Assume these objects must be constructed in this order.
        model = self.build_model(cfg)
        optimizer = self.build_optimizer(cfg, model)
        data_loader = self.build_train_loader(cfg)

        # For training, wrap with DDP. But don't need this for inference.
        if comm.get_world_size() > 1:
            model = DistributedDataParallel(model,
                                            device_ids=[comm.get_local_rank()],
                                            broadcast_buffers=False)
        self._trainer = (AMPTrainer if cfg.SOLVER.AMP.ENABLED else
                         SimpleTrainer)(model, data_loader, optimizer)

        self.scheduler = self.build_lr_scheduler(cfg, optimizer)
        # Assume no other objects need to be checkpointed.
        # We can later make it checkpoint the stateful hooks
        self.checkpointer = DetectionCheckpointer(
            # Assume you want to save checkpoints together with logs/statistics
            model,
            cfg.OUTPUT_DIR,
            optimizer=optimizer,
            scheduler=self.scheduler,
        )
        self.start_iter = 0
        self.max_iter = cfg.SOLVER.MAX_ITER
        self.cfg = cfg

        self.register_hooks(self.build_hooks())
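
A minimal usage sketch for a trainer built like the above: assuming a `setup(args)` helper that builds the config and a subclass (here called `MyTrainer`, a hypothetical name) whose `__init__` is shown above, it would typically be launched across GPUs with detectron2's `launch` helper.

from detectron2.engine import default_argument_parser, launch

def main(args):
    cfg = setup(args)         # assumed helper that builds and freezes a CfgNode
    trainer = MyTrainer(cfg)  # hypothetical subclass whose __init__ is shown above
    trainer.train()

if __name__ == "__main__":
    args = default_argument_parser().parse_args()
    launch(
        main,
        args.num_gpus,                    # worker processes per machine
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        args=(args,),
    )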
Example #2
def build_batch_data_loader(  # type: ignore[no-untyped-def]
    dataset,
    sampler,
    total_batch_size: int,
    *,
    aspect_ratio_grouping: bool = False,
    num_workers: int = 0,
    drop_last: bool = True,
) -> Union[torch.utils.data.DataLoader, AspectRatioGroupedDataset]:
    """
    Build a batched dataloader for training.
    Modified from detectron2 to expose the `drop_last` option.

    Args:
        dataset (torch.utils.data.Dataset): map-style PyTorch dataset. Can be indexed.
        sampler (torch.utils.data.sampler.Sampler): a sampler that produces indices
        total_batch_size, aspect_ratio_grouping, num_workers): see
            :func:`build_detection_train_loader`.

    Returns:
        iterable[list]. Length of each list is the batch size of the current
            GPU. Each element in the list comes from the dataset.
    """
    world_size = get_world_size()
    assert (
        total_batch_size > 0 and total_batch_size % world_size == 0
    ), "Total batch size ({}) must be divisible by the number of gpus ({}).".format(
        total_batch_size, world_size)

    batch_size = total_batch_size // world_size
    if aspect_ratio_grouping:
        data_loader = torch.utils.data.DataLoader(
            dataset,
            sampler=sampler,
            num_workers=num_workers,
            batch_sampler=None,
            collate_fn=operator.itemgetter(
                0),  # don't batch, but yield individual elements
            worker_init_fn=worker_init_reset_seed,
        )  # yield individual mapped dict
        return AspectRatioGroupedDataset(data_loader, batch_size)
    else:
        batch_sampler = torch.utils.data.sampler.BatchSampler(
            sampler, batch_size,
            drop_last=drop_last)  # srnet: expose drop_last to caller
        return torch.utils.data.DataLoader(
            dataset,
            num_workers=num_workers,
            batch_sampler=batch_sampler,
            collate_fn=trivial_batch_collator,
            worker_init_fn=worker_init_reset_seed,
        )
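
A rough usage sketch for this variant; the `dataset` object and batch sizes are assumptions, and `aspect_ratio_grouping` is disabled so the exposed `drop_last` option actually takes effect.

from detectron2.data.samplers import TrainingSampler

sampler = TrainingSampler(len(dataset))   # infinite stream of shuffled indices
loader = build_batch_data_loader(
    dataset,
    sampler,
    total_batch_size=16,          # summed over all GPUs
    aspect_ratio_grouping=False,  # drop_last only matters in this branch
    num_workers=4,
    drop_last=False,              # keep the final, possibly smaller, batch
)
for batch in loader:              # each batch is a list of per-image dicts; iterates indefinitely
    ...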
Example #3
def build_batch_data_loader(dataset,
                            sampler,
                            total_batch_size,
                            *,
                            aspect_ratio_grouping=False,
                            num_workers=0):
    """
    Build a batched dataloader. The main differences from `torch.utils.data.DataLoader` are:
    1. support aspect ratio grouping options
    2. use no "batch collation", because this is common for detection training

    Args:
        dataset (torch.utils.data.Dataset): a pytorch map-style or iterable dataset.
        sampler (torch.utils.data.sampler.Sampler or None): a sampler that produces indices.
            Must be provided iff. ``dataset`` is a map-style dataset.
        total_batch_size, aspect_ratio_grouping, num_workers): see
            :func:`build_detection_train_loader`.

    Returns:
        iterable[list]. Length of each list is the batch size of the current
            GPU. Each element in the list comes from the dataset.
    """
    world_size = get_world_size()
    assert (
        total_batch_size > 0 and total_batch_size % world_size == 0
    ), "Total batch size ({}) must be divisible by the number of gpus ({}).".format(
        total_batch_size, world_size)
    batch_size = total_batch_size // world_size

    if isinstance(dataset, torchdata.IterableDataset):
        assert sampler is None, "sampler must be None if dataset is IterableDataset"
    else:
        dataset = ToIterableDataset(dataset, sampler)

    if aspect_ratio_grouping:
        data_loader = torchdata.DataLoader(
            dataset,
            num_workers=num_workers,
            collate_fn=operator.itemgetter(
                0),  # don't batch, but yield individual elements
            worker_init_fn=worker_init_reset_seed,
        )  # yield individual mapped dict
        return AspectRatioGroupedDataset(data_loader, batch_size)
    else:
        return torchdata.DataLoader(
            dataset,
            batch_size=batch_size,
            drop_last=True,
            num_workers=num_workers,
            collate_fn=trivial_batch_collator,
            worker_init_fn=worker_init_reset_seed,
        )
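
For reference, the two small helpers this loader relies on are roughly as follows (paraphrased sketches; the actual detectron2 implementations may differ in detail).

import torch
from detectron2.utils.env import seed_all_rng

def trivial_batch_collator(batch):
    # keep the batch as a plain list of dataset dicts instead of collating into tensors
    return batch

def worker_init_reset_seed(worker_id):
    # give every DataLoader worker process a different, deterministic RNG seed
    initial_seed = torch.initial_seed() % 2 ** 31
    seed_all_rng(initial_seed + worker_id)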
Example #4
    def fcos_losses(self, instances):
        num_classes = instances.logits_pred.size(1)
        assert num_classes == self.num_classes

        labels = instances.labels.flatten()

        pos_inds = torch.nonzero(labels != num_classes).squeeze(1)
        num_pos_local = pos_inds.numel()
        num_gpus = get_world_size()
        total_num_pos = reduce_sum(pos_inds.new_tensor([num_pos_local])).item()
        num_pos_avg = max(total_num_pos / num_gpus, 1.0)

        # prepare one_hot
        class_target = torch.zeros_like(instances.logits_pred)
        class_target[pos_inds, labels[pos_inds]] = 1

        class_loss = sigmoid_focal_loss_jit(
            instances.logits_pred,
            class_target,
            alpha=self.focal_loss_alpha,
            gamma=self.focal_loss_gamma,
            reduction="sum",
        ) / num_pos_avg

        instances = instances[pos_inds]
        instances.pos_inds = pos_inds

        ctrness_targets = compute_ctrness_targets(instances.reg_targets)
        ctrness_targets_sum = ctrness_targets.sum()
        loss_denorm = max(
            reduce_sum(ctrness_targets_sum).item() / num_gpus, 1e-6)
        instances.gt_ctrs = ctrness_targets

        if pos_inds.numel() > 0:
            reg_loss = self.loc_loss_func(instances.reg_pred,
                                          instances.reg_targets,
                                          ctrness_targets) / loss_denorm

            ctrness_loss = F.binary_cross_entropy_with_logits(
                instances.ctrness_pred, ctrness_targets,
                reduction="sum") / num_pos_avg
        else:
            reg_loss = instances.reg_pred.sum() * 0
            ctrness_loss = instances.ctrness_pred.sum() * 0

        losses = {
            "loss_fcos_cls": class_loss,
            "loss_fcos_loc": reg_loss,
            "loss_fcos_ctr": ctrness_loss
        }
        extras = {"instances": instances, "loss_denorm": loss_denorm}
        return extras, losses
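
The per-GPU positive counts are averaged across processes through `reduce_sum`; a helper of that kind typically looks roughly like this (a sketch, the AdelaiDet version may differ slightly).

import torch.distributed as dist
from detectron2.utils.comm import get_world_size

def reduce_sum(tensor):
    world_size = get_world_size()
    if world_size < 2:  # single process: nothing to reduce
        return tensor
    tensor = tensor.clone()
    dist.all_reduce(tensor, op=dist.ReduceOp.SUM)
    return tensor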
Example #5
def default_setup(cfg, args=None):
    """
    Perform some basic common setups at the beginning of a job, including:

    1. Set up the detectron2 logger
    2. Log basic information about environment, cmdline arguments, and config
    3. Backup the config to the output directory

    Args:
        cfg (CfgNode): the full config to be used
        args (argparse.Namespace): the command line arguments to be logged
    """
    output_dir = cfg.OUTPUT_DIR
    if comm.is_main_process() and output_dir:
        PathManager.mkdirs(output_dir)

    rank = comm.get_rank()
    setup_logger(output_dir, distributed_rank=rank, name="fvcore")

    logger = setup_logger(output_dir, distributed_rank=rank)

    handlers = []
    handlers.append(logger.handlers[-2])
    handlers.append(logger.handlers[-1])
    logger.handlers = handlers

    logger.info("Rank of current process: {}. World size: {}".format(
        rank, comm.get_world_size()))
    logger.info("Environment info:\n" + collect_env_info())

    logger.info("Command line arguments: " + str(args))
    if hasattr(args, "config_file"):
        logger.info("Contents of args.config_file={}:\n{}".format(
            args.config_file,
            PathManager.open(args.config_file, "r").read()))

    logger.info("Running with full config:\n{}".format(cfg))
    if comm.is_main_process() and output_dir:
        # Note: some of our scripts may expect the existence of
        # config.yaml in output directory
        path = os.path.join(output_dir, "config.yaml")
        with PathManager.open(path, "w") as f:
            f.write(cfg.dump())
        logger.info("Full config saved to {}".format(os.path.abspath(path)))

    # make sure each worker has a different, yet deterministic seed if specified
    seed_all_rng(None if cfg.SEED < 0 else cfg.SEED + rank)

    # cudnn benchmark has large overhead. It shouldn't be used considering the small size of
    # typical validation set.
    if not (hasattr(args, "eval_only") and args.eval_only):
        torch.backends.cudnn.benchmark = cfg.CUDNN_BENCHMARK
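
A typical call site for `default_setup`, following the usual detectron2 `tools/train_net.py` pattern (a sketch; the surrounding `main`/`launch` plumbing is omitted).

from detectron2.config import get_cfg
from detectron2.engine import default_argument_parser

def setup(args):
    cfg = get_cfg()
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    default_setup(cfg, args)  # the function defined above
    return cfg

if __name__ == "__main__":
    cfg = setup(default_argument_parser().parse_args())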
Example #6
    def wrap_model_with_ddp(self, cfg, model):
        """
        Returns:
            torch.nn.Module:

        Override this function if you need to customize `torch.nn.parallel.DistributedDataParallel`,
        for example by adding `find_unused_parameters=True`.
        """
        if comm.get_world_size() > 1:
            model = DistributedDataParallel(model,
                                            device_ids=[comm.get_local_rank()],
                                            broadcast_buffers=False)
        return model

    def _get_config(self):
        cfg = get_cfg()
        add_deeplab_config(cfg)
        defaultConfig = optionsHelper.get_hierarchical_value(self.options, ['options', 'model', 'config', 'value', 'id'])
        configFile = os.path.join(os.getcwd(), 'ai/models/detectron2/_functional/configs', defaultConfig)
        cfg.merge_from_file(configFile)

        # disable SyncBatchNorm if not running on distributed system
        if comm.get_world_size() <= 1:
            cfg.MODEL.RESNETS.NORM = 'BN'
            cfg.MODEL.SEM_SEG_HEAD.NORM = 'BN'

        return cfg
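
A hedged sketch of the override hinted at in the `wrap_model_with_ddp` docstring above, written as a standalone function (the enclosing trainer class is assumed and omitted).

from torch.nn.parallel import DistributedDataParallel
from detectron2.utils import comm

def wrap_model_with_ddp_find_unused(cfg, model):
    # Same as wrap_model_with_ddp, but tolerates parameters that are unused
    # in some forward passes (find_unused_parameters=True).
    if comm.get_world_size() > 1:
        model = DistributedDataParallel(model,
                                        device_ids=[comm.get_local_rank()],
                                        broadcast_buffers=False,
                                        find_unused_parameters=True)
    return model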
Example #8
def _compute_num_images_per_worker(cfg: CfgNode):
    num_workers = get_world_size()
    images_per_batch = cfg.SOLVER.IMS_PER_BATCH
    assert (
        images_per_batch % num_workers == 0
    ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
        images_per_batch, num_workers)
    assert (
        images_per_batch >= num_workers
    ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
        images_per_batch, num_workers)
    images_per_worker = images_per_batch // num_workers
    return images_per_worker

    def __init__(self, size: int):
        """
        Args:
            size (int): the total number of data of the underlying dataset to sample from
        """
        self._size = size
        assert size > 0
        self._rank = comm.get_rank()
        self._world_size = comm.get_world_size()

        shard_size = (self._size - 1) // self._world_size + 1
        self.begin = shard_size * self._rank
        self.end = min(shard_size * (self._rank + 1), self._size)
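
The shard boundaries computed above are typically consumed by an `__iter__`/`__len__` pair like the following (a sketch in the style of detectron2's `InferenceSampler`; not part of the original snippet).

    def __iter__(self):
        # each rank yields only its contiguous [begin, end) slice of indices
        yield from range(self.begin, self.end)

    def __len__(self):
        return self.end - self.begin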
Example #10
def load_kitti2cityscapes_instances(image_dir,
                                    gt_dir,
                                    from_json=True,
                                    to_polygons=True,
                                    istest=False):
    """
    Args:
        image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train".
        gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train".
        from_json (bool): whether to read annotations from the raw json file or the png files.
        to_polygons (bool): whether to represent the segmentation as polygons
            (COCO's format) instead of masks (cityscapes's format).

    Returns:
        list[dict]: a list of dicts in Detectron2 standard format. (See
        `Using Custom Datasets </tutorials/datasets.html>`_ )
    """
    if from_json:
        assert to_polygons, (
            "Cityscapes's json annotations are in polygon format. "
            "Converting to mask format is not supported now.")
    files = _get_kitti2cityscapes_files(image_dir, gt_dir, istest)

    logger.info("Preprocessing kitti2citycsapes annotations ...")
    # This is still not fast: all workers will execute duplicated work, which
    # can take up to 10 minutes on an 8-GPU server.
    pool = mp.Pool(processes=max(mp.cpu_count() // get_world_size() // 2, 4))

    ret = pool.map(
        functools.partial(_kitti2cityscapes_files_to_dict,
                          from_json=from_json,
                          to_polygons=to_polygons,
                          istest=istest),
        files,
    )
    logger.info("Loaded {} images from {}".format(len(ret), image_dir))

    if not istest:
        # Map cityscape ids to contiguous ids
        from .kitti2cityscapes_label import labels

        labels = [l for l in labels if l.hasInstances and not l.ignoreInEval]
        dataset_id_to_contiguous_id = {
            l.id: idx
            for idx, l in enumerate(labels)
        }
        for dict_per_image in ret:
            for anno in dict_per_image["annotations"]:
                anno["category_id"] = dataset_id_to_contiguous_id[
                    anno["category_id"]]
    return ret
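
A loader like this is usually registered with detectron2's `DatasetCatalog` so other components can look it up by name; a hypothetical registration sketch (dataset name and paths are placeholders):

from detectron2.data import DatasetCatalog, MetadataCatalog

image_dir = "datasets/kitti2cityscapes/leftImg8bit/train"  # placeholder path
gt_dir = "datasets/kitti2cityscapes/gtFine/train"          # placeholder path
DatasetCatalog.register(
    "kitti2cityscapes_train",  # placeholder dataset name
    lambda: load_kitti2cityscapes_instances(image_dir, gt_dir,
                                            from_json=True, to_polygons=True),
)
MetadataCatalog.get("kitti2cityscapes_train").set(evaluator_type="cityscapes_instance")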
Example #11
def build_semisup_batch_data_loader_two_crop(dataset,
                                             sampler,
                                             total_batch_size_label,
                                             total_batch_size_unlabel,
                                             *,
                                             aspect_ratio_grouping=False,
                                             num_workers=0):
    world_size = get_world_size()
    assert (
        total_batch_size_label > 0 and total_batch_size_label % world_size == 0
    ), "Total label batch size ({}) must be divisible by the number of gpus ({}).".format(
        total_batch_size_label, world_size)

    assert (
        total_batch_size_unlabel > 0
        and total_batch_size_unlabel % world_size == 0
    ), "Total unlabel batch size ({}) must be divisible by the number of gpus ({}).".format(
        total_batch_size_label, world_size)

    batch_size_label = total_batch_size_label // world_size
    batch_size_unlabel = total_batch_size_unlabel // world_size

    label_dataset, unlabel_dataset = dataset
    label_sampler, unlabel_sampler = sampler

    if aspect_ratio_grouping:
        label_data_loader = torch.utils.data.DataLoader(
            label_dataset,
            sampler=label_sampler,
            num_workers=num_workers,
            batch_sampler=None,
            collate_fn=operator.itemgetter(
                0),  # don't batch, but yield individual elements
            worker_init_fn=worker_init_reset_seed,
        )  # yield individual mapped dict
        unlabel_data_loader = torch.utils.data.DataLoader(
            unlabel_dataset,
            sampler=unlabel_sampler,
            num_workers=num_workers,
            batch_sampler=None,
            collate_fn=operator.itemgetter(
                0),  # don't batch, but yield individual elements
            worker_init_fn=worker_init_reset_seed,
        )  # yield individual mapped dict
        return AspectRatioGroupedSemiSupDatasetTwoCrop(
            (label_data_loader, unlabel_data_loader),
            (batch_size_label, batch_size_unlabel),
        )
    else:
        raise NotImplementedError(
            "ASPECT_RATIO_GROUPING = False is not supported yet")
Example #12
def build_batch_data_loader(dataset,
                            sampler,
                            total_batch_size,
                            *,
                            aspect_ratio_grouping=False,
                            num_workers=0):
    """
    Build a batched dataloader for training.

    Args:
        dataset (torch.utils.data.Dataset): map-style PyTorch dataset. Can be indexed.
        sampler (torch.utils.data.sampler.Sampler): a sampler that produces indices
        total_batch_size, aspect_ratio_grouping, num_workers): see
            :func:`build_detection_train_loader`.

    Returns:
        iterable[list]. Length of each list is the batch size of the current
            GPU. Each element in the list comes from the dataset.
    """
    world_size = get_world_size()
    assert (
        total_batch_size > 0 and total_batch_size % world_size == 0
    ), "Total batch size ({}) must be divisible by the number of gpus ({}).".format(
        total_batch_size, world_size)
    # per-GPU batch size
    batch_size = total_batch_size // world_size
    if aspect_ratio_grouping:
        data_loader = torch.utils.data.DataLoader(
            dataset,
            sampler=sampler,
            num_workers=num_workers,
            batch_sampler=None,
            # operator.itemgetter(0) returns the 0th element of its argument
            collate_fn=operator.itemgetter(
                0),  # don't batch, but yield individual elements
            worker_init_fn=worker_init_reset_seed,
        )  # yield individual mapped dict
        # group consecutive samples with similar aspect ratio into batches
        return AspectRatioGroupedDataset(data_loader, batch_size)
    else:
        # batch_sampler yields one full batch of indices at a time
        batch_sampler = torch.utils.data.sampler.BatchSampler(
            sampler, batch_size,
            drop_last=True)  # drop_last so the batch always has the same size
        return torch.utils.data.DataLoader(
            dataset,
            num_workers=num_workers,
            batch_sampler=batch_sampler,
            collate_fn=trivial_batch_collator,  # do nothing: the default collate would merge samples into tensors, this one keeps the list as-is
            worker_init_fn=worker_init_reset_seed,
        )
Example #13
    def forward(self, outputs, targets):
        """ This performs the loss computation.
        Parameters:
             outputs: dict of tensors, see the output specification of the model for the format
             targets: list of dicts, such that len(targets) == batch_size.
                      The expected keys in each dict depend on the losses applied; see each loss' doc.
        """

        outputs_without_aux = {
            k: v
            for k, v in outputs.items()
            if k != 'aux_outputs' and k != 'enc_outputs'
        }

        # Retrieve the matching between the outputs of the last layer and the targets
        indices = self.matcher(outputs_without_aux, targets)

        # Compute the average number of target boxes across all nodes, for normalization purposes
        num_boxes = sum(len(t["labels"]) for t in targets)
        num_boxes = torch.as_tensor([num_boxes],
                                    dtype=torch.float,
                                    device=next(iter(outputs.values())).device)
        if is_dist_avail_and_initialized():
            torch.distributed.all_reduce(num_boxes)
        num_boxes = torch.clamp(num_boxes / comm.get_world_size(),
                                min=1).item()

        # Compute all the requested losses
        losses = {}
        for loss in self.losses:
            losses.update(
                self.get_loss(loss, outputs, targets, indices, num_boxes))

        # In case of auxiliary losses, we repeat this process with the output of each intermediate layer.
        if 'aux_outputs' in outputs:
            for i, aux_outputs in enumerate(outputs['aux_outputs']):
                indices = self.matcher(aux_outputs, targets)
                for loss in self.losses:
                    if loss == 'masks':
                        # Intermediate masks losses are too costly to compute, we ignore them.
                        continue
                    kwargs = {}
                    if loss == 'labels':
                        # Logging is enabled only for the last layer
                        kwargs = {'log': False}
                    l_dict = self.get_loss(loss, aux_outputs, targets, indices,
                                           num_boxes, **kwargs)
                    l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
                    losses.update(l_dict)

        return losses
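
The `num_boxes` normalization above only all-reduces when a process group exists, guarded by `is_dist_avail_and_initialized`; that DETR-style helper is roughly the following (paraphrased sketch).

import torch.distributed as dist

def is_dist_avail_and_initialized():
    if not dist.is_available():    # torch built without distributed support
        return False
    if not dist.is_initialized():  # single-process run, no process group
        return False
    return True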
Example #14
    def fcos_losses(self, labels, reg_targets, logits_pred, reg_pred,
                    ctrness_pred, gt_inds, mask_centers_targets):
        num_classes = logits_pred.size(1)
        assert num_classes == self.num_classes

        labels = labels.flatten()

        pos_inds = torch.nonzero(labels != num_classes).squeeze(1)
        num_pos_local = pos_inds.numel()
        num_gpus = get_world_size()
        total_num_pos = reduce_sum(pos_inds.new_tensor([num_pos_local])).item()
        num_pos_avg = max(total_num_pos / num_gpus, 1.0)

        # prepare one_hot
        class_target = torch.zeros_like(logits_pred)
        class_target[pos_inds, labels[pos_inds]] = 1

        class_loss = sigmoid_focal_loss_jit(
            logits_pred,
            class_target,
            alpha=self.focal_loss_alpha,
            gamma=self.focal_loss_gamma,
            reduction="sum",
        ) / num_pos_avg

        reg_pred = reg_pred[pos_inds]
        reg_targets = reg_targets[pos_inds]
        ctrness_pred = ctrness_pred[pos_inds]
        gt_inds = gt_inds[pos_inds]
        mask_center = mask_centers_targets[pos_inds]

        # TODO: this needs to be revised
        # ctrness_targets = compute_ctrness_targets(reg_targets)
        # ctrness_targets_sum = ctrness_targets.sum()
        # loss_denorm = max(reduce_sum(ctrness_targets_sum).item() / num_gpus, 1e-6)

        if pos_inds.numel() > 0:
            reg_loss = self.loc_loss_func(
                reg_pred,
                reg_targets,
                ctrness_pred,
                mask_center,
            )
        else:
            reg_loss = reg_pred.sum() * 0
        losses = {"loss_fcos_cls": class_loss, "loss_fcos_loc": reg_loss}
        extras = {
            "pos_inds": pos_inds,
            "gt_inds": gt_inds,
        }
        return losses, extras
Example #15
def build_ss_batch_data_loader(dataset,
                               sampler,
                               total_batch_size_label,
                               total_batch_size_unlabel,
                               *,
                               aspect_ratio_grouping=True,
                               num_workers=0):
    """Instantiates two data loaders based on provided metadata and wraps them into a single loader.

    Code is largely taken from `detectron2.data.build.build_batch_data_loader`.
    """
    world_size = get_world_size()

    # Check that batch sizes are divisible by the #GPUs
    assert (
        total_batch_size_label > 0 and total_batch_size_label % world_size == 0
    ), "Total label batch size ({}) must be divisible by the number of gpus ({}).".format(
        total_batch_size_label, world_size)
    assert (
        total_batch_size_unlabel > 0
        and total_batch_size_unlabel % world_size == 0
    ), "Total unlabel batch size ({}) must be divisible by the number of gpus ({}).".format(
        total_batch_size_label, world_size)

    # Calculate per-GPU batch sizes
    batch_size_label = total_batch_size_label // world_size
    batch_size_unlabel = total_batch_size_unlabel // world_size

    label_dataset, unlabel_dataset = dataset
    label_sampler, unlabel_sampler = sampler

    assert aspect_ratio_grouping, "ASPECT_RATIO_GROUPING = False is not supported yet"

    # Wrapper for DataLoader instantiation to avoid duplicate code
    create_data_loader = lambda dataset, sampler: torch.utils.data.DataLoader(
        dataset,
        sampler=sampler,
        num_workers=num_workers,
        batch_sampler=None,
        collate_fn=operator.itemgetter(
            0),  # don't batch, but yield individual elements
        worker_init_fn=worker_init_reset_seed,
    )  # yield individual mapped dict

    label_data_loader = create_data_loader(label_dataset, label_sampler)
    unlabel_data_loader = create_data_loader(unlabel_dataset, unlabel_sampler)

    return AspectRatioGroupedSSDataset(
        (label_data_loader, unlabel_data_loader),
        (batch_size_label, batch_size_unlabel),
    )
Example #16
def build_batch_data_loader(dataset,
                            sampler,
                            total_batch_size,
                            *,
                            aspect_ratio_grouping=False,
                            num_workers=0):
    """
    Build a batched dataloader for training.

    Args:
        dataset (torch.utils.data.Dataset): map-style PyTorch dataset. Can be indexed.
        sampler (torch.utils.data.sampler.Sampler): a sampler that produces indices
        total_batch_size (int): total batch size across GPUs.
        aspect_ratio_grouping (bool): whether to group images with similar
            aspect ratio for efficiency. When enabled, it requires each
            element in dataset be a dict with keys "width" and "height".
        num_workers (int): number of parallel data loading workers

    Returns:
        iterable[list]. Length of each list is the batch size of the current
            GPU. Each element in the list comes from the dataset.
    """
    world_size = get_world_size()
    assert (
        total_batch_size > 0 and total_batch_size % world_size == 0
    ), "Total batch size ({}) must be divisible by the number of gpus ({}).".format(
        total_batch_size, world_size)

    batch_size = total_batch_size // world_size
    if aspect_ratio_grouping:
        data_loader = torch.utils.data.DataLoader(
            dataset,
            sampler=sampler,
            num_workers=num_workers,
            batch_sampler=None,
            collate_fn=operator.itemgetter(
                0),  # don't batch, but yield individual elements
            worker_init_fn=worker_init_reset_seed,
        )  # yield individual mapped dict
        return AspectRatioGroupedDataset(data_loader, batch_size)
    else:
        batch_sampler = torch.utils.data.sampler.BatchSampler(
            sampler, batch_size,
            drop_last=True)  # drop_last so the batch always has the same size
        return torch.utils.data.DataLoader(
            dataset,
            num_workers=num_workers,
            batch_sampler=batch_sampler,
            collate_fn=trivial_batch_collator,
            worker_init_fn=worker_init_reset_seed,
        )
Example #17
def benchmark_data_advanced(args):
    # benchmark dataloader with more details to help analyze performance bottleneck
    cfg = setup(args)
    benchmark = create_data_benchmark(cfg, args)

    if comm.get_rank() == 0:
        benchmark.benchmark_dataset(100)
        benchmark.benchmark_mapper(100)
        benchmark.benchmark_workers(100, warmup=10)
        benchmark.benchmark_IPC(100, warmup=10)
    if comm.get_world_size() > 1:
        benchmark.benchmark_distributed(100)
        logger.info("Rerun ...")
        benchmark.benchmark_distributed(100)
Example #18
 def reset(self):
     self._working_dir = tempfile.TemporaryDirectory(
         prefix="cityscapes_eval_")
     self._temp_dir = self._working_dir.name
     # All workers will write to the same results directory
     # TODO this does not work in distributed training
     assert comm.get_local_size() == comm.get_world_size(), (
         "CityscapesEvaluator currently does not work with multiple machines.")
     self._temp_dir = comm.all_gather(self._temp_dir)[0]
     if self._temp_dir != self._working_dir.name:
         self._working_dir.cleanup()
     self._logger.info(
         "Writing cityscapes results to temporary directory {} ...".format(
             self._temp_dir))
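
The trick above relies on `comm.all_gather`: every rank contributes its own value and receives the full per-rank list, so indexing `[0]` makes all ranks agree on rank 0's directory. A small sketch of those semantics (the per-rank value is hypothetical):

from detectron2.utils import comm

local_dir = "/tmp/cityscapes_eval_rank{}".format(comm.get_rank())  # hypothetical per-rank value
all_dirs = comm.all_gather(local_dir)  # one entry per rank, in the same order on every rank
shared_dir = all_dirs[0]               # every rank adopts rank 0's directory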
Example #19
def fcos_losses(
    labels,
    reg_targets,
    logits_pred,
    reg_pred,
    ctrness_pred,
    focal_loss_alpha,
    focal_loss_gamma,
    iou_loss,
):
    num_classes = logits_pred.size(1)
    labels = labels.flatten()

    pos_inds = torch.nonzero(labels != num_classes).squeeze(1)
    num_pos_local = pos_inds.numel()
    num_gpus = get_world_size()
    total_num_pos = reduce_sum(pos_inds.new_tensor([num_pos_local])).item()
    num_pos_avg = max(total_num_pos / num_gpus, 1.0)

    # prepare one_hot
    class_target = torch.zeros_like(logits_pred)
    class_target[pos_inds, labels[pos_inds]] = 1

    class_loss = sigmoid_focal_loss_jit(
        logits_pred,
        class_target,
        alpha=focal_loss_alpha,
        gamma=focal_loss_gamma,
        reduction="sum",
    ) / num_pos_avg

    reg_pred = reg_pred[pos_inds]
    reg_targets = reg_targets[pos_inds]
    ctrness_pred = ctrness_pred[pos_inds]

    ctrness_targets = compute_ctrness_targets(reg_targets)
    ctrness_targets_sum = ctrness_targets.sum()
    ctrness_norm = max(reduce_sum(ctrness_targets_sum).item() / num_gpus, 1e-6)

    reg_loss = iou_loss(reg_pred, reg_targets, ctrness_targets) / ctrness_norm

    ctrness_loss = F.binary_cross_entropy_with_logits(
        ctrness_pred, ctrness_targets, reduction="sum") / num_pos_avg

    losses = {
        "loss_fcos_cls": class_loss,
        "loss_fcos_loc": reg_loss,
        "loss_fcos_ctr": ctrness_loss
    }
    return losses, {}
Example #20
def build_batch_data_loader(dataset,
                            sampler,
                            total_batch_size,
                            *,
                            aspect_ratio_grouping=False,
                            num_workers=0):
    """
    Build a batched dataloader. The main differences from `torch.utils.data.DataLoader` are:
    1. support aspect ratio grouping options
    2. use no "batch collation", because this is common for detection training

    Args:
        dataset (torch.utils.data.Dataset): map-style PyTorch dataset. Can be indexed.
        sampler (torch.utils.data.sampler.Sampler): a sampler that produces indices
        total_batch_size, aspect_ratio_grouping, num_workers): see
            :func:`build_detection_train_loader`.

    Returns:
        iterable[list]. Length of each list is the batch size of the current
            GPU. Each element in the list comes from the dataset.
    """
    world_size = get_world_size()
    assert (
        total_batch_size > 0 and total_batch_size % world_size == 0
    ), "Total batch size ({}) must be divisible by the number of gpus ({}).".format(
        total_batch_size, world_size)

    batch_size = total_batch_size // world_size
    if aspect_ratio_grouping:
        data_loader = torch.utils.data.DataLoader(
            dataset,
            sampler=sampler,
            num_workers=num_workers,
            batch_sampler=None,
            collate_fn=operator.itemgetter(
                0),  # don't batch, but yield individual elements
            worker_init_fn=worker_init_reset_seed,
        )  # yield individual mapped dict
        return AspectRatioGroupedDataset(data_loader, batch_size)
    else:
        batch_sampler = torch.utils.data.sampler.BatchSampler(
            sampler, batch_size,
            drop_last=True)  # drop_last so the batch always has the same size
        return torch.utils.data.DataLoader(
            dataset,
            num_workers=num_workers,
            batch_sampler=batch_sampler,
            collate_fn=trivial_batch_collator,
            worker_init_fn=worker_init_reset_seed,
        )
Example #21
def setup_loaders(cfg):
    loaders = {}
    loaders["train"] = build_data_loader(
        cfg, "MeshVox", "train", multigpu=comm.get_world_size() > 1
    )

    # Since sampling the mesh is now coupled with the data loader, we need to
    # make two different Dataset / DataLoaders for the training set: one for
    # training which uses precomputed samples, and one for evaluation which uses
    # more samples and computes them on the fly. This is sort of gross.
    loaders["train_eval"] = build_data_loader(cfg, "MeshVox", "train_eval", multigpu=False)

    loaders["val"] = build_data_loader(cfg, "MeshVox", "val", multigpu=False)
    return loaders
Example #22
 def setup_myargs_for_multiple_processing(myargs):
     from detectron2.utils import comm
     distributed = comm.get_world_size() > 1
     if distributed and comm.is_main_process():
         # setup logging in the project
         logfile = myargs.args.logfile
         logging_utils.get_logger(filename=logfile,
                                  logger_names=['template_lib', 'tl'],
                                  stream=True)
         logger = logging.getLogger('tl')
         myargs.logger = logger
         myargs.stdout = sys.stdout
         myargs.stderr = sys.stderr
         logging_utils.redirect_print_to_logger(logger=logger)
     return myargs

def main(args):
    cfg = setup(args)

    model = build_model(cfg)
    logger.info("Model:\n{}".format(model))
    DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
        cfg.MODEL.WEIGHTS, resume=args.resume)

    distributed = comm.get_world_size() > 1
    if distributed:
        model = DistributedDataParallel(model,
                                        device_ids=[comm.get_local_rank()],
                                        broadcast_buffers=False)

    do_infer(cfg, args, model)
Example #24
    def forward(self, input):
        if comm.get_world_size() == 1 or not self.training:
            return super().forward(input)

        B, C = input.shape[0], input.shape[1]

        mean = torch.mean(input, dim=[0, 2, 3])
        meansqr = torch.mean(input * input, dim=[0, 2, 3])

        if self._stats_mode == "":
            assert B > 0, 'SyncBatchNorm(stats_mode="") does not support zero batch size.'
            vec = torch.cat([mean, meansqr], dim=0)
            vec = differentiable_all_reduce(vec) * (1.0 /
                                                    dist.get_world_size())
            mean, meansqr = torch.split(vec, C)
            momentum = self.momentum
        else:
            if B == 0:
                vec = torch.zeros([2 * C + 1],
                                  device=mean.device,
                                  dtype=mean.dtype)
                vec = vec + input.sum(
                )  # make sure there is gradient w.r.t input
            else:
                vec = torch.cat([
                    mean, meansqr,
                    torch.ones([1], device=mean.device, dtype=mean.dtype)
                ],
                                dim=0)
            vec = differentiable_all_reduce(vec * B)

            total_batch = vec[-1].detach()
            momentum = total_batch.clamp(
                max=1) * self.momentum  # no update if total_batch is 0
            total_batch = torch.max(
                total_batch, torch.ones_like(total_batch))  # avoid div-by-zero
            mean, meansqr, _ = torch.split(vec / total_batch, C)

        var = meansqr - mean * mean
        invstd = torch.rsqrt(var + self.eps)
        scale = self.weight * invstd
        bias = self.bias - mean * scale
        scale = scale.reshape(1, -1, 1, 1)
        bias = bias.reshape(1, -1, 1, 1)

        self.running_mean += momentum * (mean.detach() - self.running_mean)
        self.running_var += momentum * (var.detach() - self.running_var)
        return input * scale + bias
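
`differentiable_all_reduce` used above is an all-reduce whose gradient is also all-reduced; a helper like that can be written roughly as follows (a sketch, the detectron2 version may differ in details such as process-group handling).

import torch
import torch.distributed as dist

class _AllReduceSum(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x):
        out = x.clone()
        dist.all_reduce(out, op=dist.ReduceOp.SUM)  # sum across ranks in forward
        return out

    @staticmethod
    def backward(ctx, grad_output):
        grad = grad_output.clone()
        dist.all_reduce(grad, op=dist.ReduceOp.SUM)  # and sum gradients in backward
        return grad

def differentiable_all_reduce_sketch(x):
    if not dist.is_available() or not dist.is_initialized() or dist.get_world_size() == 1:
        return x
    return _AllReduceSum.apply(x)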
Example #25
def main(args):
    train_name, num_class = regist_coco_dataset(args.train_annotation, args.thing_classes)
    val_name, _ = regist_coco_dataset(args.val_annotation, args.thing_classes)
    test_name, _ = regist_coco_dataset(args.test_annotation, args.thing_classes)
    cfg, hyperparameters = setup(args, train_name, val_name, test_name, num_class)
    dest_dir = os.path.join(cfg.OUTPUT_DIR, 'sample_compare_result')
    if not args.resume:
        if os.path.isdir(cfg.OUTPUT_DIR):
            shutil.rmtree(cfg.OUTPUT_DIR)
        os.mkdir(cfg.OUTPUT_DIR)
        os.mkdir(dest_dir)
    if hasattr(args, 'opts'):
        mlflow.log_params(hyperparameters)

    model = build_model(cfg)
    logger.info("Model:\n{}".format(model))
    if args.eval_only:
        DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
            cfg.MODEL.WEIGHTS, resume=args.resume
        )
        return do_evaluate(cfg, model)

    distributed = comm.get_world_size() > 1
    if distributed:
        model = DistributedDataParallel(
            model, device_ids=[comm.get_local_rank()], broadcast_buffers=False
        )

    model = do_train(cfg, model, resume=args.resume)
    # mlflow.pytorch.log_model(pytorch_model = model,
    #                      artifact_path = 'model_best',
    #                      conda_env = mlflow.pytorch.get_default_conda_env())
    mlflow.log_artifact(os.path.join(cfg.OUTPUT_DIR, f'model_{os.getenv("MLFLOW_EXPERIMENT_NAME")}.pth'))


    results = do_evaluate(cfg, model)
    mlflow.log_metrics({k + '_bbox': v for k, v in results['bbox'].items()})
    mlflow.log_metrics({k + '_segm': v for k, v in results['segm'].items()})
    experiment_name = os.getenv('MLFLOW_EXPERIMENT_NAME')

    compare_gt_coco(cfg,
                    annotation_file=args.test_annotation,
                    dest_dir=dest_dir,
                    weight=os.path.join(cfg.OUTPUT_DIR,
                                        f'model_{experiment_name}.pth'),
                    score_thres_test=0.7,
                    num_sample=num_class)

    mlflow.log_artifacts(dest_dir)
Example #26
    def __init__(self, dataset, num_replicas=None, rank=None):
        """
        Arguments:
             - dataset (:obj:`dataset`): instance of dataset object
        """
        if num_replicas is None:
            num_replicas = comm.get_world_size()
        if rank is None:
            rank = comm.get_rank()

        self.dataset = dataset
        self.num_replicas = num_replicas
        self.rank = rank
        self.epoch = 0
        self.num_samples = len(range(rank, len(self.dataset), num_replicas))
        self.total_size = len(self.dataset)
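
Hypothetical companion methods for the sampler above (not from the source): shuffle deterministically per epoch, then let each rank take a strided slice; assumes `torch` is imported in the module.

    def __iter__(self):
        g = torch.Generator()
        g.manual_seed(self.epoch)  # same permutation on every rank for a given epoch
        indices = torch.randperm(len(self.dataset), generator=g).tolist()
        return iter(indices[self.rank::self.num_replicas])  # strided per-rank slice

    def __len__(self):
        return self.num_samples

    def set_epoch(self, epoch):
        self.epoch = epoch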
Example #27
    def __init__(self, cfg):
        """Initializes the CSDTrainer.

        Most of the code is from `super().__init__()`; the only change is that `CSDTrainer` is
        used for `self._trainer` and weight scheduling parameters are injected into it. Look for
        "CSD: ... " comments.
        """
        TrainerBase.__init__(self)  # CSD: don't call `super`'s init as we are overriding it
        logger = logging.getLogger("detectron2")
        if not logger.isEnabledFor(logging.INFO):  # setup_logger is not called for d2
            setup_logger()
        cfg = DefaultTrainer.auto_scale_workers(cfg, comm.get_world_size())

        # Assume these objects must be constructed in this order.
        model = self.build_model(cfg)
        optimizer = self.build_optimizer(cfg, model)
        data_loader = self.build_train_loader(cfg)

        model = create_ddp_model(model, broadcast_buffers=False)
        self._trainer = CSDTrainer(model, data_loader, optimizer)  # CSD: use a CSD-specific trainer
        # CSD: inject weight scheduling parameters into trainer
        (
            self._trainer.solver_csd_beta,
            self._trainer.solver_csd_t0,
            self._trainer.solver_csd_t1,
            self._trainer.solver_csd_t2,
            self._trainer.solver_csd_t,
        ) = (
            cfg.SOLVER.CSD_WEIGHT_SCHEDULE_RAMP_BETA,
            cfg.SOLVER.CSD_WEIGHT_SCHEDULE_RAMP_T0,
            cfg.SOLVER.CSD_WEIGHT_SCHEDULE_RAMP_T1,
            cfg.SOLVER.CSD_WEIGHT_SCHEDULE_RAMP_T2,
            cfg.SOLVER.CSD_WEIGHT_SCHEDULE_RAMP_T,
        )

        self.scheduler = self.build_lr_scheduler(cfg, optimizer)
        self.checkpointer = WandbDetectionCheckpointer(  # CSD: use custom checkpointer (only few lines are added there)
            # Assume you want to save checkpoints together with logs/statistics
            model,
            cfg.OUTPUT_DIR,
            trainer=weakref.proxy(self),
        )
        self.start_iter = 0
        self.max_iter = cfg.SOLVER.MAX_ITER
        self.cfg = cfg

        self.register_hooks(self.build_hooks())
Example #28
    def __init__(self, dataset_dicts, seed: Optional[int] = None):
        """
        Args:
            dataset_dicts (list[dict]): dataset dicts of the underlying dataset to sample from
            seed (int): the initial seed of the shuffle. Must be the same
                across all workers. If None, will use a random seed shared
                among workers (require synchronization among all workers).
        """
        self._size = len(dataset_dicts)
        assert self._size > 0
        if seed is None:
            seed = comm.shared_random_seed()
        self._seed = int(seed)

        self._rank = comm.get_rank()
        self._world_size = comm.get_world_size()
        self.weights = self._get_class_balance_factor(dataset_dicts)
Example #29
def load_cornell_instances(image_dir, to_polygons=True):
    """
    Args:
        image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train".
        to_polygons (bool): whether to represent the segmentation as polygons
            (COCO's format) instead of masks (cityscapes's format).

    Returns:
        list[dict]: a list of dicts in Detectron2 standard format. (See
        `Using Custom Datasets </tutorials/datasets.html>`_ )
    """
    files = []
    for grasps_file in glob.glob(os.path.join(image_dir, "*cpos.txt")):
        assert os.path.isfile(grasps_file), grasps_file

        cat_id = int(re.search(r"pcd(\d+)cpos.txt", grasps_file).group(1))

        image_file = grasps_file.replace("cpos.txt", "r.png")
        #image_file = grasps_file.replace("cpos.txt", "d.tiff") #TODO: using depth
        assert os.path.isfile(image_file), image_file

        neg_grasps_file = grasps_file.replace("cpos.txt", "cneg.txt") 
        assert os.path.isfile(neg_grasps_file), neg_grasps_file

        files.append((cat_id, image_file, grasps_file, neg_grasps_file))
    assert len(files), "No images found in {}".format(image_dir)

    logger = logging.getLogger(__name__)
    logger.info("Preprocessing cornell annotations ...")
    # This is still not fast: all workers will execute duplicated work, which
    # can take up to 10 minutes on an 8-GPU server.
    pool = mp.Pool(processes=max(mp.cpu_count() // get_world_size() // 2, 4))

    ret = pool.map(
        functools.partial(cornell_files_to_dict, to_polygons=to_polygons),
        files,
    )
    logger.info("Loaded {} images from {}".format(len(ret), image_dir))

    # Map ids to contiguous ids
    #dataset_id_to_contiguous_id = {l.id: idx for idx, l in enumerate(os.listdir(image_dir))}
    #for dict_per_image in ret:
    #    for anno in dict_per_image["annotations"]:
    #        anno["category_id"] = dataset_id_to_contiguous_id[anno["category_id"]]

    return ret
Example #30
        def _guess_batch_size():
            # Inputs are meta-arch dependent, the most general solution will be
            # adding a function like `get_batch_size()` to each meta arch
            ret = 1
            try:
                model_input_shapes = model_data(model)["input_shapes"]
                assert isinstance(model_input_shapes, list)
                assert len(model_input_shapes) > 0
                # assuming the first input is a list of images
                ret = len(model_input_shapes[0])
            except Exception:
                ret = cfg.SOLVER.IMS_PER_BATCH // comm.get_world_size()
                logger.warning("Could not get batch size, compute from"
                               f" `cfg.SOLVER.IMS_PER_BATCH`={ret}")
                pass

            return ret