def reduce_loss_dict(loss_dict):
    """
    Reduce the loss dictionary from all processes so that process with rank
    0 has the averaged results. Returns a dict with the same fields as
    loss_dict, after reduction.
    """
    world_size = get_world_size()
    if world_size < 2:
        return loss_dict
    with torch.no_grad():
        loss_names = []
        all_losses = []
        for k, v in loss_dict.items():
            loss_names.append(k)
            all_losses.append(v)
        all_losses = torch.stack(all_losses, dim=0)
        dist.reduce(all_losses, dst=0)
        if dist.get_rank() == 0:
            # only main process gets accumulated, so only divide by
            # world_size in this case
            all_losses /= world_size
        reduced_losses = {k: v for k, v in zip(loss_names, all_losses)}
    return reduced_losses
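A minimal usage sketch for the helper above, assuming the same imports as the snippet (torch, torch.distributed as dist, and get_world_size from the training utilities). In a single-process run the early return makes it a pass-through, so no process group is needed:

import torch

# made-up per-iteration losses, as produced by the model
loss_dict = {
    "loss_classifier": torch.tensor(0.52),
    "loss_box_reg": torch.tensor(0.17),
}
loss_dict_reduced = reduce_loss_dict(loss_dict)   # averaged on rank 0 when world_size >= 2
losses_reduced = sum(loss for loss in loss_dict_reduced.values())
print({k: float(v) for k, v in loss_dict_reduced.items()}, float(losses_reduced))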
Example #2
def reduce_loss_dict(loss_dict):
    """
    Reduce the loss dictionary from all processes so that process with rank
    0 has the averaged results. Returns a dict with the same fields as
    loss_dict, after reduction.
    """
    world_size = get_world_size()
    if world_size < 2:
        return loss_dict
    with torch.no_grad():
        loss_names = []
        all_losses = []
        # sort the keys so every process stacks the losses in the same order
        for k in sorted(loss_dict.keys()):
            loss_names.append(k)
            all_losses.append(loss_dict[k])
        all_losses = torch.stack(all_losses, dim=0)
        dist.reduce(all_losses, dst=0)
        if dist.get_rank() == 0:
            # only main process gets accumulated, so only divide by
            # world_size in this case
            all_losses /= world_size
        reduced_losses = {k: v for k, v in zip(loss_names, all_losses)}
    return reduced_losses
Example #3
def run_test(cfg,
             model,
             distributed=False,
             test_mode="test",
             val_sets_dict=None):
    synchronize()
    model.eval()
    if distributed:
        model_orig = model
        model = model.module
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )

    if test_mode == "test":
        dataset_names = cfg.DATASETS.TEST
        data_loaders_val = make_data_loader(cfg,
                                            is_train=False,
                                            is_distributed=distributed)
    else:
        dataset_names = val_sets_dict.keys()
        data_loaders_val = []

        # create data loaders for validation datasets
        num_gpus = get_world_size()
        images_per_batch = cfg.TEST.IMS_PER_BATCH
        assert (
            images_per_batch % num_gpus == 0
        ), "TEST.IMS_PER_BATCH ({}) must be divisible by the number of GPUs ({}) used.".format(
            images_per_batch, num_gpus)
        shuffle = False if not distributed else True
        images_per_gpu = images_per_batch // num_gpus
        num_iters = None
        start_iter = 0
        aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else []

        val_transforms = None if cfg.TEST.BBOX_AUG.ENABLED else build_transforms(
            cfg, False)
        for k, ds in val_sets_dict.items():
            ds.set_keep_difficult(True)

            ds.set_transforms(val_transforms)
            sampler = make_data_sampler(ds, shuffle, distributed)
            batch_sampler = make_batch_data_sampler(ds, sampler,
                                                    aspect_grouping,
                                                    images_per_gpu, num_iters,
                                                    start_iter)
            collator = BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY)
            num_workers = cfg.DATALOADER.NUM_WORKERS
            data_loader = torch.utils.data.DataLoader(
                ds,
                num_workers=num_workers,
                batch_sampler=batch_sampler,
                collate_fn=collator,
            )
            data_loaders_val.append(data_loader)

    sum_mAPs = 0
    for dataset_name, data_loader_val in zip(dataset_names, data_loaders_val):
        results = inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            cfg=cfg,
        )
        synchronize()
        if distributed and not dist.get_rank() == 0:
            continue
        sum_mAPs += results["map"]
    if distributed:
        model = model_orig
    model.train()

    if test_mode == "val":
        train_transforms = build_transforms(cfg, True)
        for k, ds in val_sets_dict.items():
            ds.set_keep_difficult(False)
            ds.set_transforms(train_transforms)
    return sum_mAPs / len(dataset_names)
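The TEST.IMS_PER_BATCH check above simply splits the global test batch evenly across GPUs; a standalone sketch of the same arithmetic with made-up numbers:

images_per_batch = 8   # e.g. cfg.TEST.IMS_PER_BATCH (made-up value)
num_gpus = 4           # e.g. get_world_size() (made-up value)
assert images_per_batch % num_gpus == 0, (
    "TEST.IMS_PER_BATCH ({}) must be divisible by the number of GPUs ({}) used.".format(
        images_per_batch, num_gpus))
images_per_gpu = images_per_batch // num_gpus
print(images_per_gpu)  # 2 images per GPU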
Example #4
def do_train(
    cfg,
    model,
    data_loader,
    data_loader_val,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    test_period,
    arguments,
):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    dataset_names = cfg.DATASETS.TEST

    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):

        if any(len(target) < 1 for target in targets):
            logger.error(
                f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}"
            )
            continue
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        # with amp.scale_loss(losses, optimizer) as scaled_losses:
        #     scaled_losses.backward()
        losses.backward()
        optimizer.step()
        scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if data_loader_val is not None and test_period > 0 and iteration % test_period == 0:
            meters_val = MetricLogger(delimiter="  ")
            synchronize()
            _ = inference(  # The result can be used for additional logging, e.g. for TensorBoard
                model,
                # The method changes the segmentation mask format in a data loader,
                # so every time a new data loader is created:
                make_data_loader(cfg,
                                 is_train=False,
                                 is_distributed=(get_world_size() > 1),
                                 is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False
                if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=None,
            )
            synchronize()
            model.train()
            with torch.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val,
                                    _) in enumerate(tqdm(data_loader_val)):
                    images_val = images_val.to(device)
                    targets_val = [target.to(device) for target in targets_val]
                    loss_dict = model(images_val, targets_val)
                    losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = reduce_loss_dict(loss_dict)
                    losses_reduced = sum(
                        loss for loss in loss_dict_reduced.values())
                    meters_val.update(loss=losses_reduced, **loss_dict_reduced)
            synchronize()
            logger.info(
                meters_val.delimiter.join([
                    "[Validation]: ",
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters_val),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
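The ETA string logged every 20 iterations is just the smoothed per-iteration time multiplied by the remaining iterations; a standalone sketch with made-up values:

import datetime

avg_iter_time = 0.85              # seconds per iteration (stand-in for meters.time.global_avg)
max_iter, iteration = 90000, 20000
eta_seconds = avg_iter_time * (max_iter - iteration)
eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
print(eta_string)                 # 16:31:40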
Example #5
    def __init__(self,
                 dataset,
                 shuffle=True,
                 distributed=False,
                 num_replicas=None,
                 rank=None,
                 args=None,
                 cfg=None):
        self.dataset = dataset
        # this is a list of lists of names:
        # the first level corresponds to a video, and the second
        # to the names of the frames in that video
        self.video_data = dataset.video_data
        self.window_size = 1
        self.batch_size_per_gpu = 1
        self.epoch = 0
        self.shuffle = shuffle
        self.distributed = distributed
        self.is_train = True

        if args is not None:
            if hasattr(args, 'window_size'):
                self.window_size = args.window_size
            if hasattr(args, 'is_train'):
                self.is_train = args.is_train
        if cfg is not None:
            self.batch_size_per_gpu = get_batch_size_per_gpu(
                cfg.SOLVER.IMS_PER_BATCH if self.is_train else cfg.TEST.
                IMS_PER_BATCH)

        self.indices = []
        for video_id in sorted(self.video_data):
            frame_list = sorted(self.video_data[video_id])
            count = 0
            frame_ids = []
            for frame_id in sorted(frame_list):
                frame_ids.append(frame_id)
                count += 1
                if count == self.window_size:
                    self.indices.append(frame_ids)
                    frame_ids = []
                    count = 0
            # at test time, pad the last partial window by repeating the final frame
            if not self.is_train and count > 0:
                for i in range(self.window_size):
                    frame_ids.append(frame_id)
                    count += 1
                    if count == self.window_size:
                        self.indices.append(frame_ids)
                        frame_ids = []
                        count = 0
                        break
        self.num_samples = len(self.indices)
        self.total_size = self.num_samples
        # print(self.__len__())

        if self.distributed:
            if num_replicas is None:
                num_replicas = get_world_size()
            if rank is None:
                rank = get_rank()
            self.num_replicas = num_replicas
            self.rank = rank
            self.num_samples = int(
                math.ceil(self.num_samples * 1.0 / self.num_replicas))
            self.total_size = self.num_samples * self.num_replicas
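The indexing loop above chunks each video's sorted frame list into fixed-size windows and, at test time, pads a trailing partial window by repeating the last frame; a minimal standalone sketch of that chunking with made-up data:

video_data = {"video0": ["f0", "f1", "f2", "f3", "f4"]}  # made-up frame names
window_size = 2
indices = []
for video_id in sorted(video_data):
    frames = sorted(video_data[video_id])
    window = []
    for frame_id in frames:
        window.append(frame_id)
        if len(window) == window_size:
            indices.append(window)
            window = []
    if window:  # test-time padding: repeat the last frame to fill the window
        while len(window) < window_size:
            window.append(frames[-1])
        indices.append(window)
print(indices)  # [['f0', 'f1'], ['f2', 'f3'], ['f4', 'f4']]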
Example #6
def do_train(
    cfg,
    model,
    data_loader_support,
    data_loader_query,
    data_loader_val_support,
    data_loader_val_test,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    test_period,
    arguments,
    meters,
    meters_val,
):

    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    # meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader_support)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()

    batch_cls_json_file = cfg.MODEL.FEW_SHOT.SUP_INDICE_CLS
    with open(batch_cls_json_file, 'r') as f:
        batch_cls_sup = json.load(f)

    if cfg.MODEL.QRY_BALANCE:
        qry_cls_json_file = cfg.MODEL.QRY_INDICE_CLS
        with open(qry_cls_json_file, 'r') as f:
            batch_cls_qry = json.load(f)

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints",)
    rank = dist.get_rank()
    # if is_main_process():
    #     import pdb
    #     pdb.set_trace()
    # else:
    #     return
    # for name, param in model. named_parameters():
    #     print(name, param, True if param.grad is not None else False)

    query_iterator = iter(data_loader_query)
    # print('len(data_loader_query):', len(data_loader_query))
    # import pdb; pdb.set_trace()
    weights_novel_all = []
    iteration_qry = 0
    for iteration, (images_sup, targets_sup, idx) in enumerate(data_loader_support, start_iter):
        if any(len(target) < 1 for target in targets_sup):
            logger.error(f"Iteration={iteration + 1} || Image Ids used for training support {idx} || targets Length={[len(target) for target in targets_sup]}")
            continue
        data_time = time.time() - end
        batch_id = batch_cls_sup[rank][iteration]

        iteration = iteration + 1
        arguments["iteration"] = iteration
        scheduler.step()
        images_sup = images_sup.to(device)
        targets_sup = [target.to(device) for target in targets_sup]
        # update weight:
        # print(targets_sup)
        # if is_main_process():
        #     import pdb
        #     pdb.set_trace()
        # else:
        #     return
        # print(iteration, idx, batch_id, targets_sup[0].extra_fields)

        weight_novel = model(images_sup, targets_sup,
                             is_support=True, batch_id=batch_id)
        # weights_novel[rank] = weight_novel
        # print('batch_id', batch_id, weight_novel[:10])
        # weight_novel = {batch_id:weight_novel}
        torch.cuda.empty_cache()

        # synchronize()
        weights_novel = [torch.empty_like(weight_novel)
                         for i in range(dist.get_world_size())]
        weights_novel = torch.cat(
            diffdist.functional.all_gather(weights_novel, weight_novel))
        # print(weights_novel[:,:10])
        # if is_main_process():
        #     import pdb
        #     pdb.set_trace()
        # else:
        #     return
        weights_novel_all.append(weights_novel)
        # # print(weights_novel_all)
        # print(torch.cat(weights_novel_all).size())
        # print(torch.cat(weights_novel_all)[:,:10])
        # (torch.cat(gather_list) * torch.cat(gather_list)).mean().backward()
        # print(weights_novel)
        # NOTE: iter_size, iter_size_qry and nGPU are assumed to be defined at module level
        if iteration % iter_size == 0:
            optimizer.zero_grad()
            losses_reduced = 0
            loss_dict_all = {}
            for i in range(iter_size_qry):
                images_qry, targets_qry, idx = next(query_iterator)
                images_qry = images_qry.to(device)
                targets_qry = [target.to(device) for target in targets_qry]
                if cfg.MODEL.QRY_BALANCE:
                    batch_id_qry = batch_cls_qry[rank][iteration_qry]
                    iteration_qry += 1
                    loss_dict = model(images_qry, targets_qry,
                                      is_query=True, batch_id=batch_id_qry, weights_novel=torch.cat(weights_novel_all))
                else:
                    loss_dict = model(images_qry, targets_qry,
                                      is_query=True, weights_novel=torch.cat(weights_novel_all))
                # if is_main_process():
                #     print('loss_dict', loss_dict)
                losses = sum(loss for loss in loss_dict.values()
                             ) / iter_size_qry
                # losses.backward(retain_graph=True)
                with amp.scale_loss(losses, optimizer) as scaled_losses:
                    scaled_losses.backward(retain_graph=True)
                torch.cuda.empty_cache()
                loss_dict_all = add_dict(loss_dict_all, loss_dict)
            loss_dict_all = avg_dict(loss_dict_all)
            # if is_main_process():
            #     print('loss_dict_all', loss_dict_all)
            # reduce losses over all GPUs for logging purposes
            loss_dict_reduced = reduce_loss_dict(loss_dict_all)
            # if is_main_process():
            #     print('loss_dict_reduced', loss_dict_reduced)
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            # losses_dict_reduced = add_dict(losses_dict_reduced, loss_dict_reduced)

            meters.update(iteration / iter_size_qry, loss=losses_reduced,
                          lr=optimizer.param_groups[0]["lr"], **loss_dict_reduced)

            weights_novel_all = []

            # (weights_novel * weights_novel).mean().backward()
            # for name, param in model. named_parameters():
            # if 'backbone' not in name:
            # print(name, True if param.grad is not None else False)
            optimizer.step()
            batch_time = time.time() - end
            end = time.time()
            meters.update(iteration, time=batch_time, data=data_time)
            eta_seconds = meters.time.global_avg * (max_iter - iteration)
            eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
            torch.cuda.empty_cache()
        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join(
                    [
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "lr: {lr:.6f}",
                        "max mem: {memory:.0f}",
                    ]
                ).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                )
            )
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if data_loader_val_support is not None and test_period > 0 and iteration % test_period == 0:
            # meters_val = MetricLogger(delimiter="  ")
            synchronize()
            # """
            model.train()
            with torch.no_grad():
                weights_novel_val_sup_all = []
                current_classifier_novel = torch.zeros(
                    [iter_size * nGPU, 1024]).to(device)
                # print(current_classifier_novel)
                avg_steps = 0
                for iteration_val_sup, (images_val_sup, targets_val_sup, idx_val_sup) in enumerate(tqdm(data_loader_val_support)):
                    if any(len(target) < 1 for target in targets_val_sup):
                        logger.error(f"Iteration={iteration + 1} || Image Ids used for training support {idx_val_sup} || targets Length={[len(target) for target in targets_val_sup]}")
                        continue
                    batch_id_val_sup = batch_cls_sup[rank][int(
                        iteration_val_sup)]
                    # print(iteration_val_sup)

                    images_val_sup = images_val_sup.to(device)
                    targets_val_sup = [target.to(device)
                                       for target in targets_val_sup]
                    weight_novel_val_sup = model(images_val_sup, targets_val_sup,
                                                 is_support=True, batch_id=batch_id_val_sup)
                    # weights_novel[rank] = weight_novel_val_sup
                    # print(weight_novel_val_sup.size())
                    # print('before', weight_novel_val_sup)
                    # print('batch_id', batch_id, weight_novel_val_sup[:10])
                    # weight_novel_val_sup = {batch_id:weight_novel_val_sup}
                    torch.cuda.empty_cache()

                    # synchronize()
                    weights_novel_val_sup = [torch.empty_like(weight_novel_val_sup)
                                             for i in range(dist.get_world_size())]
                    dist.all_gather(weights_novel_val_sup,
                                    weight_novel_val_sup)
                    # weights_novel_val_sup = torch.cat(
                    #     all_gather(weight_novel_val_sup))
                    # print('after', weights_novel_val_sup)
                    # print(idx, weights_novel_val_sup)
                    # print(weights_novel_val_sup[:,:10])
                    # if is_main_process():
                    #     import pdb
                    #     pdb.set_trace()
                    # else:
                    #     return
                    weights_novel_val_sup_all.append(
                        torch.cat(weights_novel_val_sup))
                    # print('length', len(weights_novel_val_sup_all))

                    if (iteration_val_sup + 1) % iter_size_qry == 0:
                        # print(torch.cat(weights_novel_val_sup_all).size())
                        # weights_novel_val_sup_all = []
                        avg_steps += 1
                        # print('current_classifier_novel', current_classifier_novel)
                        # print('weights_novel_val_sup_all', weights_novel_val_sup_all)
                        current_classifier_novel = current_classifier_novel + \
                            torch.cat(weights_novel_val_sup_all)
                        weights_novel_val_sup_all = []

                # if is_main_process():
                #     import pdb
                #     pdb.set_trace()
                # else:
                #     return
                # print(iteration_val_sup)
                current_classifier_novel_avg = current_classifier_novel / avg_steps
                model.module.roi_heads.box.cls_weights = torch.cat([model.module.roi_heads.box.predictor.cls_score.weight,
                                                                    current_classifier_novel_avg])
                # """
            output_folder = os.path.join(cfg.OUTPUT_DIR, "Validation")
            mkdir(output_folder)
            np.save(os.path.join(output_folder, 'cls_weights_'+str(iteration / iter_size_qry)), np.array(model.module.roi_heads.box.cls_weights.cpu().data))

            res_infer = inference(  # The result can be used for additional logging, e.g. for TensorBoard
                model,
                iteration / iter_size,
                # The method changes the segmentation mask format in a data loader,
                # so every time a new data loader is created:
                make_data_loader(cfg, is_train=False, is_distributed=(
                    get_world_size() > 1), is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=output_folder,
            )
            # import pdb; pdb.set_trace()
            if res_infer:
                meters_val.update(iteration / iter_size, **res_infer)

            synchronize()
            # print('eval')
            model.train()

            """
            with torch.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val, _) in enumerate(tqdm(data_loader_val_test)):
                    images_val = images_val.to(device)
                    targets_val = [target.to(device) for target in targets_val]
                    loss_dict = model(images_val, targets_val)
                    losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = reduce_loss_dict(loss_dict)
                    losses_reduced = sum(
                        loss for loss in loss_dict_reduced.values())
                    meters_val.update(
                        iteration / iter_size, loss=losses_reduced, **loss_dict_reduced)
            """
            synchronize()
            logger.info(
                meters_val.delimiter.join(
                    [
                        "[Validation]: ",
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "lr: {lr:.6f}",
                        "max mem: {memory:.0f}",
                    ]
                ).format(
                    eta=eta_string,
                    iter=iteration / iter_size,
                    meters=str(meters_val),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                )
            )
#             """
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)
            # import json
            # json.dump(model.module.roi_heads.box.cls_weights, open(os.path.join(output_folder, 'cls_weights.json'), 'w'))

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info(
        "Total training time: {} ({:.4f} s / it)".format(
            total_time_str, total_training_time / (max_iter)
        )
    )
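The query branch above accumulates gradients over several sub-batches (iter_size_qry) before a single optimizer step; a minimal single-GPU sketch of that accumulation pattern, with a stand-in model and random data:

import torch
import torch.nn.functional as F

model = torch.nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
iter_size = 4                                   # sub-batches per optimizer step

optimizer.zero_grad()
for _ in range(iter_size):
    x, y = torch.randn(2, 4), torch.randn(2, 1)
    loss = F.mse_loss(model(x), y) / iter_size  # scale so the accumulated gradient matches one large batch
    loss.backward()                             # gradients accumulate across sub-batches
optimizer.step()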
Example #7
def make_cls_data_loader(cfg,
                         is_train=True,
                         domains=['clean'],
                         is_distributed=False,
                         start_iter=0):
    #if 'clean' in domains:
    #    assert (len(domains)==1)
    num_gpus = get_world_size()
    if is_train:
        images_per_batch = cfg.SOLVER.IMS_PER_BATCH

        assert (
            images_per_batch % num_gpus == 0
        ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of GPUs ({}) used.".format(
            images_per_batch, num_gpus)

        images_per_gpu = images_per_batch // num_gpus

        if cfg.MODEL.DOMAIN_ADAPTATION_ON:
            assert (
                images_per_batch % ((NUM_TARGET_DOMAINS + 1) * num_gpus) == 0
            ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by {} times the number of GPUs ({}) used.".format(
                images_per_batch, NUM_TARGET_DOMAINS + 1, num_gpus)

            images_per_gpu = images_per_batch // (
                (NUM_TARGET_DOMAINS + 1) * num_gpus)

        shuffle = True
        num_iters = cfg.SOLVER.MAX_ITER
    else:
        images_per_batch = cfg.TEST.IMS_PER_BATCH
        assert (
            images_per_batch % num_gpus == 0
        ), "TEST.IMS_PER_BATCH ({}) must be divisible by the number of GPUs ({}) used.".format(
            images_per_batch, num_gpus)
        images_per_gpu = images_per_batch // num_gpus
        shuffle = False if not is_distributed else True
        num_iters = None
        start_iter = 0

    if images_per_gpu > 1:
        logger = logging.getLogger(__name__)
        logger.warning(
            "When using more than one image per GPU you may encounter "
            "an out-of-memory (OOM) error if your GPU does not have "
            "sufficient memory. If this happens, you can reduce "
            "SOLVER.IMS_PER_BATCH (for training) or "
            "TEST.IMS_PER_BATCH (for inference). For training, you must "
            "also adjust the learning rate and schedule length according "
            "to the linear scaling rule. See for example: "
            "https://github.com/facebookresearch/Detectron/blob/master/configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml#L14"
        )

    # group images which have similar aspect ratio. In this case, we only
    # group in two cases: those with width / height > 1, and the other way around,
    # but the code supports more general grouping strategy
    aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else []

    paths_catalog = import_file("maskrcnn_benchmark.config.paths_catalog",
                                cfg.PATHS_CATALOG, True)
    DatasetCatalog = paths_catalog.DomainDatasetCatalog

    if is_train:
        if cfg.MODEL.DOMAIN_ADAPTATION_ON:
            dataset_list = []
            for i, domain in enumerate(domains):
                if domain == 'clean':
                    dataset_list.append(cfg.DATASETS.SOURCE_TRAIN)
                elif domain == 'foggy':
                    dataset_list.append(cfg.DATASETS.FOGGY_TRAIN)
                elif domain == 'snowy':
                    dataset_list.append(cfg.DATASETS.SNOWY_TRAIN)
                else:
                    dataset_list = None
                    raise NotImplementedError("Unknown domain")
        else:
            dataset_list = cfg.DATASETS.TRAIN
    else:
        dataset_list = cfg.DATASETS.TEST

    transforms = build_transforms(cfg, is_train)
    datasets = build_cls_dataset(dataset_list, transforms, DatasetCatalog,
                                 is_train, domains)

    data_loaders = []
    for dataset in datasets:
        sampler = make_data_sampler(dataset, shuffle, is_distributed)
        batch_sampler = make_batch_data_sampler(dataset, sampler,
                                                aspect_grouping,
                                                images_per_gpu, num_iters,
                                                start_iter)
        collator = BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY)
        num_workers = cfg.DATALOADER.NUM_WORKERS
        data_loader = torch.utils.data.DataLoader(
            dataset,
            num_workers=num_workers,
            batch_sampler=batch_sampler,
            collate_fn=collator,
        )
        data_loaders.append(data_loader)
    #if is_train:
    #    # during training, a single (possibly concatenated) data_loader is returned
    #    assert len(data_loaders) == 1
    #    return data_loaders[0]
    return data_loaders
Example #8
def do_train(
    cfg,
    total_model,
    data_loader,
    data_loader_val,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    test_period,
    arguments,
    args,
):
    if len(total_model) > 1:
        model = total_model[1]
        t_model = total_model[0]
    else:
        model = total_model[0]
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()

    start_training_time = time.time()
    end = time.time()

    iou_types = ("bbox", )
    if cfg[0].MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg[0].MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    dataset_names = cfg[0].DATASETS.TEST

    pytorch_1_1_0_or_later = is_pytorch_1_1_0_or_later()
    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        # in pytorch >= 1.1.0, scheduler.step() should be run after optimizer.step()
        if not pytorch_1_1_0_or_later:
            scheduler.step()

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict, features_dict = model(images, targets)
        if len(total_model) > 1:
            with torch.no_grad():
                t_loss_dict, t_features_dict = t_model(images, targets)
            # with torch.no_grad():
            #     # teacher_model = t_model
            #     t_weight = torch.load('./weights/centermask-V-19-eSE-FPN-ms-3x.pth')
            #     t_weight = t_weight['model']
            #     new_tweight = OrderedDict()
            #     for k, v in t_weight.items():
            #         name = k[7:]  # remove `module.`
            #         new_tweight[name] = v
            #     t_model.load_state_dict(new_tweight)
            #     t_loss_dict, t_features_dict = t_model(images, targets)

        if args.loss_head:

            loss_regression = new_box_loss(t_loss_dict['loss_reg'],
                                           loss_dict['loss_reg'])
            loss_center = new_center_loss(t_loss_dict['loss_centerness'],
                                          loss_dict['loss_centerness'])
            mode = 'KL'  # mode = 'KL' or 'cross-entropy'
            loss_pixel_wise = pixel_wise_loss(features_dict['box_cls'],
                                              t_features_dict['box_cls'], mode)
            loss_head = (loss_regression + loss_center + loss_pixel_wise)
            loss_dict.setdefault('loss_head', loss_head)
            del loss_dict['loss_reg']
            del loss_dict['loss_centerness']

        if iteration > cfg[0].SOLVER.WARMUP_ITERS:
            if args.loss_correlation:
                correlation = True
                loss_corr = get_feature(t_model, model, images, targets,
                                        correlation)
                loss_dict.setdefault('loss_corr', loss_corr)
            if args.loss_featuremap:
                correlation = False
                loss_featuremap = get_feature(t_model, model, images, targets,
                                              correlation)
                loss_dict.setdefault('loss_featuremap', loss_featuremap)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if pytorch_1_1_0_or_later:
            scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if data_loader_val is not None and test_period > 0 and iteration % test_period == 0 and iteration != 0:
            meters_val = MetricLogger(delimiter="  ")
            synchronize()
            _ = inference(  # The result can be used for additional logging, e.g. for TensorBoard
                model,
                # The method changes the segmentation mask format in a data loader,
                # so every time a new data loader is created:
                make_data_loader(cfg[0],
                                 is_train=False,
                                 is_distributed=(get_world_size() > 1),
                                 is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False
                if cfg[0].MODEL.MASK_ON else cfg[0].MODEL.RPN_ONLY,
                device=cfg[0].MODEL.DEVICE,
                expected_results=cfg[0].TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg[0].TEST.
                EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=None,
            )
            synchronize()
            model.train()
            with torch.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val,
                                    _) in enumerate(tqdm(data_loader_val)):
                    images_val = images_val.to(device)
                    targets_val = [target.to(device) for target in targets_val]
                    loss_dict = model(images_val, targets_val)
                    # the model returns (loss_dict, features_dict); keep only the loss dict
                    if len(loss_dict) > 1:
                        loss_dict = loss_dict[0]
                    losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = reduce_loss_dict(loss_dict)
                    losses_reduced = sum(
                        loss for loss in loss_dict_reduced.values())
                    meters_val.update(loss=losses_reduced, **loss_dict_reduced)
            synchronize()
            logger.info(
                meters_val.delimiter.join([
                    "[Validation]: ",
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters_val),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
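pixel_wise_loss, new_box_loss and new_center_loss are not shown in this example; as a rough illustration only, a KL-style distillation term between student and teacher classification maps could look like the sketch below (this is an assumption about the general idea, not the actual implementation):

import torch
import torch.nn.functional as F

def pixel_wise_kl(student_logits, teacher_logits, temperature=1.0):
    # treat the class dimension as a distribution at every spatial location
    # and penalize the student's divergence from the (detached) teacher
    s = F.log_softmax(student_logits / temperature, dim=1)
    t = F.softmax(teacher_logits.detach() / temperature, dim=1)
    return F.kl_div(s, t, reduction="batchmean") * (temperature ** 2)

# made-up feature maps of shape (batch, classes, H, W)
student = torch.randn(2, 80, 32, 32)
teacher = torch.randn(2, 80, 32, 32)
print(pixel_wise_kl(student, teacher))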
Example #9
def make_data_loader(root_path,
                     cfg,
                     is_train=True,
                     is_distributed=False,
                     start_iter=0,
                     class_ids=None,
                     ignore_labels=False):
    num_gpus = get_world_size()
    if is_train:
        images_per_batch = cfg.SOLVER.IMS_PER_BATCH
        assert (
            images_per_batch % num_gpus == 0
        ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of GPUs ({}) used.".format(
            images_per_batch, num_gpus)
        images_per_gpu = images_per_batch // num_gpus
        shuffle = True
        num_iters = cfg.SOLVER.MAX_ITER
    else:
        images_per_batch = cfg.TEST.IMS_PER_BATCH
        assert (
            images_per_batch % num_gpus == 0
        ), "TEST.IMS_PER_BATCH ({}) must be divisible by the number of GPUs ({}) used.".format(
            images_per_batch, num_gpus)
        images_per_gpu = images_per_batch // num_gpus
        shuffle = False if not is_distributed else True
        num_iters = None
        start_iter = 0

    if images_per_gpu > 1:
        logger = logging.getLogger(
            "maskrcnn_benchmark.dataset_gtboxframe.make_data_loader")
        logger.warning(
            "When using more than one image per GPU you may encounter "
            "an out-of-memory (OOM) error if your GPU does not have "
            "sufficient memory. If this happens, you can reduce "
            "SOLVER.IMS_PER_BATCH (for training) or "
            "TEST.IMS_PER_BATCH (for inference). For training, you must "
            "also adjust the learning rate and schedule length according "
            "to the linear scaling rule. See for example: "
            "https://github.com/facebookresearch/Detectron/blob/master/configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml#L14"
        )

    # group images which have similar aspect ratio. In this case, we only
    # group in two cases: those with width / height > 1, and the other way around,
    # but the code supports more general grouping strategy
    aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else []

    transforms = None if not is_train and cfg.TEST.BBOX_AUG.ENABLED else build_transforms(
        cfg, is_train)

    dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST

    if not is_train and not ignore_labels:
        assert class_ids is not None, "For validation datasets, class_ids has to be provided!"

    datasets = [
        build_detection_dataset_by_name(root_path,
                                        name,
                                        transforms,
                                        class_ids=class_ids,
                                        cache_images=False,
                                        ignore_labels=ignore_labels)
        for name in dataset_list
    ]

    if is_train:
        assert len(
            datasets
        ) == 1, "Can train on only one dataset, otherwise have to merge classes"
        class_ids = datasets[0].get_class_ids()

    data_loaders = []
    for dataset in datasets:
        sampler = make_data_sampler(dataset, shuffle, is_distributed)
        batch_sampler = make_batch_data_sampler(dataset, sampler,
                                                aspect_grouping,
                                                images_per_gpu, num_iters,
                                                start_iter)
        collator = BBoxAugCollator() if not is_train and cfg.TEST.BBOX_AUG.ENABLED else \
            BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY)
        num_workers = cfg.DATALOADER.NUM_WORKERS
        data_loader = torch.utils.data.DataLoader(
            dataset,
            num_workers=num_workers,
            batch_sampler=batch_sampler,
            collate_fn=collator,
        )
        data_loaders.append(data_loader)
    if is_train:
        # during training a single (possibly concatenated) data_loader is returned
        assert len(data_loaders) == 1
        return data_loaders[0], class_ids
    return data_loaders
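As the comment above notes, aspect_grouping = [1] buckets images into just two groups depending on whether width / height exceeds 1; a minimal sketch of one way to do such bucketing with made-up image sizes (not the library's exact sampler code):

import bisect

aspect_grouping = [1.0]                         # single threshold on the aspect ratio
sizes = [(640, 480), (480, 640), (1024, 768)]   # (width, height), made-up values
group_ids = [bisect.bisect_right(aspect_grouping, w / h) for w, h in sizes]
print(group_ids)  # [1, 0, 1] -> landscape images end up in a different group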
Example #10
def do_train(model,
             data_loader,
             optimizer,
             scheduler,
             checkpointer,
             device,
             checkpoint_period,
             arguments,
             logger,
             tensorboard_writer: TensorboardWriter = None):
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()

    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):

        if any(len(target) < 1 for target in targets):
            logger.error(
                "Iteration={iteration + 1} || Image Ids used for training {_} || "
                "targets Length={[len(target) for target in targets]}")
            continue

        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        scheduler.step()

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        result, loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        with amp.scale_loss(losses, optimizer) as scaled_losses:
            scaled_losses.backward()
        optimizer.step()

        # write images / ground truth / evaluation metrics to tensorboard
        tensorboard_writer(iteration, losses_reduced, loss_dict_reduced,
                           images, targets)

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)
        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if get_world_size() < 2 or dist.get_rank() == 0:
            if iteration % 20 == 0 or iteration == max_iter:
                logger.info(
                    meters.delimiter.join([
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "lr: {lr:.6f}",
                    ]).format(
                        eta=eta_string,
                        iter=iteration,
                        meters=str(meters),
                        lr=optimizer.param_groups[0]["lr"],
                    ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
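The loop above calls amp.scale_loss, which assumes the model and optimizer were previously wrapped with NVIDIA Apex; a minimal sketch of that setup on a stand-in model (opt_level "O1" is an example choice, not taken from the snippet; requires a CUDA device and the apex package):

import torch
import torch.nn.functional as F
from apex import amp

model = torch.nn.Linear(8, 2).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

x = torch.randn(4, 8).cuda()
y = torch.randint(0, 2, (4,)).cuda()
loss = F.cross_entropy(model(x), y)
optimizer.zero_grad()
with amp.scale_loss(loss, optimizer) as scaled_loss:
    scaled_loss.backward()   # backward on the scaled loss, as in the training loop above
optimizer.step()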
Example #11
def do_train(
    model,
    data_loader,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    arguments,
    vis,
    distributed,
    cfg,
):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()
    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        #import pdb
        #pdb.set_trace()
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        scheduler.step()

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter or iteration == 1:
            logger.info(
                meters.delimiter.join(
                    [
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "lr: {lr:.6f}",
                        "max mem: {memory:.0f}",
                    ]
                ).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                )
            )
            #loss_box_reg_mean = meters.meters['loss_box_reg'].global_avg
            #print(loss_box_reg_mean)
            if get_world_size() < 2 or dist.get_rank() == 0:
                vis.add_value('loss', meters.meters['loss'].global_avg)
                vis.add_value('loss_box_reg', meters.meters['loss_box_reg'].global_avg)
                vis.add_value('loss_classifier', meters.meters['loss_classifier'].global_avg)
                vis.add_value('loss_objectness', meters.meters['loss_objectness'].global_avg)
                vis.add_value('loss_rpn_box_reg', meters.meters['loss_rpn_box_reg'].global_avg)
            # model.train()
            # results, results_coco=run_validation(cfg, model, distributed)
            # model.train()
            #
            # AP = results.results['bbox']['AP']
            # AP50 = results.results['bbox']['AP50']
            # AP75 = results.results['bbox']['AP75']
            # APs = results.results['bbox']['APs']
            # APm = results.results['bbox']['APm']
            # APl = results.results['bbox']['APl']
            # print("Inference--Iteration: {}, AP:{:.4f}".format(iteration, AP))
            # if get_world_size()<2 or dist.get_rank() == 0:
            #     vis.add_value('AP', AP)
            #     vis.add_value('AP50', AP50)
            #     vis.add_value('AP75', AP75)
            #     vis.add_value('APs', APs)
            #     vis.add_value('APm', APm)
            #     vis.add_value('APl', APl)
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info(
        "Total training time: {} ({:.4f} s / it)".format(
            total_time_str, total_training_time / (max_iter)
        )
    )
Example #12
def do_train(model,
             cfg,
             data_loader,
             data_loader_val,
             optimizer,
             scheduler,
             checkpointer,
             device,
             checkpoint_period,
             test_period,
             arguments,
             output_dir='',
             visualize_loss='',
             vis_title='',
             iters_per_epoch=0):
    max_iter = len(data_loader)
    # arguments["iteration"] = max_iter
    start_iter = arguments["iteration"]
    if start_iter >= max_iter:
        checkpointer.save("model_{:07d}".format(start_iter), **arguments)
        checkpointer.save(
            "model_epoch_{:07d}".format(
                int(math.ceil(start_iter / iters_per_epoch))), **arguments)
        return

    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")

    meters = TensorboardXLogger(log_dir=os.path.join(output_dir,
                                                     'tensorboardX'),
                                delimiter="  ")

    model.train()
    start_training_time = time.time()
    end = time.time()

    mkdir(output_dir)

    if visualize_loss == "visdom" and is_main_process():
        from maskrcnn_benchmark.utils.visualization.visdom_visualizer import VisdomVisualizer
        vis_legend = None
        visualizer = VisdomVisualizer()
    else:
        visualizer = None

    scheduler.step(start_iter - 1)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    dataset_names = cfg.DATASETS.TEST

    if is_main_process():
        tq = tqdm.tqdm(total=len(data_loader), initial=start_iter)
    for iteration, batch in enumerate(data_loader, start_iter):
        images = batch[0]
        if len(batch) > 2:
            info = batch[2]
        else:
            info = None
        reg_targets = None
        seg_targets = None
        if isinstance(batch[1], dict):
            roi_targets = batch[1]["roi_target"]
            if "reg_target" in batch[1].keys():
                reg_targets = batch[1]["reg_target"]
            if "seg_target" in batch[1].keys():
                seg_targets = batch[1]["seg_target"]
        else:
            roi_targets = batch[1]

        # for target in roi_targets:
        #     print('labels: ', target.extra_fields['labels'])
        #     print('second_labels: ', target.extra_fields['second_labels'])

        if any(len(target) < 1 for target in roi_targets):
            roi_targets = None
        #     logger.error(f"Iteration={iteration+1} || Image Ids used for training {_} || targets Length={[len(target) for target in roi_targets]}")
        #     continue

        # print(info)
        # print(roi_targets)

        data_time = time.time() - end

        iteration = iteration + 1
        arguments["iteration"] = iteration

        if is_main_process():
            tq.set_description('Iteration {}'.format(iteration))
            tq.update(1)

        images = images.to(device)
        if roi_targets is not None:
            roi_targets = [target.to(device) for target in roi_targets]
        if reg_targets is not None:
            reg_targets = reg_targets.to(device)
        if seg_targets is not None:
            seg_targets = seg_targets.to(device)
        global_targets = dict(reg_targets=reg_targets, seg_targets=seg_targets)
        loss_dict = model(images, roi_targets, global_targets=global_targets)

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(iteration, loss=losses_reduced, **loss_dict_reduced)

        losses = sum(loss for loss in loss_dict.values())

        if visualizer:
            if vis_legend is None:
                vis_legend = [key for key in sorted(loss_dict_reduced.keys())]
                vis_legend.append('Total loss')
                iter_plot = visualizer.create_vis_plot('Iteration', 'loss',
                                                       vis_title, vis_legend)
            for key in sorted(loss_dict_reduced.keys()):
                visualizer.update_vis_plot(iteration=iteration,
                                           loss=loss_dict_reduced[key],
                                           window=iter_plot,
                                           name=key,
                                           update_type='append')
            visualizer.update_vis_plot(iteration=iteration,
                                       loss=losses_reduced.data,
                                       window=iter_plot,
                                       name='Total loss',
                                       update_type='append')

        optimizer.zero_grad()

        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        if device == get_device(str_device='cuda'):
            with amp.scale_loss(losses, optimizer) as scaled_losses:
                scaled_losses.backward()
        else:
            # the pooling ops used here have no CPU backward, so the backward
            # pass is skipped when training does not run on CUDA
            # losses.backward()
            pass

        optimizer.step()
        scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(iteration, time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        # tq.set_postfix(
        #     log="max mem: {:.0f}, lr: {:.6f}, loss: {:.6f}".format(
        #         float(torch.cuda.max_memory_allocated() / 1024.0 / 1024.0),
        #         float(optimizer.param_groups[0]["lr"]),
        #         float(losses_reduced)
        #     )
        # )

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if iters_per_epoch > 0:
            if iteration % iters_per_epoch == 0:
                checkpointer.save(
                    "model_epoch_{:07d}".format(
                        int(math.ceil(iteration / iters_per_epoch))),
                    **arguments)

        if data_loader_val is not None and test_period > 0 and iteration % test_period == 0:
            meters_val = TensorboardXLogger(log_dir=None, delimiter="  ")
            synchronize()
            _ = inference(  # The result can be used for additional logging, e. g. for TensorBoard
                model,
                # The method changes the segmentation mask format in a data loader,
                # so every time a new data loader is created:
                make_data_loader(cfg,
                                 is_train=False,
                                 is_distributed=(get_world_size() > 1),
                                 is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False
                if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                bbox_aug=cfg.TEST.BBOX_AUG.ENABLED,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=None,
            )
            synchronize()
            model.train()
            with torch.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val,
                                    _) in enumerate(tqdm.tqdm(data_loader_val)):
                    images_val = images_val.to(device)
                    targets_val = [target.to(device) for target in targets_val]
                    loss_dict = model(images_val, targets_val)
                    losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = reduce_loss_dict(loss_dict)
                    losses_reduced = sum(
                        loss for loss in loss_dict_reduced.values())
                    meters_val.update(loss=losses_reduced, **loss_dict_reduced)
            synchronize()
            logger.info(
                meters_val.delimiter.join([
                    "[Validation]: ",
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters_val),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))

    iteration = len(data_loader)
    if is_main_process():
        tq.close()

    checkpointer.save("model_{:07d}".format(iteration), **arguments)
    checkpointer.save(
        "model_epoch_{:07d}".format(int(math.ceil(
            iteration / iters_per_epoch))), **arguments)
    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / max_iter))
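
# Illustrative sketch (not part of the snippet above): how the "eta" field in the
# periodic log line is derived from a smoothed seconds-per-iteration value.
# `avg_iter_time` stands in for meters.time.global_avg; the helper name is made up.
import datetime

def eta_string_from(avg_iter_time, current_iter, max_iter):
    # remaining iterations times the smoothed time per iteration
    eta_seconds = avg_iter_time * (max_iter - current_iter)
    return str(datetime.timedelta(seconds=int(eta_seconds)))

# e.g. 0.4 s/it with 90000 of 100000 iterations done -> "1:06:40"
print(eta_string_from(0.4, 90000, 100000))
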
Exemple #13
0
     summary_writter.add_scalar('losses/total_loss', losses_reduced, global_step=iteration)
     for loss_name, loss_item in loss_dict_reduced.items():
         summary_writter.add_scalar('losses/{}'.format(loss_name), loss_item, global_step=iteration)
     summary_writter.add_scalar('lr', optimizer.param_groups[0]['lr'], global_step=iteration)
 if iteration % checkpoint_period == 0:
     checkpointer.save("model_{:07d}".format(iteration), **arguments)
 if data_loader_val is not None and test_period > 0 and iteration % test_period == 0:
     meters_val = MetricLogger(delimiter="  ")
     synchronize()
     _ = inference(  # The result can be used for additional logging, e. g. for TensorBoard
         model,
         # The method changes the segmentation mask format in a data loader,
         # so every time a new data loader is created:
         make_data_loader(cfg, is_train=False, is_distributed=(get_world_size() > 1), is_for_period=True),
         dataset_name="[Validation]",
         iou_types=iou_types,
         box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
         device=cfg.MODEL.DEVICE,
         expected_results=cfg.TEST.EXPECTED_RESULTS,
         expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
         output_folder=None,
     )
     synchronize()
     model.train()
     with torch.no_grad():
         # Should be one image for each GPU:
         for iteration_val, (images_val, targets_val, _) in enumerate(tqdm(data_loader_val)):
             images_val = images_val.to(device)
             targets_val = [target.to(device) for target in targets_val]
Exemple #14
0
def make_data_loader(cfg, is_train=True, is_distributed=False, start_iter=0):
    num_gpus = get_world_size()
    if is_train:
        images_per_batch = cfg.SOLVER.IMS_PER_BATCH  # 2
        assert images_per_batch % num_gpus == 0, (
            "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number "
            "of GPUs ({}) used.".format(images_per_batch, num_gpus))

        images_per_gpu = images_per_batch // num_gpus
        shuffle = True
        num_iters = cfg.SOLVER.MAX_ITER  # 720000
    else:
        images_per_batch = cfg.TEST.IMS_PER_BATCH  # 1
        assert images_per_batch % num_gpus == 0, (
            "TEST.IMS_PER_BATCH ({}) must be divisible by the number "
            "of GPUs ({}) used.".format(images_per_batch, num_gpus))

        images_per_gpu = images_per_batch // num_gpus
        shuffle = False if not is_distributed else True
        num_iters = None
        start_iter = 0

    if images_per_gpu > 1:
        logger = logging.getLogger(__name__)
        logger.warning(
            "When using more than one image per GPU you may encounter "
            "an out-of-memory (OOM) error if your GPU does not have "
            "sufficient memory. If this happens, you can reduce "
            "SOLVER.IMS_PER_BATCH (for training) or "
            "TEST.IMS_PER_BATCH (for inference). For training, you must "
            "also adjust the learning rate and schedule length according "
            "to the linear scaling rule. See for example: "
            "https://github.com/facebookresearch/Detectron/blob/master/configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml#L14"
        )

    # group images which have similar aspect ratio. In this case, we only
    # group in two cases: those with width / height > 1, and the other way around,
    # but the code supports more general grouping strategy
    # defaults to True
    aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else []

    paths_catalog = import_file("maskrcnn_benchmark.config.paths_catalog",
                                cfg.PATHS_CATALOG, True)

    # maskrcnn_benchmark.config.paths_catalog.py
    DatasetCatalog = paths_catalog.DatasetCatalog  # assign the DatasetCatalog object to this variable

    # ("coco_2014_train", "coco_2014_valminusminival") for train
    # ("coco_2014_minival",) for test
    dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST

    # the target passed into the transforms is the BoxList that corresponds to the image
    transforms = build_transforms(cfg, is_train)
    # build the COCODataset objects
    datasets = build_dataset(dataset_list, transforms, DatasetCatalog,
                             is_train)

    data_loaders = []
    for dataset in datasets:
        # build the RandomSampler here
        sampler = make_data_sampler(dataset, shuffle, is_distributed)
        # build the BatchSampler
        batch_sampler = make_batch_data_sampler(dataset, sampler,
                                                aspect_grouping,
                                                images_per_gpu, num_iters,
                                                start_iter)
        # todo: not yet clear what this does; revisit later
        collator = BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY)  # 32
        # Number of data loading threads, 4
        num_workers = cfg.DATALOADER.NUM_WORKERS
        data_loader = torch.utils.data.DataLoader(
            dataset,
            num_workers=num_workers,
            batch_sampler=batch_sampler,
            collate_fn=collator,
        )
        data_loaders.append(data_loader)
    if is_train:
        # during training, a single (possibly concatenated) data_loader is returned
        assert len(data_loaders) == 1
        return data_loaders[0]
    return data_loaders
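
# A small illustration of the two-bucket aspect-ratio grouping described in the
# comment above: images are tagged 1 when width / height > 1 and 0 otherwise, and
# the batch sampler only mixes images from the same bucket. Names are illustrative.
def aspect_ratio_bucket(width, height):
    return 1 if width / height > 1 else 0

sizes = [(640, 480), (480, 640), (800, 800)]
print([aspect_ratio_bucket(w, h) for w, h in sizes])  # -> [1, 0, 0]
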
Exemple #15
0
def mlperf_test_early_exit(iteration, iters_per_epoch, tester, model,
                           distributed, min_bbox_map, min_segm_map):
    # Note: let iters / epoch == 10k, at iter 9999 we've finished epoch 0 and need to test
    if iteration > 0 and (iteration + 1) % iters_per_epoch == 0:
        synchronize()
        epoch = iteration // iters_per_epoch + 1

        mlperf_print(key=constants.EPOCH_STOP, metadata={"epoch_num": epoch})
        mlperf_print(key=constants.BLOCK_STOP,
                     metadata={"first_epoch_num": epoch})
        mlperf_print(key=constants.EVAL_START, metadata={"epoch_num": epoch})
        # set the async evaluator's tag correctly
        set_epoch_tag(epoch)

        # Note: No longer returns anything, underlying future is in another castle
        tester(model=model, distributed=distributed)
        # necessary for correctness
        model.train()
    else:
        # Otherwise, check for finished async results
        results = check_completed_tags()

        # on master process, check each result for terminating condition
        # sentinel for run finishing
        finished = 0
        if is_main_process():
            for result_epoch, (bbox_map, segm_map) in results.items():
                # mlperf_print(key=constants.EVAL_TARGET, value={"BBOX": min_bbox_map,
                #                                                 "SEGM": min_segm_map})
                logger = logging.getLogger('maskrcnn_benchmark.trainer')
                logger.info('bbox mAP: {}, segm mAP: {}'.format(
                    bbox_map, segm_map))

                mlperf_print(
                    key=constants.EVAL_ACCURACY,
                    value={"accuracy": {
                        "BBOX": bbox_map,
                        "SEGM": segm_map
                    }},
                    metadata={"epoch_num": result_epoch})
                mlperf_print(key=constants.EVAL_STOP,
                             metadata={"epoch_num": result_epoch})
                # terminating condition
                if bbox_map >= min_bbox_map and segm_map >= min_segm_map:
                    logger.info("Target mAP reached, exiting...")
                    finished = 1
                    #return True

        # We now know on rank 0 whether or not we should terminate
        # Bcast this flag on multi-GPU
        if get_world_size() > 1:
            with torch.no_grad():
                finish_tensor = torch.tensor([finished],
                                             dtype=torch.int32,
                                             device=torch.device('cuda'))
                torch.distributed.broadcast(finish_tensor, 0)

                # If notified, end.
                if finish_tensor.item() == 1:
                    return True
        else:
            # Single GPU, don't need to create tensor to bcast, just use value directly
            if finished == 1:
                return True

    # Otherwise, default case, continue
    return False
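
# Worked check of the epoch bookkeeping above (illustrative numbers): with
# iters_per_epoch = 10000, the evaluation branch fires at iteration 9999 and the
# reported (1-based) epoch number is 1.
iters_per_epoch = 10000
iteration = 9999
assert iteration > 0 and (iteration + 1) % iters_per_epoch == 0
print(iteration // iters_per_epoch + 1)  # -> 1
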
Exemple #16
0
def make_mt_data_loader(cfg,
                        is_train=True,
                        is_distributed=False,
                        start_iter=0,
                        mode='source',
                        img_ratio=1.):
    num_gpus = get_world_size()
    dataset_list_dict = {
        'source': 'papnuclei_source',
        'no_label': 'papnuclei_no_label',
    }

    if is_train:
        images_per_batch = int(cfg.SOLVER.IMS_PER_BATCH * img_ratio)
        assert images_per_batch % num_gpus == 0, (
            "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number "
            "of GPUs ({}) used.".format(images_per_batch, num_gpus))
        images_per_gpu = images_per_batch // num_gpus
        shuffle = True
        num_iters = cfg.SOLVER.MAX_ITER
    else:
        images_per_batch = cfg.TEST.IMS_PER_BATCH
        assert images_per_batch % num_gpus == 0, (
            "TEST.IMS_PER_BATCH ({}) must be divisible by the number "
            "of GPUs ({}) used.".format(images_per_batch, num_gpus))
        images_per_gpu = images_per_batch // num_gpus
        shuffle = False if not is_distributed else True
        num_iters = None
        start_iter = 0

    if images_per_gpu > 1:
        logger = logging.getLogger(__name__)
        logger.warning(
            "When using more than one image per GPU you may encounter "
            "an out-of-memory (OOM) error if your GPU does not have "
            "sufficient memory. If this happens, you can reduce "
            "SOLVER.IMS_PER_BATCH (for training) or "
            "TEST.IMS_PER_BATCH (for inference). For training, you must "
            "also adjust the learning rate and schedule length according "
            "to the linear scaling rule. See for example: "
            "https://github.com/facebookresearch/Detectron/blob/master/configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml#L14"
        )

    # group images which have similar aspect ratio. In this case, we only
    # group in two cases: those with width / height > 1, and the other way around,
    # but the code supports more general grouping strategy
    aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else []

    paths_catalog = import_file("maskrcnn_benchmark.config.paths_catalog",
                                cfg.PATHS_CATALOG, True)
    DatasetCatalog = paths_catalog.DatasetCatalog
    if is_train:
        dataset = dataset_list_dict[mode]
    elif cfg.DATASETS.MODE_IN_TEST == 'val':
        dataset = cfg.DATASETS.VAL
    else:
        dataset = cfg.DATASETS.TEST
    transforms = build_transforms(cfg, is_train, domain=mode)
    syn_mt = True if cfg.SYN.MT_LOSS > 0 else False
    dataset = build_dataset([dataset],
                            transforms,
                            DatasetCatalog,
                            is_train,
                            aug_k=cfg.MT.AUG_K + cfg.MT.AUG_S,
                            syn_mt=syn_mt,
                            gen_true=cfg.DATASETS.GEN_TRUE)
    collators = build_collator(cfg, mode)
    dataloader = build_mt_data_loader(dataset[0], shuffle, is_distributed,
                                      aspect_grouping, images_per_gpu,
                                      num_iters, start_iter, cfg, is_train,
                                      collators)
    return dataloader
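
# Sketch of the ratio-scaled batch size used above: the scaled value must remain
# divisible by the number of GPUs or the assert fires. `round_down_to_multiple`
# is a hypothetical helper, not something the repository provides.
def round_down_to_multiple(value, multiple):
    return max(multiple, (value // multiple) * multiple)

ims_per_batch, img_ratio, num_gpus = 16, 0.75, 4
scaled = int(ims_per_batch * img_ratio)                   # 12, divisible by 4
print(scaled, round_down_to_multiple(scaled, num_gpus))   # -> 12 12
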
Exemple #17
0
def make_data_loader(cfg,
                     is_train=True,
                     is_distributed=False,
                     start_iter=0,
                     shuffle=None):  # add by hui
    num_gpus = get_world_size()
    if is_train:
        images_per_batch = cfg.SOLVER.IMS_PER_BATCH
        assert images_per_batch % num_gpus == 0, (
            "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number "
            "of GPUs ({}) used.".format(images_per_batch, num_gpus))
        images_per_gpu = images_per_batch // num_gpus
        if shuffle is None: shuffle = True
        num_iters = cfg.SOLVER.MAX_ITER

        # ############################## add by hui ########################################
        balance_normal = cfg.DATALOADER.USE_TRAIN_BALANCE_NORMAL
        normal_ratio = cfg.DATALOADER.TRAIN_NORMAL_RATIO
        remove_images_without_annotations = not balance_normal
        filter_ignore = cfg.DATASETS.COCO_DATASET.TRAIN_FILTER_IGNORE
        ################################################################################
    else:
        images_per_batch = cfg.TEST.IMS_PER_BATCH
        assert images_per_batch % num_gpus == 0, (
            "TEST.IMS_PER_BATCH ({}) must be divisible by the number "
            "of GPUs ({}) used.".format(images_per_batch, num_gpus))
        images_per_gpu = images_per_batch // num_gpus
        if shuffle is None: shuffle = False if not is_distributed else True
        num_iters = None
        start_iter = 0
        # ############################## add by hui ########################################
        balance_normal = cfg.DATALOADER.USE_TEST_BALANCE_NORMAL
        normal_ratio = cfg.DATALOADER.TEST_NORMAL_RATIO
        if balance_normal: shuffle = True
        remove_images_without_annotations = False
        filter_ignore = cfg.DATASETS.COCO_DATASET.TEST_FILTER_IGNORE
        ################################################################################
    if cfg.DATALOADER.DEBUG.CLOSE_SHUFFLE:  # add by hui
        shuffle = False

    if images_per_gpu > 1:
        logger = logging.getLogger(__name__)
        logger.warning(
            "When using more than one image per GPU you may encounter "
            "an out-of-memory (OOM) error if your GPU does not have "
            "sufficient memory. If this happens, you can reduce "
            "SOLVER.IMS_PER_BATCH (for training) or "
            "TEST.IMS_PER_BATCH (for inference). For training, you must "
            "also adjust the learning rate and schedule length according "
            "to the linear scaling rule. See for example: "
            "https://github.com/facebookresearch/Detectron/blob/master/configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml#L14"
        )

    # group images which have similar aspect ratio. In this case, we only
    # group in two cases: those with width / height > 1, and the other way around,
    # but the code supports more general grouping strategy
    aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else []

    paths_catalog = import_file("maskrcnn_benchmark.config.paths_catalog",
                                cfg.PATHS_CATALOG, True)
    DatasetCatalog = paths_catalog.DatasetCatalog
    dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST

    transforms = build_transforms(cfg, is_train)
    datasets = build_dataset(dataset_list, transforms, DatasetCatalog,
                             is_train, remove_images_without_annotations,
                             filter_ignore)  # add by hui

    data_loaders = []
    for dataset in datasets:
        sampler = make_data_sampler(dataset, shuffle, is_distributed,
                                    balance_normal,
                                    normal_ratio)  # changed by hui
        batch_sampler = make_batch_data_sampler(dataset, sampler,
                                                aspect_grouping,
                                                images_per_gpu, num_iters,
                                                start_iter)
        collator = BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY)
        num_workers = cfg.DATALOADER.NUM_WORKERS
        data_loader = torch.utils.data.DataLoader(
            dataset,
            num_workers=num_workers,
            batch_sampler=batch_sampler,
            collate_fn=collator,
            timeout=0,  # add by hui for big batch
        )
        data_loaders.append(data_loader)
    if is_train:
        # during training, a single (possibly concatenated) data_loader is returned
        assert len(data_loaders) == 1
        return data_loaders[0]
    return data_loaders
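
# Sketch of the "linear scaling rule" mentioned in the OOM warning above: when the
# total batch size changes by a factor k, the base learning rate is typically
# scaled by the same factor. Numbers below are purely illustrative.
def linearly_scaled_lr(base_lr, base_batch, new_batch):
    return base_lr * new_batch / base_batch

print(linearly_scaled_lr(0.02, 16, 8))  # -> 0.01 when halving the batch
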
Exemple #18
0
def do_train(
    model,
    model_ema,
    data_loader,
    optimizer,
    scheduler,
    checkpointer,
    device,
    local_rank,
    checkpoint_period,
    cfg_arg,
    arguments,
):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    meters_ema = MetricLogger(delimiter="  ")

    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    ema_decay = arguments["ema_decay"]
    loss_semi = arguments['loss_semi']
    temporal_save_path = cfg_arg["temporal_save_path"]
    model.train()
    model_ema.train()
    box_coder = BoxCoder(weights=(10., 10., 5., 5.))
    temporal_ens = {}
    start_training_time = time.time()
    end = time.time()
    labeled_database = arguments["HYPER_PARAMETERS"]['LABELED_DATABASE']
    temporal_supervised_losses = []

    for iteration, (images, targets_with_trans_info,
                    idx) in enumerate(data_loader, start_iter):
        targets = [_iter[0] for _iter in targets_with_trans_info]
        trans_info = [_iter[1] for _iter in targets_with_trans_info]

        try:
            db_idx, img_idx, idx_name, bboxes_batch = map_to_img(
                data_loader, idx)
            temporal_ens_bboxes = [
                ensemble_bboxes(_boxes, _im_sz, arguments["ANCHOR_STRIDES"],
                                arguments["HYPER_PARAMETERS"]['ENS_THRE'],
                                device)
                for _boxes, _im_sz in zip(bboxes_batch, images.image_sizes)
            ]

            img_size = [(_sz[1], _sz[0]) for _sz in images.image_sizes]
            pred_trans_info = copy.deepcopy(trans_info)
            temporal_ens_pred = []

            for i, _sz in enumerate(img_size):
                pred_trans_info[i][1] = _sz
                temporal_ens_per = [
                    trans_reverse(_temporal_ens, pred_trans_info[i]).to(device)
                    for _temporal_ens in temporal_ens_bboxes[i]
                ]
                temporal_ens_pred.append(temporal_ens_per)

            db_w = []
            for i, _db in enumerate(db_idx):
                if _db not in labeled_database:
                    _bbox = BoxList(
                        torch.zeros([1, 4]),
                        (images.image_sizes[i][1], images.image_sizes[i][0]),
                        mode="xyxy")
                    _bbox.add_field('labels', torch.ones([1]))
                    targets[i] = _bbox
                    db_w.append(0.)
                else:
                    db_w.append(1.)

            if any(len(target) < 1 for target in targets):
                logger.error(
                    f"Iteration={iteration + 1} || Image Ids used for training {idx} || targets Length={[len(target) for target in targets]}"
                )
                continue
            data_time = time.time() - end
            iteration = iteration + 1
            arguments["iteration"] = iteration

            images = images.to(device)
            targets = [target.to(device) for target in targets]
            update_ema_variables(model, model_ema, ema_decay, iteration)

            _loss_dict, result = model(images, targets)
            # ---- mask the per-image losses by whether each image comes from a labeled database
            with torch.no_grad():
                _loss_dict_ema, result_ema = model_ema(images, targets)
                is_labeled_db_weight = torch.tensor(
                    db_w, dtype=torch.float32).to(device)

            loss_dict = {}
            loss_dict_ema = {}
            for _key in _loss_dict.keys():
                loss_dict[_key] = torch.sum(
                    torch.stack(_loss_dict[_key], dim=0) *
                    is_labeled_db_weight)
                loss_dict_ema[_key] = torch.sum(
                    torch.stack(_loss_dict_ema[_key], dim=0) *
                    is_labeled_db_weight)

            # loss_dict = _loss_dict
            # loss_dict_ema = _loss_dict_ema

            #result_origin = [trans_reverse(_res,_info) for _res,_info in zip(result_ema,trans_info)]
            #result_origin = predict_collect_postprocess(arguments['postprocess'],result_ema,trans_info)
            result_origin = predict_retina_postprocess(
                arguments['postprocess'], box_coder, result_ema, trans_info,
                images.image_sizes)

            # any_zeros = [_iter.bbox.shape[0] == 0 for _iter in temporal_ens_pred]
            # if any(any_zeros):
            #     loss_dict['semi_box_reg'] = torch.tensor(0,dtype=torch.float32,device=device)
            #     loss_dict['semi_cls'] = torch.tensor(0,dtype=torch.float32,device=device)
            # else:
            #     semi_loss = loss_semi(
            #         result, temporal_ens_pred)
            #     for _key in semi_loss.keys():
            #         loss_dict[_key] = torch.sum(torch.stack(semi_loss[_key],dim=0) * (1 - db_weight)) * arguments["semi_weight"]

            #balance losses
            with torch.no_grad():
                supervised_loss = (loss_dict['loss_retina_cls'] +
                                   loss_dict['loss_retina_reg']) / (
                                       np.sum(db_w) + 0.1)
            temporal_supervised_losses.append(supervised_loss)
            temporal_supervised_losses = temporal_supervised_losses[-100:]
            sup_loss = torch.stack(temporal_supervised_losses).mean()
            meters.update(sup_loss=sup_loss)

            if get_world_size() > 1:
                # average the smoothed supervised loss across workers so the
                # balance weight below is consistent on every rank
                torch.distributed.all_reduce(
                    sup_loss, op=torch.distributed.ReduceOp.SUM)
                sup_loss = sup_loss / get_world_size()
            balance_weight = min(1. / (sup_loss / 0.28)**12, 1.)

            semi_loss = semi_loss_fn(
                result,
                result_ema,
                temporal_ens_pred,
                images.image_sizes,
                box_coder,
                n_cls=arguments["HYPER_PARAMETERS"]['NCLS'],
                reg_cons_w=arguments["HYPER_PARAMETERS"]['REG_CONSIST_WEIGHT'])
            semi_loss_weight = semi_weight_by_epoch(
                iteration,
                start_iter=arguments["HYPER_PARAMETERS"]['EPOCH_BATCH_NUM'] *
                arguments["HYPER_PARAMETERS"]['START_ITER'],
                rampup_length=arguments["HYPER_PARAMETERS"]['EPOCH_BATCH_NUM']
                * arguments["HYPER_PARAMETERS"]['RAMPUP_LENGTH'],
                consistence_weight=arguments["HYPER_PARAMETERS"]
                ['CONSISTENCE_WEIGHT'],
                consistence_trunc=arguments["HYPER_PARAMETERS"]
                ['MAX_CONSISTENT_LOSS'])  #semi_weight_by_epoch(iteration)
            for _key in semi_loss.keys():
                #loss_dict[_key] = torch.sum(semi_loss[_key] * (1 - is_labeled_db_weight))*semi_loss_weight*balance_weight # not used labeled
                loss_dict[_key] = torch.sum(semi_loss[_key]) * semi_loss_weight

            for i, (_id, _labeled) in enumerate(zip(idx_name, db_w)):
                # if _labeled == 1:
                #     continue
                result_dict = {
                    'iteration': iteration,
                    'result': result_origin[i]
                }
                if _id in temporal_ens.keys():
                    temporal_ens[_id].append(result_dict)
                else:
                    temporal_ens[_id] = [result_dict]

            #print('id={},{},scores={}----------{}'.format(idx_name[0],idx_name[1],result_origin[0].get_field('objectness')[:5],result_origin[1].get_field('objectness')[:5]))
            losses = sum(loss for loss in loss_dict.values())

            # reduce losses over all GPUs for logging purposes
            loss_dict_reduced = reduce_loss_dict(loss_dict)

            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            meters.update(loss=losses_reduced, **loss_dict_reduced)

            loss_dict_reduced_ema = reduce_loss_dict(loss_dict_ema)
            losses_reduced_ema = sum(
                loss for loss in loss_dict_reduced_ema.values())
            meters_ema.update(loss=losses_reduced_ema, **loss_dict_reduced_ema)

            optimizer.zero_grad()
            # Note: If mixed precision is not used, this ends up doing nothing
            # Otherwise apply loss scaling for mixed-precision recipe
            with amp.scale_loss(losses, optimizer) as scaled_losses:
                scaled_losses.backward()

            if not iteration < arguments["HYPER_PARAMETERS"][
                    'EPOCH_BATCH_NUM'] * arguments["HYPER_PARAMETERS"][
                        'START_ITER']:
                optimizer.step()
            #scheduler.step()

            batch_time = time.time() - end
            end = time.time()
            meters.update(time=batch_time, data=data_time)

            eta_seconds = meters.time.global_avg * (max_iter - iteration)
            eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

            if iteration % 20 == 0 or iteration == max_iter:
                logger.info(
                    meters.delimiter.join([
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "{meters_ema}",
                        "lr: {lr:.6f}",
                        "semi_w:{semi_w:2.3f}",
                        "supervised loss{sup_loss:2.3f},"
                        "balance_weight{balance_weight:2.3f},"
                        "max mem: {memory:.0f}",
                    ]).format(
                        eta=eta_string,
                        iter=iteration,
                        meters=str(meters),
                        meters_ema=str(meters_ema),
                        lr=optimizer.param_groups[0]["lr"],
                        semi_w=semi_loss_weight,
                        sup_loss=sup_loss,
                        balance_weight=balance_weight,
                        memory=torch.cuda.max_memory_allocated() / 1024.0 /
                        1024.0,
                    ))

            if (iteration - 50) % 100 == 0:
                for _key in temporal_ens.keys():
                    for _iter in temporal_ens[_key]:
                        str_folder = os.path.join(
                            temporal_save_path,
                            _key)  #"{}/{}".format(temporal_save_path,_key)
                        str_file = '{}/{}_loc{}_iter_x{:07d}.pt'.format(
                            str_folder, _key, local_rank, _iter['iteration'])
                        if not os.path.exists(str_folder):
                            os.makedirs(str_folder)
                        torch.save(_iter['result'], str_file)
                        del _iter['result']

                del temporal_ens
                temporal_ens = {}

            if iteration % checkpoint_period == 0:
                save_time = time.time()
                checkpointer.save("model_{:07d}".format(iteration),
                                  **arguments)

            if iteration == max_iter:
                checkpointer.save("model_final", **arguments)

        except Exception:
            # `idx` is always bound by the loop, unlike idx_name / img_idx,
            # which may not exist if the failure happened before map_to_img
            logger.error('error while processing batch with dataset indices {}'.format(idx))
            raise

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
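
# Sketch of what update_ema_variables typically does in a mean-teacher setup like
# the one above; the repository's actual helper may differ in details. Teacher
# weights track an exponential moving average of the student weights.
import torch

@torch.no_grad()
def ema_update(student, teacher, decay):
    for p_s, p_t in zip(student.parameters(), teacher.parameters()):
        # teacher <- decay * teacher + (1 - decay) * student
        p_t.mul_(decay).add_(p_s, alpha=1.0 - decay)
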
Exemple #19
0
def do_train(
    model,
    data_loader,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    arguments,
    log_step=20,
    data_partition=1,
    explicit_average_grad=False,
    no_update=False,
):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    start_training_time = time.time()
    end = time.time()
    log_start = time.time()
    from qd.qd_common import is_hvd_initialized
    use_hvd = is_hvd_initialized()
    visualize_input = False
    fix_input = False

    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        if hasattr(images, 'image_sizes') and len(images.image_sizes) == 0:
            logging.error('got an empty batch; this should never happen, since '
                          'it would mean different workers run different '
                          'numbers of iterations.')
            continue

        if fix_input:
            logging.info('fix input')
            from qd.qd_common import run_if_not_memory_cached

            def get_x(x):
                return x

            images = run_if_not_memory_cached(get_x, images, __key='images')
            targets = run_if_not_memory_cached(get_x, targets, __key='targets')

        if visualize_input:
            from qd.qd_pytorch import visualize_maskrcnn_input
            visualize_maskrcnn_input(images, targets, show_box=True)

        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        if not no_update:
            scheduler.step()

        if isinstance(images, list):
            images = [x.to(device) for x in images]
        else:
            images = images.to(device)
        if isinstance(targets, torch.Tensor):
            targets = targets.to(device)
        else:
            targets = [target.to(device) for target in targets]

        if not no_update:
            optimizer.zero_grad()

        all_image_target = partition_data(images, targets, data_partition)

        for curr_images, curr_target in all_image_target:
            forward_backward(model,
                             curr_images,
                             curr_target,
                             optimizer,
                             arguments,
                             checkpointer,
                             use_hvd,
                             meters,
                             device,
                             loss_scalar=1. / data_partition,
                             no_update=no_update)
        if explicit_average_grad:
            average_gradients(model)

        if not no_update:
            optimizer.step()

        batch_time = time.time() - end
        end = time.time()

        if iteration > start_iter + 5:
            # skip the first few iterations, since their timing measurements
            # are not representative
            meters.update(time=batch_time, data=data_time)

        if iteration % log_step == 0 or iteration == max_iter:
            speed = get_world_size() * log_step * len(targets) / (time.time() -
                                                                  log_start)
            if hasattr(meters, 'time'):
                eta_seconds = meters.time.global_avg * (max_iter - iteration)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
            else:
                eta_string = 'Unknown'

            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    'speed: {speed:.1f} images/sec',
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    speed=speed,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
            log_start = time.time()
        if iteration % checkpoint_period == 0:
            # with blobfuse, saving could fail with unknown reason. Instead of
            # saving and crashing, we do a best-effort manner.
            try_save_intermediate_snapshot(checkpointer, iteration, arguments)

    checkpointer.save("model_final", **arguments)
    if get_rank() > 0:
        old_value = checkpointer.save_to_disk
        checkpointer.save_to_disk = True
        checkpointer.save("model_final_{}".format(get_rank()), **arguments)
        checkpointer.save_to_disk = old_value

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str,
        total_training_time / (1 if max_iter == 0 else max_iter)))
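
# Sketch of the partition_data / loss_scalar pattern above: the batch is split into
# `data_partition` chunks, each backward pass contributes 1/data_partition of the
# gradient, and a single optimizer step follows. `model` is assumed to return a
# dict of losses, as in the training loops above; everything else is illustrative.
def accumulate_gradients(model, optimizer, batch_chunks):
    optimizer.zero_grad()
    n = len(batch_chunks)
    for images, targets in batch_chunks:
        loss = sum(model(images, targets).values()) / n
        loss.backward()
    optimizer.step()
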
Exemple #20
0
def inference(
        model,
        data_loader,
        dataset_name,
        iou_types=("bbox", ),
        box_only=False,
        bbox_aug=False,
        device="cuda",
        expected_results=(),
        expected_results_sigma_tol=4,
        output_folder=None,
):
    # convert to a torch.device for efficiency
    device = torch.device(device)
    num_devices = get_world_size()
    logger = logging.getLogger("maskrcnn_benchmark.inference")
    dataset = data_loader.dataset
    logger.info("Start evaluation on {} dataset({} images).".format(
        dataset_name, len(dataset)))
    total_timer = Timer()
    inference_timer = Timer()
    total_timer.tic()
    predictions = compute_on_dataset(model, data_loader, device, bbox_aug,
                                     inference_timer)
    # wait for all processes to complete before measuring the time
    synchronize()
    print('>>>>>>==============results_dict_cpu.keys()=',
          len(predictions.keys()), predictions.keys())

    total_time = total_timer.toc()
    total_time_str = get_time_str(total_time)
    logger.info(
        "Total run time: {} ({} s / img per device, on {} devices)".format(
            total_time_str, total_time * num_devices / len(dataset),
            num_devices))
    total_infer_time = get_time_str(inference_timer.total_time)
    logger.info(
        "Model inference time: {} ({} s / img per device, on {} devices)".
        format(
            total_infer_time,
            inference_timer.total_time * num_devices / len(dataset),
            num_devices,
        ))

    predictions = _accumulate_predictions_from_multiple_gpus(predictions)
    print('>>>>>><<<<<<<<<<<==============results_dict_cpu.keys()=',
          len(predictions))
    print(predictions[0])

    if not is_main_process():
        return

    if output_folder:
        torch.save(predictions, os.path.join(output_folder, "predictions.pth"))

    extra_args = dict(
        box_only=box_only,
        iou_types=iou_types,
        expected_results=expected_results,
        expected_results_sigma_tol=expected_results_sigma_tol,
    )

    return evaluate(dataset=dataset,
                    predictions=predictions,
                    output_folder=output_folder,
                    **extra_args)
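
# Worked check of the timing summary logged above (illustrative numbers): total
# wall-clock time is multiplied by the number of devices and divided by the
# dataset size to report seconds per image per device.
total_time, num_devices, num_images = 500.0, 4, 5000
print(total_time * num_devices / num_images)  # -> 0.4 s / img per device
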
Exemple #21
0
def make_data_loader(cfg, annotations, classes, is_train=True, is_distributed=False, start_iter=0):
    num_gpus = get_world_size()
    if is_train:
        images_per_batch = cfg.SOLVER.IMS_PER_BATCH
        assert images_per_batch % num_gpus == 0, (
            "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number "
            "of GPUs ({}) used.".format(images_per_batch, num_gpus))
        images_per_gpu = images_per_batch // num_gpus
        shuffle = True
        num_iters = cfg.SOLVER.MAX_ITER
    else:
        images_per_batch = cfg.TEST.IMS_PER_BATCH
        assert images_per_batch % num_gpus == 0, (
            "TEST.IMS_PER_BATCH ({}) must be divisible by the number "
            "of GPUs ({}) used.".format(images_per_batch, num_gpus))
        images_per_gpu = images_per_batch // num_gpus
        shuffle = False if not is_distributed else True
        num_iters = None
        start_iter = 0

    if images_per_gpu > 1:
        logger = logging.getLogger(__name__)
        logger.warning(
            "When using more than one image per GPU you may encounter "
            "an out-of-memory (OOM) error if your GPU does not have "
            "sufficient memory. If this happens, you can reduce "
            "SOLVER.IMS_PER_BATCH (for training) or "
            "TEST.IMS_PER_BATCH (for inference). For training, you must "
            "also adjust the learning rate and schedule length according "
            "to the linear scaling rule. See for example: "
            "https://github.com/facebookresearch/Detectron/blob/master/configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml#L14"
        )

    # group images which have similar aspect ratio. In this case, we only
    # group in two cases: those with width / height > 1, and the other way around,
    # but the code supports more general grouping strategy
    aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else []

    transforms = build_transforms(cfg, is_train)
    datasets = [CustomDataset(annotations, transforms=transforms, classes=classes)]

    data_loaders = []
    for dataset in datasets:
        sampler = make_data_sampler(dataset, shuffle, is_distributed)
        batch_sampler = make_batch_data_sampler(
            dataset, sampler, aspect_grouping, images_per_gpu, num_iters, start_iter
        )
        collator = BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY)
        num_workers = cfg.DATALOADER.NUM_WORKERS
        data_loader = torch.utils.data.DataLoader(
            dataset,
            num_workers=num_workers,
            batch_sampler=batch_sampler,
            collate_fn=collator,
        )
        data_loaders.append(data_loader)
    if is_train:
        # during training, a single (possibly concatenated) data_loader is returned
        assert len(data_loaders) == 1
        return data_loaders[0]
    return data_loaders
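
# Sketch of the minimal dataset interface the training loops above iterate over:
# __getitem__ returns an (image, target, index) triple and __len__ gives the size.
# This mirrors CustomDataset only in shape; the class below is a made-up example.
import torch.utils.data

class ToyDataset(torch.utils.data.Dataset):
    def __init__(self, samples):
        self.samples = samples  # list of (image_tensor, target) pairs

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        image, target = self.samples[idx]
        return image, target, idx
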
Exemple #22
0
def make_data_loader(cfg, is_train=True, is_distributed=False, start_iter=0):
    num_gpus = get_world_size()
    if is_train:
        images_per_batch = cfg.SOLVER.IMS_PER_BATCH
        assert images_per_batch % num_gpus == 0, (
            "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number "
            "of GPUs ({}) used.".format(images_per_batch, num_gpus))
        images_per_gpu = images_per_batch // num_gpus
        shuffle = True
        num_iters = cfg.SOLVER.MAX_ITER
    else:
        images_per_batch = cfg.TEST.IMS_PER_BATCH
        assert images_per_batch % num_gpus == 0, (
            "TEST.IMS_PER_BATCH ({}) must be divisible by the number "
            "of GPUs ({}) used.".format(images_per_batch, num_gpus))
        images_per_gpu = images_per_batch // num_gpus
        shuffle = False if not is_distributed else True
        num_iters = None
        start_iter = 0

    if images_per_gpu > 1:
        logger = logging.getLogger(__name__)
        logger.warning(
            "When using more than one image per GPU you may encounter "
            "an out-of-memory (OOM) error if your GPU does not have "
            "sufficient memory. If this happens, you can reduce "
            "SOLVER.IMS_PER_BATCH (for training) or "
            "TEST.IMS_PER_BATCH (for inference). For training, you must "
            "also adjust the learning rate and schedule length according "
            "to the linear scaling rule. See for example: "
            "https://github.com/facebookresearch/Detectron/blob/master/configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml#L14"
        )

    # group images which have similar aspect ratio. In this case, we only
    # group in two cases: those with width / height > 1, and the other way around,
    # but the code supports more general grouping strategy
    aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else []

    paths_catalog = import_file(
        "maskrcnn_benchmark.config.paths_catalog", cfg.PATHS_CATALOG, True
    )
    DatasetCatalog = paths_catalog.DatasetCatalog
    dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST

    transforms = build_transforms(cfg, is_train)
    datasets = build_dataset(dataset_list, transforms, DatasetCatalog, is_train)

    data_loaders = []
    for dataset in datasets:
        sampler = make_data_sampler(dataset, shuffle, is_distributed)
        batch_sampler = make_batch_data_sampler(
            dataset, sampler, aspect_grouping, images_per_gpu, num_iters, start_iter
        )
        collator = BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY)
        num_workers = cfg.DATALOADER.NUM_WORKERS
        data_loader = torch.utils.data.DataLoader(
            dataset,
            num_workers=num_workers,
            batch_sampler=batch_sampler,
            collate_fn=collator,
        )
        data_loaders.append(data_loader)
    if is_train:
        # during training, a single (possibly concatenated) data_loader is returned
        assert len(data_loaders) == 1
        return data_loaders[0]
    return data_loaders
def do_train(
    cfg,
    model,
    data_loader,
    data_loader_val,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    test_period,
    arguments,
    writer,
):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    dataset_names = cfg.DATASETS.TEST

    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):

        if any(len(target) < 1 for target in targets):
            logger.error(
                f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}"
            )
            continue
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()

        # # Add images every 100 iterations
        if iteration % 100 == 0:
            #     # Display images
            #     image = images.tensors[0].cpu().numpy()
            #     means = np.zeros((image.shape[0], image.shape[1], image.shape[2]))
            #     means[0] = 102.9801
            #     means[1] = 115.9465
            #     means[2] = 122.7717
            #     image = image + means
            #     image = image[[2, 1, 0]].astype(np.uint8)

            #     writer.add_image('input image', image, iteration)

            #     for b in range(len(targets[0].bbox)):
            #         box = targets[0].bbox[b]
            #         x1 = np.around(box[0].cpu().numpy())
            #         y1 = np.around(box[1].cpu().numpy())
            #         x2 = np.around(box[2].cpu().numpy())
            #         y2 = np.around(box[3].cpu().numpy())
            #         rr, cc = rectangle_perimeter(y1, x1, y2-y1, x2-x1)
            #         image[:, rr, cc] = 255

            #     writer.add_image('target boxes', image, iteration)

            #     # Display masks
            #     masks = targets[0].get_field('masks')[0]
            #     masks = masks.get_mask_tensor()
            #     combined_mask = masks[0, :, :]
            #     for i in range(1,8):
            #         combined_mask = combined_mask | masks[i, :, :]
            #     writer.add_image('mask', combined_mask.unsqueeze(0)*255, iteration)
            # writer.add_image('single part 2', masks[1, :, :].unsqueeze(0)*255, iteration)
            # writer.add_image('single part 3', masks[2, :, :].unsqueeze(0)*255, iteration)
            # writer.add_image('single part 4', masks[3, :, :].unsqueeze(0)*255, iteration)
            # writer.add_image('single part 5', masks[4, :, :].unsqueeze(0)*255, iteration)
            # writer.add_image('single part 6', masks[5, :, :].unsqueeze(0)*255, iteration)
            # writer.add_image('single part 7', masks[6, :, :].unsqueeze(0)*255, iteration)
            # writer.add_image('single part 8', masks[7, :, :].unsqueeze(0)*255, iteration)

            # Display Losses
            writer.add_scalar('loss', meters.loss.median, iteration)
            writer.add_scalar('loss_classifier',
                              loss_dict_reduced['loss_classifier'].item(),
                              iteration)
            writer.add_scalar('loss_box_reg',
                              loss_dict_reduced['loss_box_reg'].item(),
                              iteration)
            writer.add_scalar('loss_objectness',
                              loss_dict_reduced['loss_objectness'].item(),
                              iteration)
            writer.add_scalar('loss_rpn_box_reg',
                              loss_dict_reduced['loss_rpn_box_reg'].item(),
                              iteration)
            writer.add_scalar('loss_mask',
                              loss_dict_reduced['loss_mask'].item(), iteration)
            writer.add_scalar('loss_kpt', loss_dict_reduced['loss_kp'].item(),
                              iteration)
            writer.add_scalar('lr', optimizer.param_groups[0]['lr'], iteration)

        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        with amp.scale_loss(losses, optimizer) as scaled_losses:
            scaled_losses.backward()
        optimizer.step()
        scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if data_loader_val is not None and test_period > 0 and iteration % test_period == 0:
            meters_val = MetricLogger(delimiter="  ")
            synchronize()
            _ = inference(  # The result can be used for additional logging, e. g. for TensorBoard
                model,
                # The method changes the segmentation mask format in a data loader,
                # so every time a new data loader is created:
                make_data_loader(cfg,
                                 is_train=False,
                                 is_distributed=(get_world_size() > 1),
                                 is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False
                if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=None,
            )
            synchronize()
            model.train()
            with torch.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val,
                                    _) in enumerate(tqdm(data_loader_val)):
                    images_val = images_val.to(device)
                    targets_val = [target.to(device) for target in targets_val]
                    loss_dict = model(images_val, targets_val)
                    losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = reduce_loss_dict(loss_dict)
                    losses_reduced = sum(
                        loss for loss in loss_dict_reduced.values())
                    meters_val.update(loss=losses_reduced, **loss_dict_reduced)
            synchronize()
            logger.info(
                meters_val.delimiter.join([
                    "[Validation]: ",
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters_val),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
Exemple #24
0
def make_data_loader_AL(cfg,
                        is_train=True,
                        is_distributed=False,
                        start_iter=0,
                        indices=None,
                        is_passive=True):
    num_gpus = get_world_size()
    if is_train:
        images_per_batch = cfg.SOLVER.IMS_PER_BATCH
        assert images_per_batch % num_gpus == 0, (
            "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number "
            "of GPUs ({}) used.".format(images_per_batch, num_gpus))
        images_per_gpu = images_per_batch // num_gpus
        shuffle = True
        num_iters = cfg.SOLVER.MAX_ITER
    else:
        images_per_batch = cfg.TEST.IMS_PER_BATCH
        assert images_per_batch % num_gpus == 0, (
            "TEST.IMS_PER_BATCH ({}) must be divisible by the number "
            "of GPUs ({}) used.".format(images_per_batch, num_gpus))
        images_per_gpu = images_per_batch // num_gpus
        shuffle = False if not is_distributed else True
        num_iters = None
        start_iter = 0

    if images_per_gpu > 1:
        logger = logging.getLogger(__name__)
        # logger.warning(
        #     "When using more than one image per GPU you may encounter "
        #     "an out-of-memory (OOM) error if your GPU does not have "
        #     "sufficient memory. If this happens, you can reduce "
        #     "SOLVER.IMS_PER_BATCH (for training) or "
        #     "TEST.IMS_PER_BATCH (for inference). For training, you must "
        #     "also adjust the learning rate and schedule length according "
        #     "to the linear scaling rule. See for example: "
        #     "https://github.com/facebookresearch/Detectron/blob/master/configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml#L14"
        # )

    # group images which have similar aspect ratio. In this case, we only
    # group in two cases: those with width / height > 1, and the other way around,
    # but the code supports more general grouping strategy
    aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else []

    paths_catalog = import_file("maskrcnn_benchmark.config.paths_catalog",
                                cfg.PATHS_CATALOG, True)
    DatasetCatalog = paths_catalog.DatasetCatalog
    logger = logging.getLogger(__name__)
    dataset_list = cfg.DATASETS.TRAIN
    if not is_train:
        # for the non-training loader, take the complement of the given
        # indices within a fixed pool of 101 entries (hard-coded here)
        a = np.arange(101)
        indices = np.delete(a, indices)
    logger.info(f"At DATA LOADER indices are {indices}")

    transforms = build_transforms(cfg, is_train)
    datasets = build_dataset(dataset_list, transforms, DatasetCatalog,
                             is_train, cfg.DATASETS.STRATEGY, indices)

    data_loaders = []
    for dataset in datasets:
        sampler = make_data_sampler(dataset, shuffle, is_distributed)
        batch_sampler = make_batch_data_sampler(dataset, sampler,
                                                aspect_grouping,
                                                images_per_gpu, num_iters,
                                                start_iter)
        collator = BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY)
        num_workers = cfg.DATALOADER.NUM_WORKERS
        data_loader = torch.utils.data.DataLoader(
            dataset,
            num_workers=num_workers,
            batch_sampler=batch_sampler,
            collate_fn=collator,
        )
        data_loaders.append(data_loader)
    if is_train:
        # during training, a single (possibly concatenated) data_loader is returned
        assert len(data_loaders) == 1
        return data_loaders[0]
    return data_loaders
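
To make the index handling in make_data_loader_AL above concrete: when is_train is False, the loader is built over the complement of the given (labeled) indices inside a fixed pool, hard-coded to 101 entries here. A minimal sketch of that bookkeeping with hypothetical labeled indices:

import numpy as np

pool_size = 101                                   # mirrors the hard-coded np.arange(101)
labeled_indices = np.array([3, 17, 42, 88])       # hypothetical indices already annotated
unlabeled_indices = np.delete(np.arange(pool_size), labeled_indices)

assert len(labeled_indices) + len(unlabeled_indices) == pool_size
print(unlabeled_indices[:10])                     # [ 0  1  2  4  5  6  7  8  9 10]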
Example #25
def do_train(
    cfg,
    model,
    data_loader,
    data_loader_val,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    test_period,
    arguments,
    meters,
    meters_val,
):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    # meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()
    if cfg.MODEL.QRY_BALANCE:
        qry_cls_json_file = cfg.MODEL.QRY_INDICE_CLS
        with open(qry_cls_json_file, 'r') as f:
            batch_cls_qry = json.load(f)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    dataset_names = cfg.DATASETS.TEST
    rank = dist.get_rank()

    for iteration, (images, targets,
                    img_id) in enumerate(data_loader, start_iter):
        if any(len(target) < 1 for target in targets):
            logger.error(
                f"Iteration={iteration + 1} || Image Ids used for training {img_id} || targets Length={[len(target) for target in targets]}"
            )
            continue
        data_time = time.time() - end

        scheduler.step()
        images = images.to(device)
        targets = [target.to(device) for target in targets]
        if cfg.MODEL.QRY_BALANCE:
            # take two query class ids per iteration, presumably one per image
            # in the per-GPU batch
            batch_id_qry = batch_cls_qry[rank][iteration * 2:iteration * 2 + 2]
            loss_dict = model(images,
                              targets,
                              batch_id=batch_id_qry,
                              use_distill=cfg.MODEL.USE_DISTILL,
                              img_id=img_id)
        else:
            loss_dict = model(images,
                              targets,
                              use_distill=cfg.MODEL.USE_DISTILL,
                              img_id=img_id[0])
        losses = sum(loss for loss in loss_dict.values())

        iteration = iteration + 1
        arguments["iteration"] = iteration
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(iteration,
                      loss=losses_reduced,
                      lr=optimizer.param_groups[0]["lr"],
                      **loss_dict_reduced)

        optimizer.zero_grad()
        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        with amp.scale_loss(losses, optimizer) as scaled_losses:
            scaled_losses.backward()
        optimizer.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(iteration, time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)

        if data_loader_val is not None and test_period > 0 and iteration % test_period == 0:
            # meters_val = MetricLogger(delimiter="  ")
            synchronize()
            torch.cuda.empty_cache()

            output_folder = os.path.join(cfg.OUTPUT_DIR, "Validation")
            mkdir(output_folder)
            res_infer = inference(  # The result can be used for additional logging, e.g. for TensorBoard
                model,
                iteration,
                # The method changes the segmentation mask format in the data
                # loader, so a new data loader is created here each time:
                make_data_loader(cfg,
                                 is_train=False,
                                 is_distributed=(get_world_size() > 1),
                                 is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False
                if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=output_folder,
            )
            if res_infer:
                meters_val.update(iteration, **res_infer)

            synchronize()
            model.train()

            logger.info(
                meters_val.delimiter.join([
                    "[Validation]: ",
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters_val),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
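
For the QRY_BALANCE branch in the do_train variant above, each rank reads its own list of query class ids from the JSON file at cfg.MODEL.QRY_INDICE_CLS and consumes two entries per iteration, matching the [iteration * 2 : iteration * 2 + 2] slice (presumably two images per GPU). A minimal sketch with hypothetical per-rank lists standing in for the JSON contents:

# hypothetical stand-in for the contents of cfg.MODEL.QRY_INDICE_CLS, keyed by rank
batch_cls_qry = {
    0: [5, 12, 7, 3, 9, 1],
    1: [2, 8, 4, 6, 0, 11],
}
rank = 0
for iteration in range(3):
    batch_id_qry = batch_cls_qry[rank][iteration * 2:iteration * 2 + 2]
    print(iteration, batch_id_qry)
# 0 [5, 12]
# 1 [7, 3]
# 2 [9, 1]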
Example #26
def do_face_train_triplet(
    cfg,
    model,
    data_loader,
    data_loader_val,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    test_period,
    arguments,
    divs_nums,
):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()
    dataset_names = cfg.DATASETS.TEST
    # iou_types is required by the validation inference call below
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    for iteration, (img_a, img_p, img_n, label_p,
                    label_n) in enumerate(data_loader, start_iter):
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration
        img_a_list, _ = divs_tensors(device=device,
                                     tensors=img_a,
                                     targets=None,
                                     divs_nums=divs_nums)
        img_p_list, label_p_list = divs_tensors(device=device,
                                                tensors=img_p,
                                                targets=label_p,
                                                divs_nums=divs_nums)
        img_n_list, label_n_list = divs_tensors(device=device,
                                                tensors=img_n,
                                                targets=label_n,
                                                divs_nums=divs_nums)
        ####======== splitting the batch may affect the BN layers ==========####
        optimizer.zero_grad()
        for img_a, img_p, img_n, label_p, label_n in zip(
                img_a_list, img_p_list, img_n_list, label_p_list,
                label_n_list):
            loss_dict = model(tensors=[img_a, img_p, img_n],
                              targets=[label_p, label_n],
                              batch=iteration,
                              total_batch=None)
            losses = sum(loss for loss in loss_dict.values())
            # reduce losses over all GPUs for logging purposes
            loss_dict_reduced = reduce_loss_dict(loss_dict)
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            meters.update(loss=losses_reduced, **loss_dict_reduced)
            losses /= divs_nums
            with amp.scale_loss(losses, optimizer) as scaled_losses:
                scaled_losses.backward()
        optimizer.step()
        scheduler.step()
        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
            if iteration > 40000:
                checkpointer.save_backbone("BACKBONE_{:07d}".format(iteration))
        #####========= data test ============#######
        if data_loader_val is not None and test_period > 0 and iteration % test_period == 0:
            meters_val = MetricLogger(delimiter="  ")
            synchronize()
            _ = inference(  # The result can be used for additional logging, e.g. for TensorBoard
                model,
                # The method changes the segmentation mask format in the data
                # loader, so a new data loader is created here each time:
                make_data_loader(cfg,
                                 is_train=False,
                                 is_distributed=(get_world_size() > 1),
                                 is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False
                if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=None,
            )
            synchronize()
            model.train()
            with torch.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val,
                                    _) in enumerate(tqdm(data_loader_val)):
                    images_val = images_val.to(device)
                    targets_val = [target.to(device) for target in targets_val]
                    loss_dict = model(images_val, targets_val)
                    losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = reduce_loss_dict(loss_dict)
                    losses_reduced = sum(
                        loss for loss in loss_dict_reduced.values())
                    meters_val.update(loss=losses_reduced, **loss_dict_reduced)
            synchronize()
            logger.info(
                meters_val.delimiter.join([
                    "[Validation]: ",
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters_val),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))

        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)
            checkpointer.save_backbone("model_final")
    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
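
The inner loop of do_face_train_triplet above is a gradient-accumulation pattern: the batch is split into divs_nums chunks, each chunk's loss is divided by divs_nums before backward(), and a single optimizer.step() follows, so the accumulated gradient matches that of the averaged full-batch loss. A minimal sketch of the same pattern, with amp loss scaling and the triplet model left out; the linear model and random data here are placeholders:

import torch

torch.manual_seed(0)
model = torch.nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
divs_nums = 4
chunks = [torch.randn(2, 4) for _ in range(divs_nums)]  # stand-ins for the split batch

optimizer.zero_grad()
for chunk in chunks:
    loss = model(chunk).pow(2).mean()
    loss = loss / divs_nums      # keep the accumulated gradient an average over chunks
    loss.backward()
optimizer.step()                 # one step per full batch, as in the loop above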
Example #27
def do_train(
    cfg,
    model,
    data_loader,
    data_loader_val,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    test_period,
    arguments,
    meters,
):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    # meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    # dataset_names = cfg.DATASETS.TEST

    backbone_rngs, head_rngs, inter_rngs, rng, rngs = None, None, None, None, None

    if 'search' in cfg.MODEL.BACKBONE.CONV_BODY or \
        'search' in cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR or \
        'search' in cfg.MODEL.SEG_BRANCH.SEGMENT_BRANCH:
        # synchronize rngs

        num_states = sum(cfg.MODEL.BACKBONE.STAGE_REPEATS)
        if cfg.MODEL.SEG_BRANCH.SHARE_SUBNET:
            head_layers = len(cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
                              ) + cfg.MODEL.SEG_BRANCH.SUBNET_DEPTH
        else:
            head_layers = len(cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
                              ) + 4 * cfg.MODEL.SEG_BRANCH.SUBNET_DEPTH
        inter_layers = cfg.NAS.INTER_LAYERS
        backbone_ss_size = len(blocks_key)
        head_ss_size = len(head_ss_keys)
        inter_ss_size = cfg.NAS.INTER_SIZE

        if 'search' in cfg.MODEL.BACKBONE.CONV_BODY:
            _lcm = backbone_ss_size * head_ss_size // math.gcd(
                backbone_ss_size, head_ss_size)
            lcm = inter_ss_size * _lcm // math.gcd(inter_ss_size, _lcm)
        else:
            lcm = inter_ss_size * head_ss_size // math.gcd(
                inter_ss_size, head_ss_size)

    fwd_idx = -1
    for iteration, (images, targets, segment_target, _, img_ids,
                    ori_sizes) in enumerate(data_loader, start_iter):

        if any(len(target) < 1 for target in targets):
            logger.error(
                f"Iteration={iteration + 1} || Image Ids used for training {img_ids} || targets Length={[len(target) for target in targets]}"
            )
            continue
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        if 'search' in cfg.MODEL.BACKBONE.CONV_BODY or \
        'search' in cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR or \
        'search' in cfg.MODEL.SEG_BRANCH.SEGMENT_BRANCH:
            # re-draw a full cycle of random paths every lcm iterations
            if rngs is None or iteration % lcm == 0:
                del rngs
                if 'search' in cfg.MODEL.BACKBONE.CONV_BODY:
                    backbone_rngs = generate_rng(num_states, backbone_ss_size,
                                                 lcm)
                head_rngs = generate_rng(head_layers, head_ss_size, lcm)
                inter_rngs = generate_rng(inter_layers, inter_ss_size, lcm)
                if 'search' in cfg.MODEL.BACKBONE.CONV_BODY:
                    rngs = np.concatenate(
                        [backbone_rngs, head_rngs, inter_rngs],
                        axis=0).transpose(1, 0)
                    del backbone_rngs
                else:
                    rngs = np.concatenate([head_rngs, inter_rngs],
                                          axis=0).transpose(1, 0)
                del head_rngs, inter_rngs

            rng = rngs[iteration % lcm]
            rng = broadcast_data(rng.tolist())

            fwd_idx = (fwd_idx + 1) % lcm

            loss_dict = model(images,
                              targets,
                              segment_target,
                              img_ids=img_ids,
                              c2d=None,
                              ori_sizes=ori_sizes,
                              rngs=rng)
            del rng
        else:
            loss_dict = model(images,
                              targets,
                              segment_target,
                              img_ids=img_ids,
                              c2d=None,
                              ori_sizes=ori_sizes)

        if cfg.MODEL.SEG_BRANCH.ADD_SEG_BRANCH:
            segmentation_loss = loss_dict.pop("loss_segmentation")
            losses = cfg.MODEL.SEG_BRANCH.LAMDA_INSTANCE * sum(
                loss for loss in loss_dict.values()
            ) + cfg.MODEL.SEG_BRANCH.LAMDA_SEGMENTATION * segmentation_loss
            loss_dict[
                'loss_segmentation'] = segmentation_loss  # restore the popped term so the full loss dict is logged
        else:
            losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        meters.update(loss=losses.item(), **loss_dict_reduced)

        if 'search' in cfg.MODEL.BACKBONE.CONV_BODY or \
            'search' in cfg.MODEL.SEG_BRANCH.SEGMENT_BRANCH or \
            'search' in cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR:
            # accumulate gradients over one full cycle of sampled paths and
            # step the optimizer once per cycle
            if fwd_idx == 0:
                optimizer.zero_grad()
            losses.backward()
            if fwd_idx == lcm - 1:
                optimizer.step()
        else:
            optimizer.zero_grad()
            # losses.backward()
            # Note: If mixed precision is not used, this ends up doing nothing
            # Otherwise apply loss scaling for mixed-precision recipe
            with amp.scale_loss(losses, optimizer) as scaled_losses:
                scaled_losses.backward()
            optimizer.step()

        scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        del loss_dict, losses, images, targets
        # torch.cuda.empty_cache()

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if data_loader_val is not None and test_period > 0 and iteration % test_period == 0:
            meters_val = MetricLogger(delimiter="  ")
            synchronize()
            _ = inference(  # The result can be used for additional logging, e.g. for TensorBoard
                model,
                # The method changes the segmentation mask format in the data
                # loader, so a new data loader is created here each time:
                make_data_loader(cfg,
                                 is_train=False,
                                 is_distributed=(get_world_size() > 1),
                                 is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False
                if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=None,
                c2d_json_path=cfg.MODEL.SEG_BRANCH.JSON_PATH,
                cfg=cfg,
            )
            synchronize()
            model.train()
            with torch.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val,
                                    _) in enumerate(tqdm(data_loader_val)):
                    images_val = images_val.to(device)
                    targets_val = [target.to(device) for target in targets_val]
                    loss_dict = model(images_val, targets_val)
                    # losses = sum(loss for loss in loss_dict.values())
                    if cfg.MODEL.SEG_BRANCH.ADD_SEG_BRANCH:
                        segmentation_loss = loss_dict.pop("loss_segmentation")
                        losses = cfg.MODEL.SEG_BRANCH.LAMDA_INSTANCE * sum(
                            loss for loss in loss_dict.values()
                        ) + cfg.MODEL.SEG_BRANCH.LAMDA_SEGMENTATION * segmentation_loss
                        loss_dict[
                            'loss_segmentation'] = segmentation_loss  # restore the popped term so the full loss dict is logged
                    else:
                        losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = reduce_loss_dict(loss_dict)
                    meters_val.update(loss=losses.item(), **loss_dict_reduced)
            synchronize()
            logger.info(
                meters_val.delimiter.join([
                    "[Validation]: ",
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters_val),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
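
The lcm computed in the do_train variant above sets the period after which a fresh batch of random paths is drawn, so that every search space (backbone, head, intermediate) is cycled a whole number of times in between. A minimal sketch of that computation with hypothetical search-space sizes; the real values come from len(blocks_key), len(head_ss_keys) and cfg.NAS.INTER_SIZE:

import math

def lcm2(a, b):
    return a * b // math.gcd(a, b)

backbone_ss_size, head_ss_size, inter_ss_size = 4, 6, 3  # hypothetical sizes
lcm = lcm2(inter_ss_size, lcm2(backbone_ss_size, head_ss_size))
print(lcm)  # 12: paths are re-sampled every 12 iterations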
Example #28
def make_data_loader(cfg, is_train=True, is_distributed=False, start_iter=0, is_for_period=False):
    num_gpus = get_world_size()
    if is_train:
        images_per_batch = cfg.SOLVER.IMS_PER_BATCH
        assert (
            images_per_batch % num_gpus == 0
        ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of GPUs ({}) used.".format(
            images_per_batch, num_gpus)
        images_per_gpu = images_per_batch // num_gpus
        shuffle = True
        num_iters = cfg.SOLVER.MAX_ITER
    else:
        images_per_batch = cfg.TEST.IMS_PER_BATCH
        assert (
            images_per_batch % num_gpus == 0
        ), "TEST.IMS_PER_BATCH ({}) must be divisible by the number of GPUs ({}) used.".format(
            images_per_batch, num_gpus)
        images_per_gpu = images_per_batch // num_gpus
        shuffle = is_distributed
        num_iters = None
        start_iter = 0

    if images_per_gpu > 1:
        logger = logging.getLogger(__name__)
        logger.warning(
            "When using more than one image per GPU you may encounter "
            "an out-of-memory (OOM) error if your GPU does not have "
            "sufficient memory. If this happens, you can reduce "
            "SOLVER.IMS_PER_BATCH (for training) or "
            "TEST.IMS_PER_BATCH (for inference). For training, you must "
            "also adjust the learning rate and schedule length according "
            "to the linear scaling rule. See for example: "
            "https://github.com/facebookresearch/Detectron/blob/master/configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml#L14"
        )

    # group images which have similar aspect ratio. In this case, we only
    # group in two cases: those with width / height > 1, and the other way around,
    # but the code supports more general grouping strategy
    aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else []

    paths_catalog = import_file(
        "maskrcnn_benchmark.config.paths_catalog", cfg.PATHS_CATALOG, True
    )
    DatasetCatalog = paths_catalog.DatasetCatalog
    dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST

    # If bbox aug is enabled in testing, simply set transforms to None and we will apply transforms later
    transforms = None if not is_train and cfg.TEST.BBOX_AUG.ENABLED else build_transforms(cfg, is_train)
    datasets = build_dataset(dataset_list, transforms, DatasetCatalog, is_train or is_for_period)

    if is_train:
        # save category_id to label name mapping
        save_labels(datasets, cfg.OUTPUT_DIR)

    data_loaders = []
    for dataset in datasets:
        sampler = make_data_sampler(dataset, shuffle, is_distributed)
        batch_sampler = make_batch_data_sampler(
            dataset, sampler, aspect_grouping, images_per_gpu, num_iters, start_iter
        )
        collator = BBoxAugCollator() if not is_train and cfg.TEST.BBOX_AUG.ENABLED else \
            BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY)
        num_workers = cfg.DATALOADER.NUM_WORKERS
        data_loader = torch.utils.data.DataLoader(
            dataset,
            num_workers=num_workers,
            batch_sampler=batch_sampler,
            collate_fn=collator,
        )
        data_loaders.append(data_loader)
    if is_train or is_for_period:
        # during training, a single (possibly concatenated) data_loader is returned
        assert len(data_loaders) == 1
        return data_loaders[0]
    return data_loaders
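
The aspect_grouping = [1] setting above means images are bucketed by whether width / height falls below or above 1, and batches are then drawn within a single bucket. A minimal sketch of that bucketing idea only, not the library's GroupedBatchSampler itself; the aspect ratios are hypothetical:

import bisect

aspect_grouping = [1]                            # single bin edge at width / height == 1
aspect_ratios = [0.75, 1.33, 0.5, 1.78, 0.9]     # hypothetical width / height values
group_ids = [bisect.bisect_right(aspect_grouping, r) for r in aspect_ratios]
print(group_ids)  # [0, 1, 0, 1, 0] -> portrait-ish vs landscape-ish groups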
Example #29
def inference(
    model,
    data_loader,
    dataset_name,
    iou_types=("bbox", ),
    box_only=False,
    device=torch.device("cuda"),
    expected_results=0,
    expected_results_sigma_tol=0,
    output_folder=None,
    cfg=None,
    bbox_aug=False,
    visualize_results=False,
    visualization_label="coco",
    only_visualization=False,
):
    num_devices = get_world_size()
    logger = logging.getLogger("maskrcnn_benchmark.inference")
    dataset = data_loader.dataset
    logger.info("Start evaluation on {} dataset({} images).".format(
        dataset_name, len(dataset)))

    total_timer = Timer()
    inference_timer = Timer()
    total_timer.tic()
    roi_predictions, img_predictions, attention_maps = compute_on_dataset(
        model, data_loader, device, bbox_aug=bbox_aug, timer=inference_timer)

    # wait for all processes to complete before measuring the time
    synchronize()

    total_time = total_timer.toc()
    total_time_str = get_time_str(total_time)
    logger.info(
        "Total run time: {} ({} s / img per device, on {} devices)".format(
            total_time_str, total_time * num_devices / len(dataset),
            num_devices))
    total_infer_time = get_time_str(inference_timer.total_time)
    logger.info(
        "Model inference time: {} ({} s / img per device, on {} devices)".
        format(
            total_infer_time,
            inference_timer.total_time * num_devices / len(dataset),
            num_devices,
        ))

    if roi_predictions:
        roi_predictions = _accumulate_predictions_from_multiple_gpus(
            roi_predictions)
    if img_predictions:
        img_predictions = _accumulate_predictions_from_multiple_gpus(
            img_predictions)
    if attention_maps:
        attention_maps = _accumulate_predictions_from_multiple_gpus(
            attention_maps)

    if not is_main_process():
        return

    if roi_predictions and len(roi_predictions) > 0:
        for prediction in roi_predictions:
            if prediction.has_field("pred_scores"):
                prediction.add_field('second_scores',
                                     prediction.get_field('pred_scores'))
                del prediction.extra_fields["pred_scores"]
            if prediction.has_field("pred_labels"):
                prediction.add_field('second_labels',
                                     prediction.get_field('pred_labels'))
                del prediction.extra_fields["pred_labels"]

        if output_folder:
            torch.save(roi_predictions,
                       os.path.join(output_folder, "roi_predictions.pth"))

        print('Visualize results')
        if output_folder and visualize_results:
            categories = import_file(
                "maskrcnn_benchmark.data.datasets.categories.{}_categories".
                format(visualization_label),
                os.path.join(
                    os.path.dirname(os.path.dirname(cfg.PATHS_CATALOG)),
                    'data', 'categories',
                    '{}_categories.py'.format(visualization_label)), True)
            visualizer = Visualizer(categories=categories.CATEGORIES, cfg=cfg)
            visualizer.visualize_attentions(
                attention_maps, dataset,
                os.path.join(output_folder, 'attention_map'))
            visualizer.visualize_predictions(
                roi_predictions, dataset,
                os.path.join(output_folder, 'visualization'))
            if only_visualization:
                return

        extra_args = dict(
            box_only=box_only,
            iou_types=iou_types,
            expected_results=expected_results,
            expected_results_sigma_tol=expected_results_sigma_tol,
        )

        print('ROI: Evaluate')
        evaluate_roi(dataset=dataset,
                     predictions=roi_predictions,
                     output_folder=output_folder,
                     **extra_args)

    if img_predictions and len(img_predictions) > 0:
        if output_folder:
            torch.save(img_predictions,
                       os.path.join(output_folder, "img_predictions.pth"))
        print('IMAGE: Evaluate')
        evaluate_img(dataset=dataset,
                     predictions=img_predictions,
                     output_folder=output_folder)