def __init__(
        self,
        cfg,
        confidence_threshold=0.7,
        show_mask_heatmaps=False,
        masks_per_dim=2,
        min_image_size=224,
    ):
        self.cfg = cfg.clone()
        self.model = build_detection_model(cfg)
        self.model.eval()
        self.device = torch.device(cfg.MODEL.DEVICE)
        self.model.to(self.device)
        self.min_image_size = min_image_size

        checkpointer = DetectronCheckpointer(cfg, self.model)
        _ = checkpointer.load(cfg.MODEL.WEIGHT)

        self.transforms = self.build_transform()

        mask_threshold = -1 if show_mask_heatmaps else 0.5
        self.masker = Masker(threshold=mask_threshold, padding=1)
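        # a negative threshold disables binarization, so the masker returns soft
        # mask probabilities (usable as heatmaps) instead of 0/1 masks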

        # used to make colors for each class
        self.palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])
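        # (class colors are typically derived as label_id * palette modulo 255,
        # giving each class a fixed pseudo-random color)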

        self.cpu_device = torch.device("cpu")
        self.confidence_threshold = confidence_threshold
        self.show_mask_heatmaps = show_mask_heatmaps
        self.masks_per_dim = masks_per_dim
def train(cfg, local_rank, distributed):
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    if distributed:
        model = torch.nn.parallel.deprecated.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(
        cfg, model, optimizer, scheduler, output_dir, save_to_disk
    )
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        arguments,
    )

    return model
def train(cfg, local_rank, distributed, logger):
    if is_main_process():
        wandb.init(project='scene-graph',
                   entity='sgg-speaker-listener',
                   config=cfg.LISTENER)
    debug_print(logger, 'prepare training')

    model = build_detection_model(cfg)
    listener = build_listener(cfg)
    if is_main_process():
        wandb.watch(listener)

    debug_print(logger, 'end model construction')

    # modules that should always be kept in eval mode;
    # their eval() method must be called after model.train() is called
    eval_modules = (
        model.rpn,
        model.backbone,
        model.roi_heads.box,
    )

    fix_eval_modules(eval_modules)

    # NOTE: we slow down the LR of the layers whose names start with the prefixes in slow_heads
    if cfg.MODEL.ROI_RELATION_HEAD.PREDICTOR == "IMPPredictor":
        slow_heads = [
            "roi_heads.relation.box_feature_extractor",
            "roi_heads.relation.union_feature_extractor.feature_extractor",
        ]
    else:
        slow_heads = []

    # load pretrain layers to new layers
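    # keys are modules of the new relation head, values are the pretrained
    # detector modules whose weights they are initialized from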
    load_mapping = {
        "roi_heads.relation.box_feature_extractor":
        "roi_heads.box.feature_extractor",
        "roi_heads.relation.union_feature_extractor.feature_extractor":
        "roi_heads.box.feature_extractor"
    }

    if cfg.MODEL.ATTRIBUTE_ON:
        load_mapping[
            "roi_heads.relation.att_feature_extractor"] = "roi_heads.attribute.feature_extractor"
        load_mapping[
            "roi_heads.relation.union_feature_extractor.att_feature_extractor"] = "roi_heads.attribute.feature_extractor"

    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)
    listener.to(device)

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    num_batch = cfg.SOLVER.IMS_PER_BATCH
    optimizer = make_optimizer(cfg,
                               model,
                               logger,
                               slow_heads=slow_heads,
                               slow_ratio=10.0,
                               rl_factor=float(num_batch))
    listener_optimizer = make_listener_optimizer(cfg, listener)
    scheduler = make_lr_scheduler(cfg, optimizer, logger)
    listener_scheduler = None
    debug_print(logger, 'end optimizer and schedule')
    # Initialize mixed-precision training
    use_mixed_precision = cfg.DTYPE == "float16"
    amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    #listener, listener_optimizer = amp.initialize(listener, listener_optimizer, opt_level='O0')
    # amp.initialize must only be called once, so both models and both optimizers
    # are initialized together
    [model, listener], [optimizer, listener_optimizer] = amp.initialize(
        [model, listener],
        [optimizer, listener_optimizer],
        opt_level=amp_opt_level,
        loss_scale=1)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
            find_unused_parameters=True,
        )

        listener = torch.nn.parallel.DistributedDataParallel(
            listener,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
            find_unused_parameters=True,
        )

    debug_print(logger, 'end distributed')
    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR
    listener_dir = cfg.LISTENER_DIR
    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg,
                                         model,
                                         optimizer,
                                         scheduler,
                                         output_dir,
                                         save_to_disk,
                                         custom_scheduler=True)

    listener_checkpointer = Checkpointer(listener,
                                         optimizer=listener_optimizer,
                                         save_dir=listener_dir,
                                         save_to_disk=save_to_disk,
                                         custom_scheduler=False)

    if checkpointer.has_checkpoint():
        extra_checkpoint_data = checkpointer.load(
            cfg.MODEL.PRETRAINED_DETECTOR_CKPT,
            update_schedule=cfg.SOLVER.UPDATE_SCHEDULE_DURING_LOAD)
        arguments.update(extra_checkpoint_data)
    else:
        # load_mapping is only used when we init current model from detection model.
        checkpointer.load(cfg.MODEL.PRETRAINED_DETECTOR_CKPT,
                          with_optim=False,
                          load_mapping=load_mapping)

    # if there is a checkpoint in the output dir, load it; otherwise load the pretrained detector
    if listener_checkpointer.has_checkpoint():
        extra_listener_checkpoint_data = listener_checkpointer.load()
        amp.load_state_dict(extra_listener_checkpoint_data['amp'])
        '''
        print('Weights after load: ')
        print('****************************')
        print(listener.gnn.conv1.node_model.node_mlp_1[0].weight)
        print('****************************')
        '''
        # arguments.update(extra_listener_checkpoint_data)
    debug_print(logger, 'end load checkpointer')
    train_data_loader = make_data_loader(cfg,
                                         mode='train',
                                         is_distributed=distributed,
                                         start_iter=arguments["iteration"],
                                         ret_images=True)
    val_data_loaders = make_data_loader(cfg,
                                        mode='val',
                                        is_distributed=distributed,
                                        ret_images=True)

    debug_print(logger, 'end dataloader')
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    if cfg.SOLVER.PRE_VAL:
        logger.info("Validate before training")
        run_val(cfg, model, listener, val_data_loaders, distributed, logger)

    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(train_data_loader)
    start_iter = arguments["iteration"]
    start_training_time = time.time()
    end = time.time()

    print_first_grad = True

    listener_loss_func = torch.nn.MarginRankingLoss(margin=1, reduction='none')
    mistake_saver = None
    if is_main_process():
        ds_catalog = DatasetCatalog()
        dict_file_path = os.path.join(
            ds_catalog.DATA_DIR,
            ds_catalog.DATASETS['VG_stanford_filtered_with_attribute']
            ['dict_file'])
        ind_to_classes, ind_to_predicates = load_vg_info(dict_file_path)
        ind_to_classes = {k: v for k, v in enumerate(ind_to_classes)}
        ind_to_predicates = {k: v for k, v in enumerate(ind_to_predicates)}
        print('ind to classes:', ind_to_classes, '\n ind to predicates:',
              ind_to_predicates)
        mistake_saver = MistakeSaver(
            '/Scene-Graph-Benchmark.pytorch/filenames_masked', ind_to_classes,
            ind_to_predicates)

    #is_printed = False
    while True:
        try:
            listener_iteration = 0
            for iteration, (images, targets,
                            image_ids) in enumerate(train_data_loader,
                                                    start_iter):
                listener_optimizer.zero_grad()

                #print(f'ITERATION NUMBER: {iteration}')
                if any(len(target) < 1 for target in targets):
                    logger.error(
                        f"Iteration={iteration + 1} || Image Ids used for training {image_ids} || targets Length={[len(target) for target in targets]}"
                    )
                if len(images) <= 1:
                    continue

                data_time = time.time() - end
                iteration = iteration + 1
                listener_iteration += 1
                arguments["iteration"] = iteration
                model.train()
                fix_eval_modules(eval_modules)
                images_list = deepcopy(images)
                images_list = to_image_list(
                    images_list, cfg.DATALOADER.SIZE_DIVISIBILITY).to(device)

                #SAVE IMAGE TO PC
                '''
                transform = transforms.Compose([
                    transforms.ToPILImage(),
                    #transforms.Resize((cfg.LISTENER.IMAGE_SIZE, cfg.LISTENER.IMAGE_SIZE)),
                    transforms.ToTensor(),
                ])
                '''
                # turn images to a uniform size
                #print('IMAGE BEFORE Transform: ', images[0], 'GPU: ', get_rank())
                '''

                if is_main_process():
                    if not is_printed:
                        transform = transforms.ToPILImage()
                        print('SAVING IMAGE')
                        img = transform(images[0].cpu())
                        print('DONE TRANSFORM')
                        img.save('image.png')
                        print('DONE SAVING IMAGE')
                        print('ids ', image_ids[0])

                '''

                for i in range(len(images)):
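                    # resize every image to a fixed 224x224 so the batch can be
                    # stacked into a single tensor for the listener below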
                    images[i] = images[i].unsqueeze(0)
                    images[i] = F.interpolate(images[i],
                                              size=(224, 224),
                                              mode='bilinear',
                                              align_corners=False)
                    images[i] = images[i].squeeze()

                images = torch.stack(images).to(device)
                #images.requires_grad_()

                targets = [target.to(device) for target in targets]

                #print('IMAGE BEFORE Model: ', images[0], 'GPU: ', get_rank())
                _, sgs = model(images_list, targets)
                #print('IMAGE AFTER Model: ', images)
                '''
                is_printed = False
                if is_main_process():
                    if not is_printed:
                        print('PRINTING OBJECTS')
                        (obj, rel_pair, rel) = sgs[0]
                        obj = torch.argmax(obj, dim=1)
                        for i in range(obj.size(0)):
                            print(f'OBJECT {i}: ', obj[i])
                        print('DONE PRINTING OBJECTS')
                        is_printed=True

                '''
                images_list = None
                sgs = collate_sgs(sgs, cfg.MODEL.DEVICE)
                ''' 

                if is_main_process():
                    if not is_printed:
                        mistake_saver.add_mistake((image_ids[0], image_ids[1]), (sgs[0], sgs[1]), 231231, 'SG') 
                        mistake_saver.toHtml('/www')
                        is_printed = True
                
                '''

                listener_loss = 0
                gap_reward = 0
                avg_acc = 0
                num_correct = 0
                score_matrix = torch.zeros((images.size(0), images.size(0)))
                # fill score matrix
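                # score_matrix[i][j] is the listener score for scene graph i
                # paired with image j (the listener scores one sg against the
                # whole image batch at once)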
                for true_index, sg in enumerate(sgs):
                    acc = 0
                    detached_sg = (
                        sg[0].detach().requires_grad_().to(torch.float32),
                        sg[1].long(),
                        sg[2].detach().requires_grad_().to(torch.float32),
                    )
                    #scores = listener(sg, images)
                    with amp.disable_casts():
                        scores = listener(detached_sg, images)
                    score_matrix[true_index] = scores

                #print('Score matrix:', score_matrix)
                score_matrix = score_matrix.to(device)
                # fill loss matrix
                loss_matrix = torch.zeros((2, images.size(0), images.size(0)),
                                          device=device)
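                # loss_matrix[0]: ranking losses with the scene graph fixed (rows
                # of score_matrix); loss_matrix[1]: ranking losses with the image
                # fixed (rows of the transposed score_matrix)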
                # sg centered scores
                for true_index in range(loss_matrix.size(1)):
                    row_score = score_matrix[true_index]
                    (true_scores, predicted_scores,
                     binary) = format_scores(row_score, true_index, device)
                    loss_vec = listener_loss_func(true_scores,
                                                  predicted_scores, binary)
                    loss_matrix[0][true_index] = loss_vec
                # image centered scores
                transposed_score_matrix = score_matrix.t()
                for true_index in range(loss_matrix.size(1)):
                    row_score = transposed_score_matrix[true_index]
                    (true_scores, predicted_scores,
                     binary) = format_scores(row_score, true_index, device)
                    loss_vec = listener_loss_func(true_scores,
                                                  predicted_scores, binary)
                    loss_matrix[1][true_index] = loss_vec

                print('iteration:', listener_iteration)
                sg_acc = 0
                img_acc = 0
                # calculate accuracy
                for i in range(loss_matrix.size(1)):
                    temp_sg_acc = 0
                    temp_img_acc = 0
                    for j in range(loss_matrix.size(2)):
                        if loss_matrix[0][i][i] > loss_matrix[0][i][j]:
                            temp_sg_acc += 1
                        else:
                            if cfg.LISTENER.HTML:
                                if (is_main_process()
                                        and listener_iteration >= 600
                                        and listener_iteration % 25 == 0
                                        and i != j):
                                    detached_sg_i = (sgs[i][0].detach(),
                                                     sgs[i][1],
                                                     sgs[i][2].detach())
                                    detached_sg_j = (sgs[j][0].detach(),
                                                     sgs[j][1],
                                                     sgs[j][2].detach())
                                    mistake_saver.add_mistake(
                                        (image_ids[i], image_ids[j]),
                                        (detached_sg_i, detached_sg_j),
                                        listener_iteration, 'SG')
                        if loss_matrix[1][i][i] > loss_matrix[1][j][i]:
                            temp_img_acc += 1
                        else:
                            if cfg.LISTENER.HTML:
                                if (is_main_process()
                                        and listener_iteration >= 600
                                        and listener_iteration % 25 == 0
                                        and i != j):
                                    detached_sg_i = (sgs[i][0].detach(),
                                                     sgs[i][1],
                                                     sgs[i][2].detach())
                                    detached_sg_j = (sgs[j][0].detach(),
                                                     sgs[j][1],
                                                     sgs[j][2].detach())
                                    mistake_saver.add_mistake(
                                        (image_ids[i], image_ids[j]),
                                        (detached_sg_i, detached_sg_j),
                                        listener_iteration, 'IMG')

                    temp_sg_acc = temp_sg_acc * 100 / (loss_matrix.size(1) - 1)
                    temp_img_acc = temp_img_acc * 100 / (loss_matrix.size(1) -
                                                         1)
                    sg_acc += temp_sg_acc
                    img_acc += temp_img_acc

                if cfg.LISTENER.HTML:
                    if (is_main_process()
                            and listener_iteration % 100 == 0
                            and listener_iteration >= 600):
                        mistake_saver.toHtml('/www')

                sg_acc /= loss_matrix.size(1)
                img_acc /= loss_matrix.size(1)

                avg_sg_acc = torch.tensor([sg_acc]).to(device)
                avg_img_acc = torch.tensor([img_acc]).to(device)
                # reduce acc over all gpus
                avg_acc = {'sg_acc': avg_sg_acc, 'img_acc': avg_img_acc}
                avg_acc_reduced = reduce_loss_dict(avg_acc)

                sg_acc = sum(acc for acc in avg_acc_reduced['sg_acc'])
                img_acc = sum(acc for acc in avg_acc_reduced['img_acc'])

                # log acc to wandb
                if is_main_process():
                    wandb.log({
                        "Train SG Accuracy": sg_acc.item(),
                        "Train IMG Accuracy": img_acc.item()
                    })

                sg_loss = 0
                img_loss = 0

                for i in range(loss_matrix.size(0)):
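                    # zero the diagonal so a sample's matched pair is never
                    # counted as its own negative in the max below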
                    for j in range(loss_matrix.size(1)):
                        loss_matrix[i][j][j] = 0.

                for i in range(loss_matrix.size(1)):
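                    # hardest-negative aggregation: keep only the largest ranking
                    # loss in each row (the diagonal was zeroed above)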
                    sg_loss += torch.max(loss_matrix[0][i])
                    img_loss += torch.max(loss_matrix[1][i])

                sg_loss = sg_loss / loss_matrix.size(1)
                img_loss = img_loss / loss_matrix.size(1)
                sg_loss = sg_loss.to(device)
                img_loss = img_loss.to(device)

                loss_dict = {'sg_loss': sg_loss, 'img_loss': img_loss}

                losses = sum(loss for loss in loss_dict.values())

                # reduce losses over all GPUs for logging purposes
                loss_dict_reduced = reduce_loss_dict(loss_dict)
                sg_loss_reduced = loss_dict_reduced['sg_loss']
                img_loss_reduced = loss_dict_reduced['img_loss']
                if is_main_process():
                    wandb.log({"Train SG Loss": sg_loss_reduced})
                    wandb.log({"Train IMG Loss": img_loss_reduced})

                losses_reduced = sum(loss
                                     for loss in loss_dict_reduced.values())
                meters.update(loss=losses_reduced, **loss_dict_reduced)

                # NOTE: the amp loss-scaling path below is commented out, so this is a
                # plain (unscaled) backward pass
                losses.backward()
                #with amp.scale_loss(losses, listener_optimizer) as scaled_losses:
                #    scaled_losses.backward()

                verbose = (iteration % cfg.SOLVER.PRINT_GRAD_FREQ
                           ) == 0 or print_first_grad  # print grad or not
                print_first_grad = False
                #clip_grad_value([(n, p) for n, p in listener.named_parameters() if p.requires_grad], cfg.LISTENER.CLIP_VALUE, logger=logger, verbose=True, clip=True)
                listener_optimizer.step()

                batch_time = time.time() - end
                end = time.time()
                meters.update(time=batch_time, data=data_time)

                eta_seconds = meters.time.global_avg * (max_iter - iteration)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

                if iteration % 200 == 0 or iteration == max_iter:
                    logger.info(
                        meters.delimiter.join([
                            "eta: {eta}",
                            "iter: {iter}",
                            "{meters}",
                            "lr: {lr:.6f}",
                            "max mem: {memory:.0f}",
                        ]).format(
                            eta=eta_string,
                            iter=iteration,
                            meters=str(meters),
                            lr=listener_optimizer.param_groups[-1]["lr"],
                            memory=torch.cuda.max_memory_allocated() / 1024.0 /
                            1024.0,
                        ))

                if iteration % checkpoint_period == 0:
                    """
                    print('Model before save')
                    print('****************************')
                    print(listener.gnn.conv1.node_model.node_mlp_1[0].weight)
                    print('****************************')
                    """
                    listener_checkpointer.save(
                        "model_{:07d}".format(listener_iteration),
                        amp=amp.state_dict())
                    #listener_checkpointer.save("model_{:07d}".format(listener_iteration))

                if iteration == max_iter:
                    listener_checkpointer.save("model_final",
                                               amp=amp.state_dict())
                    #listener_checkpointer.save("model_final")

                val_result = None  # used for scheduler updating
                if cfg.SOLVER.TO_VAL and iteration % cfg.SOLVER.VAL_PERIOD == 0:
                    logger.info("Start validating")
                    val_result = run_val(cfg, model, listener,
                                         val_data_loaders, distributed, logger)
                    (sg_loss, img_loss, sg_acc, img_acc,
                     speaker_val) = val_result

                    if is_main_process():
                        wandb.log({
                            "Validation SG Accuracy": sg_acc,
                            "Validation IMG Accuracy": img_acc,
                            "Validation SG Loss": sg_loss,
                            "Validation IMG Loss": img_loss,
                            "Speaker Val": speaker_val,
                        })

        except Exception:
            # the training dataloader is exhausted; rebuild it and keep training
            logger.info('Dataset finished, creating new')
            train_data_loader = make_data_loader(
                cfg,
                mode='train',
                is_distributed=distributed,
                start_iter=arguments["iteration"],
                ret_images=True)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
    return listener
def train(cfg, local_rank, distributed):
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    if cfg.MODEL.DOMAIN_ADAPTATION_ON:
        source_data_loader = make_data_loader(
            cfg,
            is_train=True,
            is_source=True,
            is_distributed=distributed,
            start_iter=arguments["iteration"],
        )
        target_data_loader = make_data_loader(
            cfg,
            is_train=True,
            is_source=False,
            is_distributed=distributed,
            start_iter=arguments["iteration"],
        )

        do_da_train(
            model,
            source_data_loader,
            target_data_loader,
            optimizer,
            scheduler,
            checkpointer,
            device,
            checkpoint_period,
            arguments,
            cfg,
        )
    else:
        data_loader = make_data_loader(
            cfg,
            is_train=True,
            is_distributed=distributed,
            start_iter=arguments["iteration"],
        )

        do_train(
            model,
            data_loader,
            optimizer,
            scheduler,
            checkpointer,
            device,
            checkpoint_period,
            arguments,
        )

    return model
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        required=True,
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        '--model-path',
        type=Path,
        help=('Path to model pickle file. If not specified, the latest '
              'checkpoint, if it exists, or cfg.MODEL.WEIGHT is loaded.'))
    parser.add_argument(
        '--output-dir',
        default='{cfg_OUTPUT_DIR}/inference-{model_stem}',
        help=('Output directory. Can use variables {cfg_OUTPUT_DIR}, which is '
              'replaced by cfg.OUTPUT_DIR, and {model_stem}, which is '
              'replaced by the stem of the file used to load weights.'))
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    assert cfg.OUTPUT_DIR, 'cfg.OUTPUT_DIR must not be empty.'
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=cfg.OUTPUT_DIR)
    if args.model_path:
        load_path = str(args.model_path.resolve())
        load_msg = 'Loading model from --model-path: %s' % load_path
    else:
        if checkpointer.has_checkpoint():
            load_path = checkpointer.get_checkpoint_file()
            load_msg = 'Loading model from latest checkpoint: %s' % load_path
        else:
            load_path = cfg.MODEL.WEIGHT
            load_msg = 'Loading model from cfg.MODEL.WEIGHT: %s' % load_path

    output_dir = Path(
        args.output_dir.format(cfg_OUTPUT_DIR=cfg.OUTPUT_DIR,
                               model_stem=Path(load_path).stem))
    output_dir.mkdir(exist_ok=True, parents=True)
    file_logger = common_setup(__file__, output_dir, args)
    # We can't log the load_msg until we setup the output directory, but we
    # can't get the output directory until we figure out which model to load.
    # So we save load_msg and log it here.
    logging.info(load_msg)
    logging.info('Output inference results to: %s' % output_dir)

    logger = logging.getLogger("maskrcnn_benchmark")
    logger.info("Using {} GPUs".format(num_gpus))
    file_logger.info('Config:')
    file_logger.info(cfg)

    file_logger.info("Collecting env info (might take some time)")
    file_logger.info("\n" + collect_env_info())

    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    for idx, dataset_name in enumerate(dataset_names):
        output_folder = output_dir / dataset_name
        mkdir(output_folder)
        output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
def train(cfg, local_rank, distributed):
    # Model logging
    print_mlperf(key=mlperf_log.INPUT_BATCH_SIZE, value=cfg.SOLVER.IMS_PER_BATCH)
    print_mlperf(key=mlperf_log.BATCH_SIZE_TEST, value=cfg.TEST.IMS_PER_BATCH)

    print_mlperf(key=mlperf_log.INPUT_MEAN_SUBTRACTION, value = cfg.INPUT.PIXEL_MEAN)
    print_mlperf(key=mlperf_log.INPUT_NORMALIZATION_STD, value=cfg.INPUT.PIXEL_STD)
    print_mlperf(key=mlperf_log.INPUT_RESIZE)
    print_mlperf(key=mlperf_log.INPUT_RESIZE_ASPECT_PRESERVING)
    print_mlperf(key=mlperf_log.MIN_IMAGE_SIZE, value=cfg.INPUT.MIN_SIZE_TRAIN)
    print_mlperf(key=mlperf_log.MAX_IMAGE_SIZE, value=cfg.INPUT.MAX_SIZE_TRAIN)
    print_mlperf(key=mlperf_log.INPUT_RANDOM_FLIP)
    print_mlperf(key=mlperf_log.RANDOM_FLIP_PROBABILITY, value=0.5)
    print_mlperf(key=mlperf_log.FG_IOU_THRESHOLD, value=cfg.MODEL.RPN.FG_IOU_THRESHOLD)
    print_mlperf(key=mlperf_log.BG_IOU_THRESHOLD, value=cfg.MODEL.RPN.BG_IOU_THRESHOLD)
    print_mlperf(key=mlperf_log.RPN_PRE_NMS_TOP_N_TRAIN, value=cfg.MODEL.RPN.PRE_NMS_TOP_N_TRAIN)
    print_mlperf(key=mlperf_log.RPN_PRE_NMS_TOP_N_TEST, value=cfg.MODEL.RPN.PRE_NMS_TOP_N_TEST)
    print_mlperf(key=mlperf_log.RPN_POST_NMS_TOP_N_TRAIN, value=cfg.MODEL.RPN.FPN_POST_NMS_TOP_N_TRAIN)
    print_mlperf(key=mlperf_log.RPN_POST_NMS_TOP_N_TEST, value=cfg.MODEL.RPN.FPN_POST_NMS_TOP_N_TEST)
    print_mlperf(key=mlperf_log.ASPECT_RATIOS, value=cfg.MODEL.RPN.ASPECT_RATIOS)
    print_mlperf(key=mlperf_log.BACKBONE, value=cfg.MODEL.BACKBONE.CONV_BODY)
    print_mlperf(key=mlperf_log.NMS_THRESHOLD, value=cfg.MODEL.RPN.NMS_THRESH)

    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    # Optimizer logging
    print_mlperf(key=mlperf_log.OPT_NAME, value=mlperf_log.SGD_WITH_MOMENTUM)
    print_mlperf(key=mlperf_log.OPT_LR, value=cfg.SOLVER.BASE_LR)
    print_mlperf(key=mlperf_log.OPT_MOMENTUM, value=cfg.SOLVER.MOMENTUM)
    print_mlperf(key=mlperf_log.OPT_WEIGHT_DECAY, value=cfg.SOLVER.WEIGHT_DECAY)


    scheduler = make_lr_scheduler(cfg, optimizer)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(
        cfg, model, optimizer, scheduler, output_dir, save_to_disk
    )
    arguments["save_checkpoints"] = cfg.SAVE_CHECKPOINTS

    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    data_loader, iters_per_epoch = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    # set the callback function to evaluate and potentially
    # early exit each epoch
    if cfg.PER_EPOCH_EVAL:
        per_iter_callback_fn = functools.partial(
                mlperf_test_early_exit,
                iters_per_epoch=iters_per_epoch,
                tester=functools.partial(test, cfg=cfg),
                model=model,
                distributed=distributed,
                min_bbox_map=cfg.MLPERF.MIN_BBOX_MAP,
                min_segm_map=cfg.MLPERF.MIN_SEGM_MAP)
    else:
        per_iter_callback_fn = None

    start_train_time = time.time()

    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        arguments,
        per_iter_start_callback_fn=functools.partial(mlperf_log_epoch_start, iters_per_epoch=iters_per_epoch),
        per_iter_end_callback_fn=per_iter_callback_fn,
    )

    end_train_time = time.time()
    total_training_time = end_train_time - start_train_time
    print(
            "&&&& MLPERF METRIC THROUGHPUT per GPU={:.4f} iterations / s".format((arguments["iteration"] * 1.0) / total_training_time)
    )

    return model
def main():
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.deprecated.init_process_group(
            backend="nccl", init_method="env://"
        )

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    checkpointer = DetectronCheckpointer(cfg, model)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    if cfg.OUTPUT_DIR:
        dataset_names = cfg.DATASETS.TEST
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, data_loader_val in zip(output_folders, data_loaders_val):
        inference(
            model,
            data_loader_val,
            iou_types=iou_types,
            #box_only=cfg.MODEL.RPN_ONLY,
            #box_only=False if cfg.RETINANET.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            box_only=True,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "-i",
        "--image_folder",
        default="/media/DATA/HEVI_dataset/frames",
        metavar="FILE",
        help="path to the RGB frames",
    )
    parser.add_argument(
        "-o",
        "--output_dir",
        default="/media/DATA/HEVI_dataset/detections",
        metavar="FILE",
        help="path to save detection results as numpy",
    )
    parser.add_argument('--gpu', default='0', type=str)
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    parser.add_argument("--save_features", type=bool, default=False)

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    # initialize model, load checkpoint
    model = build_detection_model(cfg, save_features=args.save_features)
    model.to(cfg.MODEL.DEVICE)

    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)
    model.eval()

    # get image transform operator
    transform = build_transform(cfg)

    data_loader = make_data_loader(cfg,
                                   is_train=False,
                                   is_distributed=distributed)
    data_loader = data_loader[0]
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    results_dict = {}
    for i, batch in enumerate(tqdm(data_loader)):
        images, targets, image_ids = batch
        images = images.to(cfg.MODEL.DEVICE)
        with torch.no_grad():
            output = model(images)
            tmp = []
            for j, o in enumerate(output):
                o = o.to('cpu')
                # if convert_pred_coco2cityscapes:
                #     o = coco2cityscapes_label(o)
                output[j] = o
        # results_dict.update(
        #     {img_id: result for img_id, result in zip(image_ids, output)}
        # )

        for o, t in zip(output, targets):
            # if t['video_name'] not in results_dict:
            #     results_dict[t['video_name']] = [o]
            # else:
            #     results_dict[t['video_name']].append(o)

            #-------------------
            # NOTE: convert from Cityscapes ID to BDD id
            # labels = o.get_field('labels')
            # for i in range(len(labels)):
            #     labels[i] = City2BDD_id_map[int(labels[i])]
            # o.add_field('labels', labels)

            # o.resize((1280, 720))
            #-------------------

            save_path = os.path.join(output_dir, t.extra_fields['video_name'])
            if not os.path.exists(save_path):
                os.makedirs(save_path)
            save_path = os.path.join(
                save_path,
                str(t.extra_fields['frame_id']).zfill(6) + '.pth')
            torch.save(o, save_path)
def train(cfg, local_rank, distributed, ckpt=None, cls_id=1):
    model = build_detection_model(cfg)
    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(ckpt)

    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    adv_patch_cpu = torch.rand(3, 250, 150)
    # adv_patch_cpu = torch.rand(3, 1, 1)
    adv_patch_cpu.requires_grad_(True)
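    # the adversarial patch is the only tensor handed to the optimizer below;
    # the detector's weights themselves are never updated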


    optimizer = torch.optim.Adam([adv_patch_cpu], lr=0.1,  amsgrad=True)
    # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=cfg.SOLVER.STEPS)
    # optimizer = torch.optim.SGD([adv_patch_cpu], lr=cfg.SOLVER.BASE_LR, momentum=cfg.SOLVER.MOMENTUM)

    # optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    # # Initialize mixed-precision training
    # use_mixed_precision = cfg.DTYPE == "float16"
    # amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    # model, optimizer = amp.initialize([adv_patch], optimizer, opt_level=amp_opt_level)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
            find_unused_parameters=True
        )

    arguments = {}
    arguments["iteration"] = 0
    arguments["cls_id"] = cls_id
    
    patch_applier = PatchApplier().to(device)
    patch_transformer = PatchTransformer().to(device)

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(
        cfg, adv_patch_cpu, optimizer, scheduler, output_dir, save_to_disk
    )
    # extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    # arguments.update(extra_checkpoint_data)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        adv_patch_cpu,
        patch_transformer,
        patch_applier,
        arguments,
    )

    return model
def train(cfg, local_rank, distributed):
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    if 'search' not in cfg.MODEL.BACKBONE.CONV_BODY:
        # Initialize mixed-precision training
        use_mixed_precision = cfg.DTYPE == "float16"
        amp_opt_level = 'O1' if use_mixed_precision else 'O0'
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=amp_opt_level)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
            find_unused_parameters=True
            if 'search' in cfg.MODEL.BACKBONE.CONV_BODY else False,
        )

    if 'search' in cfg.MODEL.BACKBONE.CONV_BODY:

        def forward_hook(module: Module, inp: (Tensor, )):
            if module.weight is not None:
                module.weight.requires_grad = True
            if module.bias is not None:
                module.bias.requires_grad = True

        all_modules = (
            nn.Conv2d,
            nn.Linear,
            nn.BatchNorm2d,
        )
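        # register the pre-forward hook on every conv / linear / batchnorm layer
        # so their weights and biases are re-marked as trainable before each
        # forward pass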
        for m in model.modules():
            if isinstance(m, all_modules):
                m.register_forward_pre_hook(forward_hook)

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    test_period = cfg.SOLVER.TEST_PERIOD
    if test_period > 0:
        data_loader_val = make_data_loader(cfg,
                                           is_train=False,
                                           is_distributed=distributed,
                                           is_for_period=True)
    else:
        data_loader_val = None

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    do_train(
        cfg,
        model,
        data_loader,
        data_loader_val,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        test_period,
        arguments,
    )

    return model
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)
    """
    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
    """

    from maskrcnn_benchmark.data.transforms.build import build_transforms
    from PIL import Image
    import torchvision.transforms.functional as F
    transform = build_transforms(cfg, is_train=False)

    img_dir = "demo_imgs"
    res_dir = "demo_res"
    model.eval()
    imgs = os.listdir(img_dir)
    for img in imgs:
        img_path = os.path.join(img_dir, img)
        img_pil = Image.open(img_path)
        # for i in range( 360 ):
        original_img = img_pil
        # original_img = F.rotate( img_pil, 45, expand=True )

        origin_w, origin_h = original_img.size
        img, target = transform(original_img, None)
        print(img.shape)
        img = img.view((1, img.shape[0], img.shape[1], img.shape[2]))
        h, w = img.shape[2:]
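        # round H and W up to the next multiple of 32 and zero-pad the image,
        # presumably to satisfy the backbone's size-divisibility requirement;
        # predictions are rescaled back with ratio_w / ratio_h below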
        if h % 32 != 0:
            new_h = (h // 32 + 1) * 32
        else:
            new_h = h
        if w % 32 != 0:
            new_w = (w // 32 + 1) * 32
        else:
            new_w = w

        ratio_w = 1. * new_w / w
        ratio_h = 1. * new_h / h

        padded_img = torch.zeros((1, 3, new_h, new_w)).float()
        padded_img[:, :, :h, :w] = img

        prediction = model(padded_img.cuda())[0]
        prediction = prediction.resize(
            (origin_w * ratio_w, origin_h * ratio_h))
        hboxes = prediction.bbox.cpu()
        rboxes = prediction.get_field("rboxes").cpu()
        ratios = prediction.get_field("ratios").cpu()
        scores = prediction.get_field("scores").cpu()
        # labels = prediction.get_field( "labels" ).cpu()

        for rbox, ratio, score in zip(rboxes, ratios, scores):
            print(rbox)
            print(ratio, score)

        h_idx = ratios > 0.8
        # print(hboxes)
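        # for nearly axis-aligned detections (ratio > 0.8), rebuild the rotated
        # box from the four corners of the horizontal box instead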
        h = hboxes[h_idx]
        hboxes_vtx = torch.stack([
            h[:, 0], h[:, 1], h[:, 2], h[:, 1],
            h[:, 2], h[:, 3], h[:, 0], h[:, 3]
        ]).permute(1, 0)
        rboxes[h_idx] = hboxes_vtx
        # rboxes = rboxes.data.numpy().astype( np.int32 )
        rboxes = rboxes.data.numpy()

        keep = poly_nms(
            np.hstack([rboxes,
                       scores.cpu().data.numpy()[:, np.newaxis]
                       ]).astype(np.double), 0.1)

        rboxes = rboxes[keep].astype(np.int32)
        scores = scores[keep]
        hboxes = hboxes[keep]

        keep = np.where(scores > 0.6)
        rboxes = rboxes[keep]
        scores = scores[keep].tolist()
        hboxes = hboxes[keep]

        # rboxes = list( map( minAreaRect, rboxes ) )
        if len(rboxes) > 0:
            rboxes = np.vstack(rboxes)
        else:
            rboxes = np.array(rboxes)

        # vis( img_info["file_name"], rboxes )

        # img = cv2.imread( original_img )
        img = np.array(original_img.convert("RGB"))[:, :, ::-1].copy()
        cv2.polylines(img,
                      rboxes.reshape(-1, 4, 2).astype(np.int32),
                      True, (0, 255, 255),
                      thickness=2,
                      lineType=cv2.LINE_AA)
        filename = img_path.split("/")[-1]
        cv2.imwrite("{}/{}".format(res_dir, filename), img)
# logger.info("Using {} GPUs".format(num_gpus))
# logger.info(args)
#
# logger.info("Collecting env info (might take some time)")
# logger.info("\n" + collect_env_info())
#
# logger.info("Loaded configuration file {}".format(args.config_file))
# with open(args.config_file, "r") as cf:
#     config_str = "\n" + cf.read()
#     logger.info(config_str)
logger.info("Running with config:\n{}".format(cfg))

save_to_disk = get_rank() == 0
checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                     output_dir, save_to_disk)
extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
arguments.update(extra_checkpoint_data)

checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

do_train(
    model,
    data_loader,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    arguments,
)
def train(cfg, local_rank, distributed, use_tensorboard=False):
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    # Initialize mixed-precision training
    use_mixed_precision = cfg.DTYPE == "float16"
    amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      opt_level=amp_opt_level)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    # load_scheduler_only_epoch will prefer the scheduler specified in the
    # config rather than the one in the checkpoint, and will load only the
    # last_epoch from the checkpoint.
    extra_checkpoint_data = checkpointer.load(
        cfg.MODEL.WEIGHT,
        load_model_only=cfg.MODEL.LOAD_ONLY_WEIGHTS,
        load_scheduler_only_epoch=True)
    if not cfg.MODEL.LOAD_ONLY_WEIGHTS:
        arguments.update(extra_checkpoint_data)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    if use_tensorboard:
        meters = TensorboardLogger(log_dir=output_dir,
                                   exp_name=cfg.TENSORBOARD_EXP_NAME,
                                   start_iter=arguments['iteration'],
                                   delimiter="  ")
    else:
        meters = MetricLogger(delimiter="  ")

    do_train(model, data_loader, optimizer, scheduler, checkpointer, device,
             checkpoint_period, arguments, meters)

    return model
def main(cfg_text, cfg_segment):
    # Load saved LSTM network
    language_model = build_detection_model(cfg_text)
    language_model.to(cfg_text.MODEL.DEVICE)

    output_dir = cfg_text.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg_text,
                                         language_model,
                                         save_dir=output_dir)
    _ = checkpointer.load(cfg_text.MODEL.WEIGHT)
    language_model.eval()

    # Load saved segmentation network
    seg_model = SegmentationHelper(cfg_segment)

    # Split=False is Test set
    data_loaders = make_data_loader(cfg_text,
                                    split=False,
                                    is_distributed=False)
    for dataset_index, data_loader in enumerate(data_loaders):
        fine_gt = []
        seg_iou = []
        bbox_iou = []

        for index, instance in tqdm(
                enumerate(data_loader),
                desc=cfg_text.DATASETS.TEST[dataset_index]):
            # Group images
            image_indexes = [x.get_field('img_id')[0] for x in instance[0][2]]
            unique_indexes, unique_mask, unique_inverse = np.unique(
                image_indexes, return_index=True, return_inverse=True)
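            # `unique_mask` (the return_index output) selects one instance per
            # unique image to run segmentation on, while `unique_inverse` maps
            # every instance back to its image's segmentation result below.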

            with torch.no_grad():
                prediction = language_model(instance[0],
                                            device=cfg_text.MODEL.DEVICE)
                segmentation_prediction = seg_model.run_on_image(
                    instance[0][0][unique_mask])

            _, pred_ind = prediction[:, -1, :].max(1)

            for j in range(len(pred_ind)):
                segs = segmentation_prediction[unique_inverse[j]]
                label = pred_ind[j]

                ann_seg = instance[0][2][j].get_field('ann_target')[0]
                fine_gt.append(ann_seg.get_field('labels').item())

                label_mask = segs.get_field('labels') == label
                if any(label_mask):
                    score, top_ind = segs[label_mask].get_field('scores').max(
                        0)
                    top_seg = segs[label_mask][top_ind]

                    bbox_iou.append(
                        IOU(top_seg.bbox.tolist()[0],
                            ann_seg.bbox.tolist()[0]))
                    if top_seg.has_field('mask'):
                        top_mask = top_seg.get_field('mask').squeeze()
                        ann_mask = ann_seg.get_field('masks').masks[0].mask
                        seg_iou.append(IOU(top_mask, ann_mask))
                    else:
                        seg_iou.append(0.0)
                else:
                    bbox_iou.append(0.0)
                    seg_iou.append(0.0)

        with open(
                '{}/{}_baseline_report.txt'.format(
                    cfg_text.OUTPUT_DIR,
                    cfg_text.DATASETS.TEST[dataset_index]), 'w') as f:
            f.write("Mean Segmentation IOU: {}\n".format(np.mean(seg_iou)))
            f.write("Mean Bounding Box IOU: {}\n".format(np.mean(bbox_iou)))

            f.write("\n Class \t Seg IOU \t BBox IOU \t Support")
            for label in data_loaders[0].dataset.coco.cats.values():
                mask = torch.Tensor(fine_gt) == label['id']
                seg_iou = torch.Tensor(seg_iou)
                bbox_iou = torch.Tensor(bbox_iou)
                f.write("\n{} \t {:.2f} \t {:.2f} \t{:d}".format(
                    label['name'], torch.mean(seg_iou[mask]),
                    torch.mean(bbox_iou[mask]), torch.sum(mask)))
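The IOU helper called above is imported from elsewhere in the project and is used both on xyxy box lists and on binary masks. Purely as an illustration of the box case, a minimal equivalent might look like the sketch below (box_iou is a hypothetical name, not the project's function):

def box_iou(box_a, box_b):
    # IoU of two [x1, y1, x2, y2] boxes, matching BoxList.bbox.tolist() above.
    ix1, iy1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    ix2, iy2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    area_a = max(0.0, box_a[2] - box_a[0]) * max(0.0, box_a[3] - box_a[1])
    area_b = max(0.0, box_b[2] - box_b[0]) * max(0.0, box_b[3] - box_b[1])
    union = area_a + area_b - inter
    return inter / union if union > 0 else 0.0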
Example #15
def train(cfg, local_rank, distributed):
    # original = torch.load('/home/zoey/nas/zoey/github/maskrcnn-benchmark/checkpoints/renderpy150000/model_0025000.pth')
    #
    # new = {"model": original["model"]}
    # torch.save(new, '/home/zoey/nas/zoey/github/maskrcnn-benchmark/checkpoints/finetune/model_0000000.pth')

    model = build_detection_model(cfg)

    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    # Initialize mixed-precision training
    use_mixed_precision = cfg.DTYPE == "float16"
    amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      opt_level=amp_opt_level)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)

    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    # extra_checkpoint_data = checkpointer.load('/home/zoey/nas/zoey/github/maskrcnn-benchmark/checkpoints/renderpy150000/model_0025000.pth')
    arguments.update(extra_checkpoint_data)
    # print(cfg)
    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        arguments,
    )

    return model
def main():
    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())

    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        split=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        predictions = inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            device=cfg.MODEL.DEVICE,
            output_folder=output_folder,
        )

        synchronize()
Example #17
def train(cfg, local_rank, distributed):
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)
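
    # NOTE: `use_amp` and `use_apex_ddp` are not defined in this snippet; they
    # are assumed to be module-level flags set elsewhere (e.g. based on whether
    # NVIDIA Apex imported successfully).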

    if use_amp:
        # Initialize mixed-precision training
        use_mixed_precision = cfg.DTYPE == "float16"

        amp_opt_level = 'O1' if use_mixed_precision else 'O0'
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=amp_opt_level)

    if distributed:
        if use_apex_ddp:
            model = DDP(model, delay_allreduce=True)
        else:
            model = torch.nn.parallel.DistributedDataParallel(
                model,
                device_ids=[local_rank],
                output_device=local_rank,
                # this should be removed if we update BatchNorm stats
                broadcast_buffers=False,
            )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    data_loader, iters_per_epoch = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    # set the callback function to evaluate and potentially
    # early exit each epoch
    if cfg.PER_EPOCH_EVAL:
        per_iter_callback_fn = functools.partial(
            mlperf_test_early_exit,
            iters_per_epoch=iters_per_epoch,
            tester=functools.partial(test, cfg=cfg),
            model=model,
            distributed=distributed,
            min_bbox_map=cfg.MIN_BBOX_MAP,
            min_segm_map=cfg.MIN_MASK_MAP)
    else:
        per_iter_callback_fn = None
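    # mlperf_test_early_exit is expected to run the tester at epoch boundaries
    # and tell do_train to stop once the min bbox/segm mAP targets are reached.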

    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        arguments,
        use_amp,
        cfg,
        per_iter_end_callback_fn=per_iter_callback_fn,
    )

    return model
Example #18
def test_once(cfg, save_dir, weight_name, distributed):
    torch.cuda.empty_cache()

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = save_dir
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(weight_name, test=True)
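    # NOTE: the `test=True` keyword appears to be a project-specific extension
    # of DetectronCheckpointer.load; it is not an upstream argument.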

    iou_types = ()  # ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST if not cfg.TEST.GEN else cfg.DATASETS.GEN
    if output_dir:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(output_dir, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False,
                                        is_distributed=distributed)
    results = []
    if not cfg.TEST.GEN:
        for output_folder, dataset_name, data_loader_val in zip(output_folders,
                                                                dataset_names,
                                                                data_loaders_val):
            result, _ = inference(
                model,
                data_loader_val,
                dataset_name=dataset_name,
                iou_types=iou_types,
                box_only=cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=output_folder,
                generate_data=cfg.TEST.GEN,
                visual_num=cfg.TEST.VISUAL_NUM
            )
            # pdb.set_trace()
            results.append(result)

            synchronize()

        return results
    else:
        for output_folder, dataset_name, data_loader_val in zip(output_folders,
                                                                dataset_names,
                                                                data_loaders_val):
            inference(
                model,
                data_loader_val,
                dataset_name=dataset_name,
                iou_types=iou_types,
                box_only=cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=output_folder,
                generate_data=cfg.TEST.GEN
            )
def main():
    # torch.cuda.set_device(7)

    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/home/SelfDriving/maskrcnn/maskrcnn-benchmark/configs/e2e_faster_rcnn_R_50_C4_1x.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    parser.add_argument(
        "--get_feature",
        help="get roi features and save",
        action='store_true',
        default=False,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            get_feature=args.get_feature,
        )
        synchronize()
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('config_file')
  parser.add_argument('ckpt_file')
  parser.add_argument('image_dir')
  parser.add_argument('name_file')
  parser.add_argument('bbox_file')
  parser.add_argument('output_dir')
  parser.add_argument('--layer_name', default='fc7')
  parser.add_argument('--start_id', type=int, default=0)
  parser.add_argument('--end_id', type=int, default=None)
  opts = parser.parse_args()

  bbox_data = json.load(open(opts.bbox_file))

  if not os.path.exists(opts.output_dir):
    os.makedirs(opts.output_dir)

  ########### build model #############
  # update the config options with the config file
  cfg.merge_from_file(opts.config_file)
  # manual override some options
  cfg.merge_from_list(['MODEL.DEVICE', 'cuda:0'])
  cfg.freeze()

  device = torch.device(cfg.MODEL.DEVICE)
  cpu_device = torch.device("cpu")

  model = build_detection_model(cfg)
  model.to(device)
  model.eval()

  checkpointer = DetectronCheckpointer(cfg, model)
  _ = checkpointer.load(f=opts.ckpt_file, use_latest=False)

  transform_fn = build_transform(cfg)

  ########### extract feature #############
  names = np.load(opts.name_file)
  if opts.end_id is None:
    opts.end_id = len(names)
  total_images = opts.end_id - opts.start_id

  for i, name in enumerate(names):
    if i < opts.start_id or i >= opts.end_id:
      continue
    outname = name.replace('/', '_')
    outfile = os.path.join(opts.output_dir, '%s.hdf5'%outname)

    if os.path.exists(outfile):
      continue

    img_file = os.path.join(opts.image_dir, name)

    # apply pre-processing to image
    original_image = cv2.imread(img_file)
    height, width = original_image.shape[:-1]
    image = transform_fn(original_image)
    nheight, nwidth = image.size(1), image.size(2)

    # convert to an ImageList, padded so that it is divisible by
    # cfg.DATALOADER.SIZE_DIVISIBILITY
    image_list = to_image_list(image, cfg.DATALOADER.SIZE_DIVISIBILITY)
    image_list = image_list.to(device)

    # compute predictions: one image one mini-batch
    with torch.no_grad():
      # features: tuples in FPN (batch, dim_ft: 256, h, w)
      features = model.backbone(image_list.tensors)

      if name in bbox_data:
        cpu_boxes = bbox_data[name]
        boxes = torch.FloatTensor(cpu_boxes).to(device)
        cand_proposals = BoxList(boxes, (width, height), mode='xyxy')
        cand_proposals = cand_proposals.resize((nwidth, nheight))

        bbox_fts, _, _ = model.roi_heads.extract_features(features, [cand_proposals])
        bbox_fts = bbox_fts[opts.layer_name].cpu()
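        # NOTE: `roi_heads.extract_features` is assumed to be a custom method in
        # this fork that pools per-box features (keyed by layer name, e.g. fc7);
        # it is not part of upstream maskrcnn-benchmark.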

        # save to file
        with h5py.File(outfile, 'w') as outf:
          outf.create_dataset(outname, bbox_fts.size(), dtype='float', compression='gzip')
          outf[outname][...] = bbox_fts.data.numpy()
          outf[outname].attrs['image_w'] = width
          outf[outname].attrs['image_h'] = height
          outf[outname].attrs['boxes'] = np.array(cpu_boxes).astype(np.int32)

      if i % 1000 == 0:
        print('name %s shape %s, processing %d/%d (%.2f%% done)'%(name, 
          bbox_fts.shape, i-opts.start_id, total_images, (i-opts.start_id)*100/total_images))
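The HDF5 files written above can be read back with the same naming convention. A small illustrative sketch (load_features is a hypothetical helper, not part of this script):

import h5py
import numpy as np

def load_features(hdf5_path, dataset_name):
  # Returns (features, boxes, (image_w, image_h)) as written by the loop above.
  with h5py.File(hdf5_path, 'r') as f:
    dset = f[dataset_name]
    features = np.asarray(dset)                # (num_boxes, feature_dim)
    boxes = np.asarray(dset.attrs['boxes'])    # xyxy boxes in original image coords
    size = (int(dset.attrs['image_w']), int(dset.attrs['image_h']))
  return features, boxes, size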
def train(cfg, local_rank, distributed):
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    # Initialize mixed-precision training
    use_mixed_precision = cfg.DTYPE == "float16"
    amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      opt_level=amp_opt_level)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)
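
    # Two training loaders are built below: one over the source domain and one
    # over the target domain; `is_source` looks like a project-specific flag of
    # this fork's make_data_loader rather than an upstream argument.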

    data_loader_src = make_data_loader(
        cfg,
        is_source=True,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    data_loader_trg = make_data_loader(
        cfg,
        is_source=False,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    test_period = cfg.SOLVER.TEST_PERIOD
    if test_period > 0:
        data_loader_val = make_data_loader(cfg,
                                           is_train=False,
                                           is_distributed=distributed,
                                           is_for_period=True)
    else:
        data_loader_val = None

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    do_train(
        cfg,
        model,
        data_loader_src,
        data_loader_trg,
        data_loader_val,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        test_period,
        arguments,
    )

    return model
Example #22
def train(cfg, local_rank, distributed):
    # Model logging
    print_mlperf(key=mlperf_log.INPUT_BATCH_SIZE, value=cfg.SOLVER.IMS_PER_BATCH)
    print_mlperf(key=mlperf_log.BATCH_SIZE_TEST, value=cfg.TEST.IMS_PER_BATCH)

    print_mlperf(key=mlperf_log.INPUT_MEAN_SUBTRACTION, value = cfg.INPUT.PIXEL_MEAN)
    print_mlperf(key=mlperf_log.INPUT_NORMALIZATION_STD, value=cfg.INPUT.PIXEL_STD)
    print_mlperf(key=mlperf_log.INPUT_RESIZE)
    print_mlperf(key=mlperf_log.INPUT_RESIZE_ASPECT_PRESERVING)
    print_mlperf(key=mlperf_log.MIN_IMAGE_SIZE, value=cfg.INPUT.MIN_SIZE_TRAIN)
    print_mlperf(key=mlperf_log.MAX_IMAGE_SIZE, value=cfg.INPUT.MAX_SIZE_TRAIN)
    print_mlperf(key=mlperf_log.INPUT_RANDOM_FLIP)
    print_mlperf(key=mlperf_log.RANDOM_FLIP_PROBABILITY, value=0.5)
    print_mlperf(key=mlperf_log.FG_IOU_THRESHOLD, value=cfg.MODEL.RPN.FG_IOU_THRESHOLD)
    print_mlperf(key=mlperf_log.BG_IOU_THRESHOLD, value=cfg.MODEL.RPN.BG_IOU_THRESHOLD)
    print_mlperf(key=mlperf_log.RPN_PRE_NMS_TOP_N_TRAIN, value=cfg.MODEL.RPN.PRE_NMS_TOP_N_TRAIN)
    print_mlperf(key=mlperf_log.RPN_PRE_NMS_TOP_N_TEST, value=cfg.MODEL.RPN.PRE_NMS_TOP_N_TEST)
    print_mlperf(key=mlperf_log.RPN_POST_NMS_TOP_N_TRAIN, value=cfg.MODEL.RPN.FPN_POST_NMS_TOP_N_TRAIN)
    print_mlperf(key=mlperf_log.RPN_POST_NMS_TOP_N_TEST, value=cfg.MODEL.RPN.FPN_POST_NMS_TOP_N_TEST)
    print_mlperf(key=mlperf_log.ASPECT_RATIOS, value=cfg.MODEL.RPN.ASPECT_RATIOS)
    print_mlperf(key=mlperf_log.BACKBONE, value=cfg.MODEL.BACKBONE.CONV_BODY)
    print_mlperf(key=mlperf_log.NMS_THRESHOLD, value=cfg.MODEL.RPN.NMS_THRESH)
    # /root/ssy/maskrcnn-benchmark/maskrcnn_benchmark/modeling/detector/detectors.py
    # build the bare model without doing anything else yet
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    # Optimizer logging
    print_mlperf(key=mlperf_log.OPT_NAME, value=mlperf_log.SGD_WITH_MOMENTUM)
    print_mlperf(key=mlperf_log.OPT_LR, value=cfg.SOLVER.BASE_LR)
    print_mlperf(key=mlperf_log.OPT_MOMENTUM, value=cfg.SOLVER.MOMENTUM)
    print_mlperf(key=mlperf_log.OPT_WEIGHT_DECAY, value=cfg.SOLVER.WEIGHT_DECAY)


    scheduler = make_lr_scheduler(cfg, optimizer)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(
        cfg, model, optimizer, scheduler, output_dir, save_to_disk
    )
    # cfg has no SAVE_CHECKPOINTS option, but
    # maskrcnn_benchmark/engine/trainer.py reads arguments["save_checkpoints"],
    # so it is hard-coded to True here
    # arguments["save_checkpoints"] = cfg.SAVE_CHECKPOINTS
    arguments["save_checkpoints"] = True

    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    data_loader, iters_per_epoch = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"]
    )
    print("SSY iters_per_epoch "+str(iters_per_epoch))
    #print("SSY iters_per_epoch change to 100 ")
    #iters_per_epoch = 100

    #checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    checkpoint_period = iters_per_epoch
    #checkpoint_period =100 

    # set the callback function to evaluate and potentially
    # early exit each epoch
    # SSY: PER_EPOCH_EVAL, MIN_BBOX_MAP and MIN_SEGM_MAP were already added to
    # ./configs/e2e_mask_rcnn_R_50_FPN_1x.yaml, but the config still cannot
    # find them, so they are set manually here
    #if cfg.PER_EPOCH_EVAL:
    #    per_iter_callback_fn = functools.partial(
    #            mlperf_test_early_exit,
    #            iters_per_epoch=iters_per_epoch,
    #            tester=functools.partial(test, cfg=cfg),
    #            model=model,
    #            distributed=distributed,
    #            min_bbox_map=cfg.MLPERF.MIN_BBOX_MAP,
    #            min_segm_map=cfg.MLPERF.MIN_SEGM_MAP)
    #else:
    #    per_iter_callback_fn = None
    per_iter_callback_fn = functools.partial(
            mlperf_test_early_exit,
            iters_per_epoch=iters_per_epoch,
            # /root/ssy/maskrcnn-benchmark/maskrcnn_benchmark/engine/tester.py
            tester=functools.partial(test, cfg=cfg),
            model=model,
            distributed=distributed,
            min_bbox_map=0.377,
            min_segm_map=0.339)

    start_train_time = time.time()
    # /root/ssy/maskrcnn-benchmark/maskrcnn_benchmark/engine/trainer.py
    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        arguments,
        per_iter_start_callback_fn=functools.partial(mlperf_log_epoch_start, iters_per_epoch=iters_per_epoch),
        per_iter_end_callback_fn=per_iter_callback_fn,
    )

    end_train_time = time.time()
    total_training_time = end_train_time - start_train_time
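    # e.g. 90,000 iterations finished in 10,800 s would be reported as
    # 90000 / 10800 ≈ 8.33 iterations / s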
    print(
            "&&&& MLPERF METRIC THROUGHPUT per GPU={:.4f} iterations / s".format((arguments["iteration"] * 1.0) / total_training_time)
    )

    return model
Example #23
def main():
    #     apply_prior   prior_mask
    # 0        -             -
    # 1        Y             -
    # 2        -             Y
    # 3        Y             Y
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument(
        "--dataset_name",
        help="vcoco_test or vcoco_val_test",
        default=None,
    )
    parser.add_argument('--num_iteration',
                        dest='num_iteration',
                        help='Specify which weight to load',
                        default=-1,
                        type=int)
    parser.add_argument('--object_thres',
                        dest='object_thres',
                        help='Object threshold',
                        default=0.1,
                        type=float)  # used to be 0.4 or 0.05
    parser.add_argument('--human_thres',
                        dest='human_thres',
                        help='Human threshold',
                        default=0.8,
                        type=float)
    parser.add_argument('--prior_flag',
                        dest='prior_flag',
                        help='whether use prior_flag',
                        default=1,
                        type=int)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1 and torch.cuda.is_available()

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    print('prior flag: {}'.format(args.prior_flag))

    ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    # DATA_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'Data'))
    args.config_file = os.path.join(ROOT_DIR, args.config_file)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("DRG", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    # model.to(cfg.MODEL.DEVICE)
    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)

    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)

    if args.num_iteration != -1:
        args.ckpt = os.path.join(cfg.OUTPUT_DIR,
                                 'model_%07d.pth' % args.num_iteration)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    logger.info("Testing checkpoint {}".format(ckpt))
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    # iou_types = ("bbox",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    # dataset_names = cfg.DATASETS.TEST
    dataset_names = (args.dataset_name, )

    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            if args.num_iteration != -1:
                output_folder = os.path.join(cfg.OUTPUT_DIR, "inference_sp",
                                             dataset_name,
                                             "model_%07d" % args.num_iteration)
            else:
                output_folder = os.path.join(cfg.OUTPUT_DIR, "inference_sp",
                                             dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder

    opt = {}
    # opt['word_dim'] = 300
    for output_folder, dataset_name in zip(output_folders, dataset_names):
        data = DatasetCatalog.get(dataset_name)
        data_args = data["args"]
        im_dir = data_args['im_dir']
        test_detection = pickle.load(open(data_args['test_detection_file'],
                                          "rb"),
                                     encoding='latin1')
        prior_mask = pickle.load(open(data_args['prior_mask'], "rb"),
                                 encoding='latin1')
        action_dic = json.load(open(data_args['action_index']))
        action_dic_inv = {y: x for x, y in action_dic.items()}
        vcoco_test_ids = open(data_args['vcoco_test_ids_file'], 'r')
        test_image_id_list = [int(line.rstrip()) for line in vcoco_test_ids]
        vcocoeval = VCOCOeval(data_args['vcoco_test_file'],
                              data_args['ann_file'],
                              data_args['vcoco_test_ids_file'])
        word_embeddings = pickle.load(open(data_args['word_embedding_file'],
                                           "rb"),
                                      encoding='latin1')
        output_file = os.path.join(output_folder, 'detection.pkl')
        output_dict_file = os.path.join(
            output_folder, 'detection_human_{}_new.pkl'.format(dataset_name))

        logger.info("Output will be saved in {}".format(output_file))
        logger.info("Start evaluation on {} dataset({} images).".format(
            dataset_name, len(test_image_id_list)))

        run_test(model,
                 dataset_name=dataset_name,
                 im_dir=im_dir,
                 test_detection=test_detection,
                 word_embeddings=word_embeddings,
                 test_image_id_list=test_image_id_list,
                 prior_mask=prior_mask,
                 action_dic_inv=action_dic_inv,
                 output_file=output_file,
                 output_dict_file=output_dict_file,
                 object_thres=args.object_thres,
                 human_thres=args.human_thres,
                 prior_flag=args.prior_flag,
                 device=device,
                 cfg=cfg)

        synchronize()

        vcocoeval._do_eval(output_file, ovr_thresh=0.5)
def main():
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="configs/visual_genome_vqa/e2e_faster_rcnn_X-101-64x4d-FPN_1x_MLP_2048_FPN_512_vqa_single.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--save_path",
        default="/checkpoint/meetshah/features/vqa/pytorch/resnext101_64x4d_mlp_2048_fpn_512/",
    )
    parser.add_argument(
        "--feat_name",
        default="fc6",
    )
    parser.add_argument(
        "--n_groups",
        default=0,
    )
    parser.add_argument(
        "--group_id",
        default=1,
    )
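    # `n_groups` / `group_id` presumably shard the dataset across independent
    # extraction jobs; both are simply forwarded to inference() below.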
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    if not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
        output_folders, dataset_names, data_loaders_val
    ):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            save_path=args.save_path,
            feat_name=args.feat_name,
            group_id=args.group_id,
            n_groups=args.n_groups,
        )
        synchronize()
def main():
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.deprecated.init_process_group(
            backend="nccl", init_method="env://"
        )

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
Example #26
def train(cfg, local_rank, distributed, d_path=None):

    MaskDnet = MaskDiscriminator(nc=256)
    BBoxDnet = BoxDiscriminator(nc=256, ndf=64)
    Dnet = CombinedDiscriminator(MaskDnet, BBoxDnet)
    model = Mask_RCNN(cfg)
    g_rcnn = GAN_RCNN(model, Dnet)
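    # GAN_RCNN is assumed to wrap the Mask R-CNN generator and the combined
    # box/mask discriminator so that a single forward pass returns the
    # generator and discriminator loss dicts used in the loop below.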

    device = torch.device(cfg.MODEL.DEVICE)
    g_rcnn.to(device)

    g_optimizer = make_optimizer(cfg, model)
    d_optimizer = make_D_optimizer(cfg, Dnet)

    g_scheduler = make_lr_scheduler(cfg, g_optimizer)
    d_scheduler = make_lr_scheduler(cfg, d_optimizer)
    # model.BoxDnet = BBoxDnet

    # Initialize mixed-precision training
    use_mixed_precision = cfg.DTYPE == "float16"
    amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    model, g_optimizer = amp.initialize(model,
                                        g_optimizer,
                                        opt_level=amp_opt_level)
    Dnet, d_optimizer = amp.initialize(Dnet,
                                       d_optimizer,
                                       opt_level=amp_opt_level)

    if distributed:
        g_rcnn = torch.nn.parallel.DistributedDataParallel(
            g_rcnn,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, g_optimizer, g_scheduler,
                                         output_dir, save_to_disk)

    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)

    arguments.update(extra_checkpoint_data)

    d_checkpointer = DetectronCheckpointer(cfg, Dnet, d_optimizer, d_scheduler,
                                           output_dir, save_to_disk)

    if d_path:
        d_checkpointer.load(d_path, use_latest=False)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    test_period = cfg.SOLVER.TEST_PERIOD
    data_loader_val = make_data_loader(cfg,
                                       is_train=False,
                                       is_distributed=distributed,
                                       is_for_period=True)

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    ## START TRAINING
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")

    meters = TensorboardLogger(log_dir=cfg.OUTPUT_DIR + "/tensorboardX",
                               start_iter=arguments['iteration'],
                               delimiter="  ")

    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    g_rcnn.train()
    start_training_time = time.time()
    end = time.time()

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )

    dataset_names = cfg.DATASETS.TEST

    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):

        if any(len(target) < 1 for target in targets):
            logger.error(
                f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}"
            )
            continue
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        # NOTE
        g_loss_dict, d_loss_dict = g_rcnn(images, targets)

        g_losses = sum(loss for loss in g_loss_dict.values())
        d_losses = sum(loss for loss in d_loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        g_loss_dict_reduced = reduce_loss_dict(g_loss_dict)
        g_losses_reduced = sum(loss for loss in g_loss_dict_reduced.values())

        d_loss_dict_reduced = reduce_loss_dict(d_loss_dict)
        d_losses_reduced = sum(loss for loss in d_loss_dict_reduced.values())

        meters.update(total_g_loss=g_losses_reduced, **g_loss_dict_reduced)
        meters.update(total_d_loss=d_losses_reduced, **d_loss_dict_reduced)

        g_optimizer.zero_grad()
        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        with amp.scale_loss(g_losses, g_optimizer) as g_scaled_losses:
            g_scaled_losses.backward()
        g_optimizer.step()
        g_scheduler.step()

        d_optimizer.zero_grad()
        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        with amp.scale_loss(d_losses, d_optimizer) as d_scaled_losses:
            d_scaled_losses.backward()
        d_optimizer.step()
        d_scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=g_optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
            d_checkpointer.save("dnet_{:07d}".format(iteration), **arguments)

        if data_loader_val is not None and test_period > 0 and iteration % test_period == 0:
            meters_val = MetricLogger(delimiter="  ")
            synchronize()
            _ = inference(  # The result can be used for additional logging, e.g. for TensorBoard
                model,
                # inference() changes the segmentation mask format held by a data
                # loader, so a fresh data loader is created on every call:
                make_data_loader(cfg,
                                 is_train=False,
                                 is_distributed=False,
                                 is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False
                if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=cfg.OUTPUT_DIR,
            )
            synchronize()
            model.train()
            with torch.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val,
                                    _) in enumerate(tqdm(data_loader_val)):
                    images_val = images_val.to(device)
                    targets_val = [target.to(device) for target in targets_val]
                    loss_dict = model(images_val, targets_val)
                    losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = reduce_loss_dict(loss_dict)
                    losses_reduced = sum(
                        loss for loss in loss_dict_reduced.values())
                    meters_val.update(loss=losses_reduced, **loss_dict_reduced)
            synchronize()
            logger.info(
                meters_val.delimiter.join([
                    "[Validation]: ",
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters_val),
                    lr=g_optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))

        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))