Example #1
    def __init__(self, backbone=None, num_classes=21):
        super(SSD300, self).__init__()
        if backbone is None:
            raise Exception("backbone is None")
        if not hasattr(backbone, "out_channels"):
            raise Exception("the backbone not has attribute: out_channel")
        self.feature_extractor = backbone  # 把传入的backbone定义给ssd的feature_extractor

        self.num_classes = num_classes
        # out_channels = [1024, 512, 512, 256, 256, 256] for resnet50; add extra conv layers for feature extraction
        self._build_additional_features(self.feature_extractor.out_channels)
        # the first feature map and the last two generate 4 default boxes per location; the other layers generate 6
        self.num_defaults = [4, 6, 6, 6, 4, 4]
        location_extractors = []  # localization predictors, implemented as 3x3 convs
        confidence_extractors = []  # confidence predictors, also 3x3 convs

        # out_channels = [1024, 512, 512, 256, 256, 256] for resnet50
        for nd, oc in zip(self.num_defaults,
                          self.feature_extractor.out_channels):
            # nd is number_default_boxes, oc is output_channel
            location_extractors.append(
                nn.Conv2d(oc, nd * 4, kernel_size=3, padding=1))
            confidence_extractors.append(
                nn.Conv2d(oc, nd * self.num_classes, kernel_size=3, padding=1))

        self.loc = nn.ModuleList(location_extractors)
        self.conf = nn.ModuleList(confidence_extractors)
        self._init_weights()

        # default_box.dboxes  (8732*4)
        default_box = dboxes300_coco()  # generates the 8732 default boxes
        self.compute_loss = Loss(default_box)
        self.encoder = Encoder(default_box)
        self.postprocess = PostProcess(default_box)
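
A hedged sketch of the out_channels contract this constructor enforces. The stub backbone below is hypothetical, and dboxes300_coco, Loss, Encoder, and PostProcess must still be importable from the surrounding project:

import torch.nn as nn

class StubBackbone(nn.Module):
    """Hypothetical backbone: any module works if it exposes `out_channels`."""
    def __init__(self):
        super().__init__()
        # one channel count per feature map consumed by the prediction heads
        self.out_channels = [1024, 512, 512, 256, 256, 256]
        self.body = nn.Conv2d(3, 1024, kernel_size=3, padding=1)

    def forward(self, x):
        return self.body(x)

model = SSD300(backbone=StubBackbone(), num_classes=21)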
Example #2
    def __init__(self, backbone=None, num_classes=21, pretrain_path=None):
        super(SSD640, self).__init__()
        if backbone is None:
            raise Exception("backbone is None")
        if not hasattr(backbone, "out_channels"):
            raise Exception("the backbone not has attribute: out_channel")

        self.feature_extractor = backbone
        if pretrain_path is not None:
            self.feature_extractor.load_state_dict(torch.load(pretrain_path))

        self.num_classes = num_classes
        # self._build_additional_features([2048, 1024, 512, 256])

        self.num_defaults = [1, 2]
        location_extractors = []
        confidence_extractors = []

        for nd, oc in zip(self.num_defaults,
                          self.feature_extractor.out_channels):
            # nd is number_default_boxes, oc is output_channel
            location_extractors.append(
                nn.Conv2d(oc, nd * 4, kernel_size=3, padding=1))
            confidence_extractors.append(
                nn.Conv2d(oc, nd * self.num_classes, kernel_size=3, padding=1))

        self.loc = nn.ModuleList(location_extractors)
        self.conf = nn.ModuleList(confidence_extractors)
        self._init_weights()

        self.default_box = dboxes300_coco()
        self.compute_loss = Loss(self.default_box)
        self.encoder = Encoder(self.default_box)
        self.postprocess = PostProcess(self.default_box)
Example #3
    def __init__(self, backbone=None, num_classes=21):
        super(SSD300, self).__init__()
        if backbone is None:
            raise Exception("backbone is None")
        if not hasattr(backbone, "out_channels"):
            raise Exception("the backbone not has attribute: out_channel")
        self.feature_extractor = backbone

        self.num_classes = num_classes
        # out_channels = [1024, 512, 512, 256, 256, 256] for resnet50
        self._build_additional_features(self.feature_extractor.out_channels)
        self.num_defaults = [4, 6, 6, 6, 4, 4]
        location_extractors = []
        confidence_extractors = []

        # out_channels = [1024, 512, 512, 256, 256, 256] for resnet50
        for nd, oc in zip(self.num_defaults,
                          self.feature_extractor.out_channels):
            # nd is number_default_boxes, oc is output_channel
            location_extractors.append(
                nn.Conv2d(oc, nd * 4, kernel_size=3, padding=1))
            confidence_extractors.append(
                nn.Conv2d(oc, nd * self.num_classes, kernel_size=3, padding=1))

        self.loc = nn.ModuleList(location_extractors)
        self.conf = nn.ModuleList(confidence_extractors)
        self._init_weights()

        default_box = dboxes300_coco()
        self.compute_loss = Loss(default_box)
        self.encoder = Encoder(default_box)
        self.postprocess = PostProcess(default_box)
Example #4
def decode_results(predictions):
    dboxes = dboxes300_coco()
    encoder = Encoder(dboxes)
    ploc, plabel = [val.float() for val in predictions]
    results = encoder.decode_batch(ploc, plabel, criteria=0.5, max_output=20)

    return [[pred.detach().cpu().numpy() for pred in detections]
            for detections in results]
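
A hedged call sketch, assuming the 8732-box grid of dboxes300_coco and 81 output classes (80 COCO categories plus background):

import torch

# Hypothetical raw network outputs: (batch, 4, n_boxes) box offsets and
# (batch, n_classes, n_boxes) class logits.
ploc = torch.randn(2, 4, 8732)
plabel = torch.randn(2, 81, 8732)

batch = decode_results((ploc, plabel))
bboxes, classes, confidences = batch[0]  # numpy arrays for the first image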
Example #5
def get_val_dataset(args):
    dboxes = dboxes300_coco()
    val_trans = SSDTransformer(dboxes, (300, 300), val=True)

    val_annotate = os.path.join(args.data, "annotations/instances_val2017.json")
    val_coco_root = os.path.join(args.data, "val2017")

    val_coco = COCODetection(val_coco_root, val_annotate, val_trans)
    return val_coco
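
Usage sketch; the dataset root below is an illustrative placeholder, assuming the standard COCO-2017 layout (annotations/instances_val2017.json and val2017/ under args.data):

from argparse import Namespace
from torch.utils.data import DataLoader

args = Namespace(data="/datasets/coco")  # hypothetical path
val_coco = get_val_dataset(args)
val_loader = DataLoader(val_coco, batch_size=32, shuffle=False, num_workers=4)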
Example #6
    def __init__(self):
        self.sample_options = (
            # Do nothing
            None,
            # min IoU, max IoU
            (0.1, None),
            (0.3, None),
            (0.5, None),
            (0.7, None),
            (0.9, None),
            # no IoU requirements
            (None, None),
        )
        self.dboxes = dboxes300_coco()
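
These sample_options drive SSD's random-crop augmentation. A self-contained sketch of how a cropper typically consumes them (control flow assumed, following the NVIDIA SSD reference, not taken verbatim from this class):

import random

sample_options = (None, (0.1, None), (0.3, None),
                  (0.5, None), (0.7, None), (0.9, None), (None, None))

mode = random.choice(sample_options)
if mode is None:
    pass  # "Do nothing": keep the original image
else:
    min_iou, max_iou = mode
    # None means unbounded on that side
    min_iou = float('-inf') if min_iou is None else min_iou
    max_iou = float('inf') if max_iou is None else max_iou
    # ...then sample crop windows until one is found whose IoU with the
    # ground-truth boxes falls within [min_iou, max_iou]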
Example #7
def train(train_loop_func, logger, args):
    if args.amp:
        amp_handle = amp.init(enabled=args.fp16)
    # Check that GPUs are actually available
    use_cuda = not args.no_cuda

    # Setup multi-GPU if necessary
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.N_gpu = torch.distributed.get_world_size()
    else:
        args.N_gpu = 1

    if args.seed is None:
        args.seed = np.random.randint(10000)

    if args.distributed:
        args.seed = (args.seed + torch.distributed.get_rank()) % 2**32
    print("Using seed = {}".format(args.seed))
    torch.manual_seed(args.seed)
    np.random.seed(seed=args.seed)

    # Setup data, defaults
    dboxes = dboxes300_coco()
    encoder = Encoder(dboxes)
    cocoGt = get_coco_ground_truth(args)

    train_loader = get_train_loader(args, args.seed - 2**31)

    val_dataset = get_val_dataset(args)
    val_dataloader = get_val_dataloader(val_dataset, args)

    ssd300 = SSD300(backbone=args.backbone)
    args.learning_rate = args.learning_rate * args.N_gpu * (args.batch_size / 32)
    start_epoch = 0
    iteration = 0
    loss_func = Loss(dboxes)

    if use_cuda:
        ssd300.cuda()
        loss_func.cuda()

    if args.fp16 and not args.amp:
        ssd300 = network_to_half(ssd300)

    if args.distributed:
        ssd300 = DDP(ssd300)

    optimizer = torch.optim.SGD(tencent_trick(ssd300),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = MultiStepLR(optimizer=optimizer,
                            milestones=args.multistep,
                            gamma=0.1)
    if args.fp16:
        if args.amp:
            optimizer = amp_handle.wrap_optimizer(optimizer)
        else:
            optimizer = FP16_Optimizer(optimizer, static_loss_scale=128.)
    if args.checkpoint is not None:
        if os.path.isfile(args.checkpoint):
            load_checkpoint(ssd300, args.checkpoint)
            checkpoint = torch.load(
                args.checkpoint,
                map_location=lambda storage, loc: storage.cuda(torch.cuda.current_device()))
            start_epoch = checkpoint['epoch']
            iteration = checkpoint['iteration']
            scheduler.load_state_dict(checkpoint['scheduler'])
            ssd300.load_state_dict(checkpoint['model'])
            optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            print('Provided checkpoint is not a path to a file')
            return

    inv_map = {v: k for k, v in val_dataset.label_map.items()}

    total_time = 0

    if args.mode == 'evaluation':
        acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map, args)
        if args.local_rank == 0:
            print('Model precision {} mAP'.format(acc))

        return
    mean, std = generate_mean_std(args)

    for epoch in range(start_epoch, args.epochs):
        start_epoch_time = time.time()
        scheduler.step()
        iteration = train_loop_func(ssd300, loss_func, epoch, optimizer,
                                    train_loader, val_dataloader, encoder,
                                    iteration, logger, args, mean, std)
        end_epoch_time = time.time() - start_epoch_time
        total_time += end_epoch_time

        if args.local_rank == 0:
            logger.update_epoch_time(epoch, end_epoch_time)

        if epoch in args.evaluation:
            acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map,
                           args)

            if args.local_rank == 0:
                logger.update_epoch(epoch, acc)

        if args.save and args.local_rank == 0:
            print("saving model...")
            obj = {
                'epoch': epoch + 1,
                'iteration': iteration,
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
                'label_map': val_dataset.label_info
            }
            if args.distributed:
                obj['model'] = ssd300.module.state_dict()
            else:
                obj['model'] = ssd300.state_dict()
            torch.save(obj, './models/epoch_{}.pt'.format(epoch))
        train_loader.reset()
    print('total training time: {}'.format(total_time))
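
Both train() variants scale the base learning rate linearly with the total batch size, taking a global batch of 32 as the reference. A worked sketch with illustrative values:

base_lr, n_gpu, batch_size = 2.6e-3, 4, 64  # hypothetical configuration
scaled_lr = base_lr * n_gpu * (batch_size / 32)
print(scaled_lr)  # 0.0208: 4 GPUs at batch 64 give an 8x multiplier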
Example #8
def train(train_loop_func, logger, args):
    # Check that GPUs are actually available
    use_cuda = not args.no_cuda
    train_samples = 118287

    # Setup multi-GPU if necessary
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='smddp', init_method='env://')
        args.N_gpu = torch.distributed.get_world_size()
    else:
        args.N_gpu = 1

    if args.seed is None:
        args.seed = np.random.randint(10000)

    if args.distributed:
        args.seed = (args.seed + torch.distributed.get_rank()) % 2**32
    print("Using seed = {}".format(args.seed))
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    np.random.seed(seed=args.seed)

    # Setup data, defaults
    dboxes = dboxes300_coco()
    encoder = Encoder(dboxes)
    cocoGt = get_coco_ground_truth(args)

    train_loader = get_train_loader(args, args.seed - 2**31)

    val_dataset = get_val_dataset(args)
    val_dataloader = get_val_dataloader(val_dataset, args)

    ssd300 = SSD300(backbone=ResNet(args.backbone, args.backbone_path))
    args.learning_rate = args.learning_rate * args.N_gpu * (args.batch_size / 32)
    start_epoch = 0
    iteration = 0
    loss_func = Loss(dboxes)

    if use_cuda:
        ssd300.cuda()
        loss_func.cuda()

    optimizer = torch.optim.SGD(tencent_trick(ssd300), lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = MultiStepLR(optimizer=optimizer, milestones=args.multistep,
                            gamma=0.1)
    if args.amp:
        ssd300, optimizer = amp.initialize(ssd300, optimizer, opt_level='O2')

    if args.distributed:
        ssd300 = DDP(ssd300)

    if args.checkpoint is not None:
        if os.path.isfile(args.checkpoint):
            load_checkpoint(ssd300.module if args.distributed else ssd300, args.checkpoint)
            checkpoint = torch.load(
                args.checkpoint,
                map_location=lambda storage, loc: storage.cuda(torch.cuda.current_device()))
            start_epoch = checkpoint['epoch']
            iteration = checkpoint['iteration']
            scheduler.load_state_dict(checkpoint['scheduler'])
            optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            print('Provided checkpoint is not a path to a file')
            return

    inv_map = {v: k for k, v in val_dataset.label_map.items()}

    total_time = 0

    if args.mode == 'evaluation':
        acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map, args)
        if args.local_rank == 0:
            print('Model precision {} mAP'.format(acc))

        return
    mean, std = generate_mean_std(args)

    for epoch in range(start_epoch, args.epochs):
        start_epoch_time = time.time()
        scheduler.step()
        iteration = train_loop_func(ssd300, loss_func, epoch, optimizer, train_loader, val_dataloader, encoder, iteration,
                                    logger, args, mean, std)
        end_epoch_time = time.time() - start_epoch_time
        total_time += end_epoch_time

        if not args.distributed or torch.distributed.get_rank() == 0:
            throughput = train_samples / end_epoch_time
            logger.update_epoch_time(epoch, end_epoch_time)
            logger.update_throughput_speed(epoch, throughput)

        if epoch in args.evaluation:
            acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map, args)

        if args.save and args.local_rank == 0:
            print("saving model...")
            obj = {'epoch': epoch + 1,
                   'iteration': iteration,
                   'optimizer': optimizer.state_dict(),
                   'scheduler': scheduler.state_dict(),
                   'label_map': val_dataset.label_info}
            if args.distributed:
                obj['model'] = ssd300.module.state_dict()
            else:
                obj['model'] = ssd300.state_dict()
            save_path = os.path.join(args.save, f'epoch_{epoch}.pt')
            torch.save(obj, save_path)
            logger.log('model path', save_path)
        train_loader.reset()

    if not args.distributed or torch.distributed.get_rank() == 0:
        DLLogger.log((), {'Total training time': '{:.2f} secs'.format(total_time)})
        logger.log_summary()
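
The summary above goes through NVIDIA's dllogger. A minimal initialization sketch (the backend choice is an assumption; this snippet does not show how DLLogger was configured):

import dllogger as DLLogger
from dllogger import StdOutBackend, Verbosity

# Without init(), DLLogger.log() has no backends to write to.
DLLogger.init(backends=[StdOutBackend(Verbosity.DEFAULT)])
DLLogger.log(step=(), data={'Total training time': '123.45 secs'})
DLLogger.flush()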
Example #9
def train(args):
    if args.amp:
        amp_handle = amp.init(enabled=args.fp16)

    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        args.N_gpu = torch.distributed.get_world_size()
    else:
        args.N_gpu = 1

    dboxes = dboxes300_coco()
    encoder = Encoder(dboxes)
    cocoGt = get_coco_ground_truth(args)

    ssd300 = model(args)
    args.learning_rate = args.learning_rate * args.N_gpu * (args.batch_size / 32)
    iteration = 0
    loss_func = Loss(dboxes)

    loss_func.cuda()

    optimizer = torch.optim.SGD(
        tencent_trick(ssd300), 
        lr=args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    scheduler = MultiStepLR(
        optimizer=optimizer, 
        milestones=args.multistep, 
        gamma=0.1)

    if args.fp16:
        if args.amp:
            optimizer = amp_handle.wrap_optimizer(optimizer)
        else:
            optimizer = FP16_Optimizer(optimizer, static_loss_scale=128.)

    val_dataloader, inv_map = get_val_dataloader(args)
    train_loader = get_train_loader(args, dboxes)

    acc = 0
    logger = Logger(args.batch_size, args.local_rank)
    
    for epoch in range(0, args.epochs):
        logger.start_epoch()
        scheduler.step()

        iteration = train_loop(
            ssd300, loss_func, epoch, optimizer, 
            train_loader, iteration, logger, args)

        logger.end_epoch()

        if epoch in args.evaluation:
            acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map, args)
            if args.local_rank == 0:
                print('Epoch {:2d}, Accuracy: {:4f} mAP'.format(epoch, acc))

        if args.data_pipeline == 'dali':
            train_loader.reset()

    return acc, logger.average_speed()
Example #10
    def __init__(self):
        self.default_box = dboxes300_coco()
        self.encoder = Encoder(self.default_box)
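
A hedged sketch of what this Encoder pairing is typically used for; the encode signature is assumed from the NVIDIA SSD reference utilities:

import torch

dboxes = dboxes300_coco()
encoder = Encoder(dboxes)

# One image's ground truth: LTRB boxes normalized to [0, 1].
gt_boxes = torch.tensor([[0.10, 0.20, 0.50, 0.60]])
gt_labels = torch.tensor([1])

# Match ground truth against the 8732 default boxes (IoU threshold 0.5).
enc_boxes, enc_labels = encoder.encode(gt_boxes, gt_labels, criteria=0.5)
print(enc_boxes.shape, enc_labels.shape)  # expected: (8732, 4) and (8732,)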
Example #11
def train(train_loop_func, logger, args):
    # Check that GPUs are actually available
    use_cuda = not args.no_cuda

    # Setup multi-GPU if necessary
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.N_gpu = torch.distributed.get_world_size()
    else:
        args.N_gpu = 1

    if args.seed is None:
        args.seed = np.random.randint(10000)

    if args.distributed:
        args.seed = (args.seed + torch.distributed.get_rank()) % 2**32
    print("Using seed = {}".format(args.seed))
    torch.manual_seed(args.seed)
    np.random.seed(seed=args.seed)

    # Setup data, defaults
    dboxes = dboxes300_coco()
    encoder = Encoder(dboxes)
    cocoGt = get_coco_ground_truth(args)

    train_loader = get_train_loader(args, args.seed - 2**31)

    val_dataset = get_val_dataset(args)
    val_dataloader = get_val_dataloader(val_dataset, args)

    ssd300 = SSD300(backbone=ResNet(args.backbone, args.backbone_path))
    # args.learning_rate = args.learning_rate * args.N_gpu * (args.batch_size / 32)

    print(f"Actual starting LR: {args.learning_rate}")

    start_epoch = 0
    iteration = 0
    loss_func = Loss(dboxes)

    if use_cuda:
        ssd300.cuda()
        loss_func.cuda()

    # optimizer = torch.optim.SGD(tencent_trick(ssd300), lr=args.learning_rate,
    #                                 momentum=args.momentum, weight_decay=args.weight_decay, nesterov=True)
    optimizer = torch.optim.AdamW(tencent_trick(ssd300),
                                  lr=args.learning_rate,
                                  betas=(0.8, 0.999),
                                  eps=1e-08,
                                  weight_decay=0.01,
                                  amsgrad=True)

    # scheduler = MultiStepLR(optimizer=optimizer, milestones=args.multistep, gamma=0.1)
    # scheduler = CosineAnnealingWarmRestarts(optimizer=optimizer, T_0=20, T_mult=1, eta_min=1e-6)
    scheduler = CosineAnnealingLR(optimizer=optimizer,
                                  T_max=args.epochs,
                                  eta_min=1e-6)

    # scheduler = OneCycleLR(optimizer, max_lr=0.003, epochs=41, steps_per_epoch=173)
    # scheduler = CyclicLR(optimizer, base_lr=args.learning_rate, max_lr=2*args.learning_rate,
    #                      step_size_up=173*3, step_size_down=173*10)

    if args.amp:
        ssd300, optimizer = amp.initialize(ssd300, optimizer, opt_level='O2')

    if args.distributed:
        ssd300 = DDP(ssd300)

    if args.checkpoint is not None:
        if os.path.isfile(args.checkpoint):
            load_checkpoint(ssd300.module if args.distributed else ssd300,
                            args.checkpoint)
            checkpoint = torch.load(
                args.checkpoint,
                map_location=lambda storage, loc: storage.cuda(torch.cuda.current_device()))
            start_epoch = checkpoint['epoch']
            iteration = checkpoint['iteration']
            scheduler.load_state_dict(checkpoint['scheduler'])
            optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            print('Provided checkpoint is not a path to a file')
            return

    inv_map = {v: k for k, v in val_dataset.label_map.items()}

    total_time = 0

    if args.mode == 'evaluation':
        acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map, args)
        if args.local_rank == 0:
            print('Model precision {} mAP'.format(acc))

        return
    mean, std = generate_mean_std(args)

    for epoch in range(start_epoch, args.epochs):
        start_epoch_time = time.time()
        # scheduler.step()
        iteration = train_loop_func(ssd300, loss_func, epoch, optimizer,
                                    scheduler, train_loader, val_dataloader,
                                    encoder, iteration, logger, args, mean,
                                    std)
        end_epoch_time = time.time() - start_epoch_time
        total_time += end_epoch_time

        # https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
        scheduler.step()

        if args.local_rank == 0:
            logger.update_epoch_time(epoch, end_epoch_time)

        if epoch in args.evaluation:
            acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map,
                           args)

            if args.local_rank == 0:
                logger.update_epoch(epoch, acc)

        if args.save and args.local_rank == 0:
            print("saving model...")
            obj = {
                'epoch': epoch + 1,
                'iteration': iteration,
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
                'label_map': val_dataset.label_info
            }
            if args.distributed:
                obj['model'] = ssd300.module.state_dict()
            else:
                obj['model'] = ssd300.state_dict()
            torch.save(obj, './models/epoch_{}.pt'.format(epoch))
        train_loader.reset()
    print('total training time: {}'.format(total_time))
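
This variant swaps SGD + MultiStepLR for AdamW + cosine annealing and steps the scheduler once per epoch. A self-contained sketch of the resulting schedule (values are illustrative, not the snippet's defaults):

import torch
from torch.optim.lr_scheduler import CosineAnnealingLR

param = torch.nn.Parameter(torch.zeros(1))
opt = torch.optim.AdamW([param], lr=2.6e-3)
sched = CosineAnnealingLR(opt, T_max=65, eta_min=1e-6)

for epoch in range(65):
    opt.step()    # dummy optimization step
    sched.step()  # once per epoch, matching the loop above
print(opt.param_groups[0]['lr'])  # ~1e-6 after a full cosine period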
Example #12
def train(args):
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.N_gpu = torch.distributed.get_world_size()
    else:
        args.N_gpu = 1

    dboxes = dboxes300_coco()
    encoder = Encoder(dboxes)
    cocoGt = get_coco_ground_truth(args)

    val_dataset = get_val_dataset(args)
    val_dataloader = get_val_dataloader(val_dataset, args)

    ssd300 = SSD300(len(cocoGt.cats) + 1)
    args.learning_rate = args.learning_rate * \
        args.N_gpu * (args.batch_size / 32)
    iteration = 0
    loss_func = Loss(dboxes)

    ssd300.cuda()
    loss_func.cuda()

    if args.fp16:
        ssd300 = network_to_half(ssd300)

    if args.distributed:
        ssd300 = DDP(ssd300)

    optimizer = torch.optim.SGD(tencent_trick(ssd300),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    scheduler = MultiStepLR(optimizer=optimizer,
                            milestones=args.multistep,
                            gamma=0.1)

    if args.fp16:
        optimizer = FP16_Optimizer(optimizer, static_loss_scale=128.)

    inv_map = {v: k for k, v in val_dataset.label_map.items()}

    avg_loss = 0.0
    acc = 0
    batch_perf = AverageMeter()
    end = time.time()
    train_start = end

    args.train_annotate = os.path.join(args.data,
                                       "annotations/instances_train2017.json")
    args.train_coco_root = os.path.join(args.data, "train2017")
    local_seed = set_seeds(args)

    if args.data_pipeline == 'no_dali':
        train_trans = SSDTransformer(dboxes, args, (300, 300), val=False)
        train_dataset = get_train_dataset(args, train_trans)
        train_loader = get_train_loader(train_dataset, args, args.num_workers)
    elif args.data_pipeline == 'dali':
        train_loader = get_train_dali_loader(args, dboxes, local_seed)

    for epoch in range(args.epochs):
        start_epoch_time = time.time()
        scheduler.step()

        epoch_loop(train_loader, args, ssd300, time.time(), loss_func,
                   optimizer, iteration, avg_loss, batch_perf, epoch)
        torch.cuda.synchronize()

        if epoch in args.evaluation:
            acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map,
                           args)

        try:
            train_loader.reset()
        except AttributeError:
            pass

    if args.local_rank == 0:
        print(
            "Training end: Average speed: {:3f} img/sec, Total time: {:3f} sec, Final accuracy: {:3f} mAP"
            .format(args.N_gpu * args.batch_size / batch_perf.avg,
                    time.time() - train_start, acc))
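
The closing print divides by batch_perf.avg; a minimal sketch of what such an AverageMeter helper usually looks like (the actual implementation in the source project may differ):

class AverageMeter:
    """Hypothetical minimal helper: running mean of per-batch values."""
    def __init__(self):
        self.sum = 0.0
        self.count = 0

    def update(self, val, n=1):
        self.sum += val * n
        self.count += n

    @property
    def avg(self):
        return self.sum / max(self.count, 1)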