Example #1
def main():
    device = paddle.set_device(FLAGS.device)
    # run in dynamic (imperative) mode if requested, otherwise keep the
    # default static graph mode
    if FLAGS.dynamic:
        paddle.disable_static(device)

    if not FLAGS.eval_only:  # training mode
        train_transform = Compose([
            ColorDistort(),
            RandomExpand(),
            RandomCrop(),
            RandomFlip(),
            NormalizeBox(),
            PadBox(),
            BboxXYXY2XYWH()
        ])

        # per-sample transforms above run in the dataset; batch-level ones run
        # in the collate_fn, e.g. RandomShape must pick one target size for
        # the whole batch (multi-scale training)
        train_collate_fn = BatchCompose([RandomShape(), NormalizeImage()])
        dataset = COCODataset(dataset_dir=FLAGS.data,
                              anno_path='annotations/instances_train2017.json',
                              image_dir='train2017',
                              with_background=False,
                              mixup=True,
                              transform=train_transform)
        batch_sampler = DistributedBatchSampler(dataset,
                                                batch_size=FLAGS.batch_size,
                                                shuffle=True,
                                                drop_last=True)
        loader = DataLoader(dataset,
                            batch_sampler=batch_sampler,
                            places=device,
                            num_workers=FLAGS.num_workers,
                            return_list=True,
                            collate_fn=train_collate_fn)
    else:  # evaluation mode
        eval_transform = Compose([
            ResizeImage(target_size=608),
            NormalizeBox(),
            PadBox(),
            BboxXYXY2XYWH()
        ])

        eval_collate_fn = BatchCompose([NormalizeImage()])
        dataset = COCODataset(dataset_dir=FLAGS.data,
                              anno_path='annotations/instances_val2017.json',
                              image_dir='val2017',
                              with_background=False,
                              transform=eval_transform)
        # batch_size must be 1 in evaluation for YOLOv3, since the
        # predicted bboxes are returned as a LoDTensor
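        # (a LoDTensor holds variable-length data, so predictions with a
        # different number of boxes per image cannot be stacked into a batch)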
        batch_sampler = DistributedBatchSampler(dataset,
                                                batch_size=1,
                                                shuffle=False,
                                                drop_last=False)
        loader = DataLoader(dataset,
                            batch_sampler=batch_sampler,
                            places=device,
                            num_workers=FLAGS.num_workers,
                            return_list=True,
                            collate_fn=eval_collate_fn)

    # fall back to the model zoo's pretrained weights only when evaluating
    # without user-specified weights
    pretrained = FLAGS.eval_only and FLAGS.weights is None
    model = yolov3_darknet53(num_classes=dataset.num_classes,
                             num_max_boxes=NUM_MAX_BOXES,
                             model_mode='eval' if FLAGS.eval_only else 'train',
                             pretrained=pretrained)

    if FLAGS.pretrain_weights and not FLAGS.eval_only:
        model.load(FLAGS.pretrain_weights,
                   skip_mismatch=True,
                   reset_optimizer=True)

    # len(batch_sampler) is the number of batches per epoch, which
    # make_optimizer can use to size a per-iteration LR schedule
    optim = make_optimizer(len(batch_sampler),
                           parameter_list=model.parameters())

    model.prepare(optimizer=optim,
                  loss=YoloLoss(num_classes=dataset.num_classes))

    # NOTE: we implement the COCO metric of the YOLOv3 model here, separately
    # from the 'prepare'/'fit' framework, for the following reasons:
    # 1. the YOLOv3 network structure differs between 'train' and 'eval'
    #    mode: in 'eval' mode the output is the predicted bboxes, not the
    #    feature maps that YoloLoss is computed from
    # 2. the COCO metric also behaves differently from a standard Metric,
    #    as it must not accumulate on each iteration but only once at the
    #    end of an epoch (see the sketch after this function)
    if FLAGS.eval_only:
        if FLAGS.weights is not None:
            model.load(FLAGS.weights, reset_optimizer=True)
        preds = model.predict(loader, stack_outputs=False)
        _, _, _, img_ids, bboxes = preds

        anno_path = os.path.join(FLAGS.data,
                                 'annotations/instances_val2017.json')
        coco_metric = COCOMetric(anno_path=anno_path, with_background=False)
        for img_id, bbox in zip(img_ids, bboxes):
            coco_metric.update(img_id, bbox)
        coco_metric.accumulate()
        coco_metric.reset()
        return

    if FLAGS.resume is not None:
        model.load(FLAGS.resume)

    save_dir = FLAGS.save_dir or 'yolo_checkpoint'

    model.fit(train_data=loader,
              epochs=FLAGS.epoch - FLAGS.no_mixup_epoch,
              save_dir=os.path.join(save_dir, "mixup"),
              save_freq=10)

    # do not use the image mixup transform in the last FLAGS.no_mixup_epoch epochs
    dataset.mixup = False
    model.fit(train_data=loader,
              epochs=FLAGS.no_mixup_epoch,
              save_dir=os.path.join(save_dir, "no_mixup"),
              save_freq=5)
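
The NOTE inside main() amounts to a metric that buffers predictions during the epoch and scores them only once at the end. Below is a minimal sketch of that pattern, assuming a hypothetical EpochEndMetric with an injected score_fn; COCOMetric's real internals are not part of this example:

class EpochEndMetric:
    """Buffers per-iteration results and scores them only once per epoch."""

    def __init__(self, score_fn):
        # score_fn is a hypothetical callable mapping the full list of
        # (img_id, bbox) results to a score; for COCO it would wrap
        # pycocotools' COCOeval
        self.score_fn = score_fn
        self.results = []

    def update(self, img_id, bbox):
        # only buffer here -- COCO mAP cannot be accumulated batch by batch
        self.results.append((img_id, bbox))

    def accumulate(self):
        # evaluate all of the epoch's predictions in one pass
        return self.score_fn(self.results)

    def reset(self):
        self.results = []

The eval branch above drives COCOMetric in exactly this shape: update() once per image, a single accumulate(), then reset().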

Example #2
def train(train_config):
    logger = Logger(HOME + '/log', train_config.basenet)
    if train_config.dataset_name == 'VOC':
        cfg = voc_config
        dataset = VOCDataset(DATA_DIR, transform=SSDAugmentation(
            cfg['min_dim'], MEANS))
    elif train_config.dataset_name == 'COCO':
        cfg = coco_config
        dataset = COCODataset(DATA_DIR, transform=SSDAugmentation(
            cfg['min_dim'], MEANS))
    else:
        # guard against cfg and dataset being undefined further down
        raise ValueError('unsupported dataset: ' + train_config.dataset_name)

    if train_config.visdom:
        import visdom
        viz = visdom.Visdom()

    ssd_net = SSD('train', train_config.basenet,
                  cfg['min_dim'], cfg['num_classes'], with_fpn=train_config.with_fpn)
    net = ssd_net
    if train_config.cuda:
        net = nn.DataParallel(ssd_net)
        cudnn.benchmark = True
    if train_config.resume:
        logger('Loading {} ...'.format(train_config.resume))
        load_weights = torch.load(
            train_config.resume, map_location=lambda storage, loc: storage)
        ssd_net.load_state_dict(load_weights)
    if train_config.cuda:
        net = net.cuda()
    if not train_config.resume:
        logger('Initializing weights ...')
        ssd_net.topnet.apply(weights_init)
        ssd_net.loc_layers.apply(weights_init)
        ssd_net.conf_layers.apply(weights_init)

    optimizer = optim.Adam(net.parameters(), lr=train_config.lr,
                           weight_decay=train_config.weight_decay)
    # MultiBoxLoss args (ssd.pytorch convention): num_classes, overlap_thresh,
    # prior_for_matching, bkg_label, neg_mining, neg_pos_ratio, neg_overlap,
    # encode_target, use_gpu
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, train_config.cuda)

    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    logger('Loading the dataset...')

    epoch_size = len(dataset) // train_config.batch_size
    logger('Training SSD on: {}'.format(dataset.name))
    # logger('using the specified args:')

    step_index = 0

    if train_config.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)

    data_loader = data.DataLoader(dataset, train_config.batch_size,
                                  num_workers=train_config.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)
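    # detection_collate stacks the images into one tensor but leaves the
    # targets as a list of per-image tensors, since the number of
    # ground-truth boxes varies per image (hence the per-annotation
    # .cuda() loop below)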
    # create batch iterator
    batch_iterator = iter(data_loader)
    t0 = time.time()
    for iteration in range(train_config.start_iter, cfg['max_iter']):
        if train_config.visdom and iteration != 0 and (iteration % epoch_size == 0):
            update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
                            'append', epoch_size)
            logger('epoch = {} : loss = {}, loc_loss = {}, conf_loss = {}'.format(
                epoch, loc_loss + conf_loss, loc_loss, conf_loss))
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1

        if iteration in cfg['lr_steps']:
            step_index += 1
            adjust_learning_rate(optimizer, train_config.lr,
                                 train_config.gamma, step_index)

        # load train data, restarting the iterator when it is exhausted
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(data_loader)
            images, targets = next(batch_iterator)

        if train_config.cuda:
            images = images.cuda()
            targets = [ann.cuda() for ann in targets]
        # forward
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        if train_config.visdom:
            loc_loss += loss_l.item()
            conf_loss += loss_c.item()

        if iteration % 50 == 0:
            t1 = time.time()
            logger('timer: %.4f sec. || iter %d || Loss: %.4f || '
                   'loc_loss: %.4f || conf_loss: %.4f' %
                   (t1 - t0, iteration, loss.item(), loss_l.item(),
                    loss_c.item()))
            t0 = time.time()

        if train_config.visdom:
            update_vis_plot(iteration, loss_l.item(), loss_c.item(),
                            iter_plot, epoch_plot, 'append')

        if iteration != 0 and iteration % 5000 == 0:
            logger('Saving state, iter: %d' % iteration)
            torch.save(ssd_net.state_dict(), train_config.save_folder +
                       'ssd224_VOC_' + repr(iteration) + '.pth')
    torch.save(ssd_net.state_dict(),
               train_config.save_folder + 'ssd224_VOC.pth')
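
adjust_learning_rate is called at each entry of cfg['lr_steps'] but is not defined in this snippet. In ssd.pytorch-style training scripts it is typically a step decay of the base rate; a minimal sketch under that assumption:

def adjust_learning_rate(optimizer, base_lr, gamma, step_index):
    # step decay: scale the base rate by gamma once per LR step and
    # apply it to every parameter group
    lr = base_lr * (gamma ** step_index)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr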