Ejemplo n.º 1
0
def train():
    """Set up logging, data loaders and a ``CenterNet`` wrapper, then train.

    Configuration comes from ``get_args(**Cfg)``. The working directory is
    switched to ``cfg.root_dir`` before the log and checkpoint directories are
    created, so relative paths in the configuration resolve against that root.
    """
    cfg = get_args(**Cfg)
    os.chdir(cfg.root_dir)
    for directory in (cfg.log_dir, cfg.ckpt_dir):
        os.makedirs(directory, exist_ok=True)

    log = create_logger(cfg.local_rank, save_dir=cfg.log_dir).info
    log(cfg)

    # Fixed seed; cudnn autotuning is switched on.
    torch.manual_seed(317)
    torch.backends.cudnn.benchmark = True

    cfg.device = torch.device('cuda')

    log('Setting up data...')
    make_loader = torch.utils.data.DataLoader

    training_set = KneeDataset(cfg.train_label, cfg)
    training_batches = make_loader(
        training_set,
        batch_size=cfg.batch_size,
        shuffle=not cfg.dist,
        num_workers=cfg.num_workers,
        pin_memory=True,
        drop_last=True,
    )

    validation_set = KneeDataset(cfg.val_label, cfg, False)
    validation_batches = make_loader(
        validation_set,
        batch_size=cfg.batch_size,
        shuffle=False,
        num_workers=1,
        pin_memory=True,
        collate_fn=collate_fn,
    )

    trainer = CenterNet(cfg)
    trainer.train(len(training_set), training_batches, validation_batches,
                  cfg.num_epochs)
Ejemplo n.º 2
0
def main():
    """Train a corner-based detector and periodically evaluate it.

    All settings are read from the module-level ``cfg``. The function builds
    the training and validation loaders (COCO when ``cfg.dataset == 'coco'``,
    Pascal VOC otherwise), creates the network selected by ``cfg.arch``, and
    runs ``cfg.num_epochs`` epochs. After every epoch the model state is
    handed to ``saver``; validation runs every ``cfg.val_interval`` epochs
    when that value is positive.

    Raises:
        NotImplementedError: if ``cfg.arch`` contains neither ``'hourglass'``
            nor ``'resdcn'``.
    """
    saver = create_saver(cfg.local_rank, save_dir=cfg.ckpt_dir)
    logger = create_logger(cfg.local_rank, save_dir=cfg.log_dir)
    summary_writer = create_summary(cfg.local_rank, log_dir=cfg.log_dir)
    # Use a dedicated name instead of shadowing the builtin ``print``.
    log = logger.info
    log(cfg)

    torch.manual_seed(317)
    torch.backends.cudnn.benchmark = True  # disable this if OOM at beginning of training

    num_gpus = torch.cuda.device_count()
    if cfg.dist:
        cfg.device = torch.device('cuda:%d' % cfg.local_rank)
        torch.cuda.set_device(cfg.local_rank)
        # NOTE(review): world_size/rank are derived from the local GPU count
        # and local rank, which presumably assumes a single node - confirm.
        dist.init_process_group(backend='nccl',
                                init_method='env://',
                                world_size=num_gpus,
                                rank=cfg.local_rank)
    else:
        cfg.device = torch.device('cuda')

    log('Setting up data...')
    Dataset = COCO if cfg.dataset == 'coco' else PascalVOC
    train_dataset = Dataset(cfg.data_dir,
                            'train',
                            split_ratio=cfg.split_ratio,
                            img_size=cfg.img_size)
    # The distributed sampler is only needed (and only used) when cfg.dist.
    train_sampler = None
    if cfg.dist:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset, num_replicas=num_gpus, rank=cfg.local_rank)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        # In distributed mode cfg.batch_size is split across the GPUs.
        batch_size=cfg.batch_size // num_gpus if cfg.dist else cfg.batch_size,
        # A sampler and shuffle=True are mutually exclusive in DataLoader.
        shuffle=not cfg.dist,
        num_workers=cfg.num_workers,
        pin_memory=True,
        drop_last=True,
        sampler=train_sampler)

    Dataset_eval = COCO_eval if cfg.dataset == 'coco' else PascalVOC_eval
    val_dataset = Dataset_eval(cfg.data_dir,
                               'val',
                               test_scales=[1.],
                               test_flip=False)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=1,
                                             pin_memory=True,
                                             collate_fn=val_dataset.collate_fn)

    log('Creating model...')
    if 'hourglass' in cfg.arch:
        model = get_hourglass[cfg.arch]
    elif 'resdcn' in cfg.arch:
        # The layer count is the suffix after the last '_' in cfg.arch.
        model = get_pose_net(num_layers=int(cfg.arch.split('_')[-1]),
                             num_classes=train_dataset.num_classes)
    else:
        raise NotImplementedError

    if cfg.dist:
        model = model.to(cfg.device)
        model = nn.parallel.DistributedDataParallel(
            model, device_ids=[
                cfg.local_rank,
            ], output_device=cfg.local_rank)
    else:
        # todo don't use this, or wrapped it with utils.losses.Loss() !
        model = nn.DataParallel(model).to(cfg.device)

    optimizer = torch.optim.Adam(model.parameters(), cfg.lr)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        cfg.lr_step,
                                                        gamma=0.1)

    def train(epoch):
        """Run one training epoch, logging losses every cfg.log_interval batches."""
        log('\n%s Epoch: %d' % (datetime.now(), epoch))
        model.train()
        tic = time.perf_counter()
        for batch_idx, batch in enumerate(train_loader):
            for k in batch:
                batch[k] = batch[k].to(device=cfg.device, non_blocking=True)

            outputs = model(batch['image'])
            # Regroup the per-output tuples into one sequence per head.
            hmap_tl, hmap_br, embd_tl, embd_br, regs_tl, regs_br = zip(
                *outputs)

            # Gather head predictions at the indices supplied by the batch.
            embd_tl = [
                _tranpose_and_gather_feature(e, batch['inds_tl'])
                for e in embd_tl
            ]
            embd_br = [
                _tranpose_and_gather_feature(e, batch['inds_br'])
                for e in embd_br
            ]
            regs_tl = [
                _tranpose_and_gather_feature(r, batch['inds_tl'])
                for r in regs_tl
            ]
            regs_br = [
                _tranpose_and_gather_feature(r, batch['inds_br'])
                for r in regs_br
            ]

            focal_loss = _neg_loss(hmap_tl, batch['hmap_tl']) + \
                         _neg_loss(hmap_br, batch['hmap_br'])
            reg_loss = _reg_loss(regs_tl, batch['regs_tl'], batch['ind_masks']) + \
                       _reg_loss(regs_br, batch['regs_br'], batch['ind_masks'])
            pull_loss, push_loss = _ae_loss(embd_tl, embd_br,
                                            batch['ind_masks'])

            loss = focal_loss + 0.1 * pull_loss + 0.1 * push_loss + reg_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % cfg.log_interval == 0:
                duration = time.perf_counter() - tic
                tic = time.perf_counter()
                log(
                    '[%d/%d-%d/%d] ' %
                    (epoch, cfg.num_epochs, batch_idx, len(train_loader)) +
                    ' focal_loss= %.5f pull_loss= %.5f push_loss= %.5f reg_loss= %.5f'
                    % (focal_loss.item(), pull_loss.item(), push_loss.item(),
                       reg_loss.item()) + ' (%d samples/sec)' %
                    (cfg.batch_size * cfg.log_interval / duration))

                step = len(train_loader) * epoch + batch_idx
                summary_writer.add_scalar('focal_loss', focal_loss.item(),
                                          step)
                summary_writer.add_scalar('pull_loss', pull_loss.item(), step)
                summary_writer.add_scalar('push_loss', push_loss.item(), step)
                summary_writer.add_scalar('reg_loss', reg_loss.item(), step)
        return

    def val_map(epoch):
        """Run the model over the validation set and log the evaluation result."""
        log('\n%s Val@Epoch: %d' % (datetime.now(), epoch))
        model.eval()

        results = {}
        with torch.no_grad():
            for inputs in val_loader:
                # The validation loader uses batch_size=1.
                img_id, inputs = inputs[0]

                detections = []
                for scale in inputs:
                    inputs[scale]['image'] = inputs[scale]['image'].to(
                        cfg.device)
                    # Only the last entry of the model output is decoded.
                    output = model(inputs[scale]['image'])[-1]
                    det = _decode(*output, ae_threshold=0.5, K=100, kernel=3)
                    det = det.reshape(det.shape[0], -1,
                                      8).detach().cpu().numpy()
                    if det.shape[0] == 2:
                        # Second row is presumably the flipped image: mirror
                        # and swap columns 0 and 2 - TODO confirm.
                        det[1, :, [0, 2]] = inputs[scale]['fmap_size'][
                            0, 1] - det[1, :, [2, 0]]
                    det = det.reshape(1, -1, 8)

                    _rescale_dets(det, inputs[scale]['ratio'],
                                  inputs[scale]['border'],
                                  inputs[scale]['size'])
                    det[:, :, 0:4] /= scale
                    detections.append(det)

                detections = np.concatenate(detections, axis=1)[0]
                # reject detections with negative scores
                detections = detections[detections[:, 4] > -1]
                classes = detections[..., -1]

                # Per-class results are keyed from 1.
                results[img_id] = {}
                for j in range(val_dataset.num_classes):
                    keep_inds = (classes == j)
                    results[img_id][j +
                                    1] = detections[keep_inds][:, 0:7].astype(
                                        np.float32)
                    soft_nms_merge(results[img_id][j + 1],
                                   Nt=0.5,
                                   method=2,
                                   weight_exp=10)
                    results[img_id][j + 1] = results[img_id][j + 1][:, 0:5]

                # Keep at most val_dataset.max_objs detections per image by
                # thresholding on the score stored in the last column.
                scores = np.hstack([
                    results[img_id][j][:, -1]
                    for j in range(1, val_dataset.num_classes + 1)
                ])
                if len(scores) > val_dataset.max_objs:
                    kth = len(scores) - val_dataset.max_objs
                    thresh = np.partition(scores, kth)[kth]
                    for j in range(1, val_dataset.num_classes + 1):
                        keep_inds = (results[img_id][j][:, -1] >= thresh)
                        results[img_id][j] = results[img_id][j][keep_inds]

        eval_results = val_dataset.run_eval(results, save_dir=cfg.ckpt_dir)
        log(eval_results)
        summary_writer.add_scalar('val_mAP/mAP', eval_results[0], epoch)

    log('Starting training...')
    try:
        for epoch in range(1, cfg.num_epochs + 1):
            if train_sampler is not None:
                # Gives the distributed sampler a different shuffle per epoch.
                train_sampler.set_epoch(epoch)
            train(epoch)
            if cfg.val_interval > 0 and epoch % cfg.val_interval == 0:
                val_map(epoch)
            log(saver.save(model.module.state_dict(), 'checkpoint'))
            # Passing the epoch to step() is deprecated; the scheduler keeps
            # its own counter, which matches the 1-based epoch here.
            lr_scheduler.step()
    finally:
        # Close the summary writer even if training aborts.
        summary_writer.close()
def main():
    """Train a dictionary-code segmentation model, keeping the best checkpoint.

    All settings are read from the module-level ``cfg``. The dictionary file
    path is derived from ``cfg.n_vertices``, ``cfg.n_codes`` and
    ``cfg.sparse_alpha`` and stored back on ``cfg.dictionary_file``. The model
    is validated every ``cfg.val_interval`` epochs (and additionally at epoch
    2); a checkpoint is saved only when the validation score improves on the
    best seen so far.

    Raises:
        NotImplementedError: if ``cfg.arch`` contains neither ``'hourglass'``
            nor ``'resdcn'``, or if ``cfg.code_loss`` is not one of
            ``'norm'``, ``'adapt'`` or ``'wing'``.
    """
    best_mAP = 0.0
    saver = create_saver(cfg.local_rank, save_dir=cfg.ckpt_dir)
    logger = create_logger(cfg.local_rank, save_dir=cfg.log_dir)
    summary_writer = create_summary(cfg.local_rank, log_dir=cfg.log_dir)

    print_log = logger.info
    print_log(cfg)

    torch.manual_seed(319)
    torch.backends.cudnn.benchmark = True  # disable this if OOM at beginning of training

    num_gpus = torch.cuda.device_count()
    if cfg.dist:
        cfg.device = torch.device('cuda:%d' % cfg.local_rank)
        torch.cuda.set_device(cfg.local_rank)
        dist.init_process_group(backend='nccl',
                                init_method='env://',
                                world_size=num_gpus,
                                rank=cfg.local_rank)
    else:
        cfg.device = torch.device('cuda:%d' % cfg.device_id)

    print_log('Setting up data...')
    cfg.dictionary_file = os.path.join(
        cfg.dictionary_folder,
        'train_dict_v{}_n{}_a{:.2f}.npy'.format(cfg.n_vertices, cfg.n_codes,
                                                cfg.sparse_alpha))
    print_log('Loading the dictionary: ' + cfg.dictionary_file)
    dictionary = np.load(cfg.dictionary_file)
    # Padding depends on the backbone family.
    if 'hourglass' in cfg.arch:
        cfg.padding = 127
    else:
        cfg.padding = 31
    Dataset = COCOSEGMCMM if cfg.dataset == 'coco' else KINSSEGMCMM
    train_dataset = Dataset(cfg.data_dir,
                            cfg.dictionary_file,
                            'train',
                            split_ratio=cfg.split_ratio,
                            img_size=cfg.img_size,
                            padding=cfg.padding,
                            n_coeffs=cfg.n_codes,
                            n_vertices=cfg.n_vertices,
                            sparse_alpha=cfg.sparse_alpha)
    # The distributed sampler is only needed (and only used) when cfg.dist.
    train_sampler = None
    if cfg.dist:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset, num_replicas=num_gpus, rank=cfg.local_rank)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        # In distributed mode cfg.batch_size is split across the GPUs.
        batch_size=cfg.batch_size // num_gpus if cfg.dist else cfg.batch_size,
        shuffle=not cfg.dist,
        num_workers=cfg.num_workers,
        pin_memory=False,
        drop_last=True,
        sampler=train_sampler)

    Dataset_eval = COCO_eval_segm_cmm if cfg.dataset == 'coco' else KINS_eval_segm_cmm
    val_dataset = Dataset_eval(cfg.data_dir,
                               cfg.dictionary_file,
                               'val',
                               test_scales=[1.],
                               test_flip=False,
                               img_size=cfg.img_size,
                               padding=cfg.padding,
                               n_coeffs=cfg.n_codes,
                               n_vertices=cfg.n_vertices,
                               fix_size=False,
                               sparse_alpha=cfg.sparse_alpha)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=1,
                                             pin_memory=False,
                                             collate_fn=val_dataset.collate_fn)

    print_log('Creating model...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5,
                     nstack=2,
                     dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4],
                     n_codes=cfg.n_codes)
    elif 'resdcn' in cfg.arch:
        # The layer count is the suffix after the last '_' in cfg.arch.
        model = get_pose_resdcn(num_layers=int(cfg.arch.split('_')[-1]),
                                head_conv=64,
                                num_classes=train_dataset.num_classes,
                                num_codes=cfg.n_codes)
    else:
        raise NotImplementedError

    if cfg.dist:
        model = model.to(cfg.device)
        model = nn.parallel.DistributedDataParallel(
            model, device_ids=[
                cfg.local_rank,
            ], output_device=cfg.local_rank)
    else:
        # NOTE(review): device_ids uses cfg.local_rank while cfg.device was
        # built from cfg.device_id - confirm these always agree.
        model = nn.DataParallel(model, device_ids=[
            cfg.local_rank,
        ]).to(cfg.device)

    if cfg.pretrain_checkpoint is not None and os.path.isfile(
            cfg.pretrain_checkpoint):
        print_log('Load pretrain model from ' + cfg.pretrain_checkpoint)
        model = load_model(model, cfg.pretrain_checkpoint, cfg.device_id)
        torch.cuda.empty_cache()

    optimizer = torch.optim.Adam(model.parameters(), cfg.lr)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        cfg.lr_step,
                                                        gamma=cfg.gamma)

    def train(epoch):
        """Run one training epoch, logging losses every cfg.log_interval batches."""
        print_log('\n Epoch: %d' % epoch)
        model.train()

        tic = time.perf_counter()
        for batch_idx, batch in enumerate(train_loader):
            for k in batch:
                # The 'meta' entry is left where it is; everything else is
                # moved to the training device.
                if k != 'meta':
                    batch[k] = batch[k].to(device=cfg.device,
                                           non_blocking=True)

            outputs = model(batch['image'])
            # Regroup the per-output tuples into one sequence per head.
            hmap, regs, w_h_, codes, offsets = zip(*outputs)

            # Gather head predictions at the indices supplied by the batch.
            regs = [
                _tranpose_and_gather_feature(r, batch['inds']) for r in regs
            ]
            w_h_ = [
                _tranpose_and_gather_feature(r, batch['inds']) for r in w_h_
            ]
            codes = [
                _tranpose_and_gather_feature(r, batch['inds']) for r in codes
            ]
            offsets = [
                _tranpose_and_gather_feature(r, batch['inds']) for r in offsets
            ]

            hmap_loss = _neg_loss(hmap, batch['hmap'])
            reg_loss = _reg_loss(regs, batch['regs'], batch['ind_masks'])
            w_h_loss = _reg_loss(w_h_, batch['w_h_'], batch['ind_masks'])
            offsets_loss = _reg_loss(offsets, batch['offsets'],
                                     batch['ind_masks'])

            if cfg.code_loss == 'norm':
                codes_loss = norm_reg_loss(codes,
                                           batch['codes'],
                                           batch['ind_masks'],
                                           sparsity=0.)
            elif cfg.code_loss == 'adapt':
                codes_loss = adapt_norm_reg_loss(codes,
                                                 batch['codes'],
                                                 batch['ind_masks'],
                                                 sparsity=0.,
                                                 norm=cfg.adapt_norm)
            elif cfg.code_loss == 'wing':
                codes_loss = wing_norm_reg_loss(codes,
                                                batch['codes'],
                                                batch['ind_masks'],
                                                sparsity=0.,
                                                epsilon=cfg.wing_epsilon,
                                                omega=cfg.wing_omega)
            else:
                # Record the reason in the log and on the exception itself.
                print_log('Loss type for code not implemented yet.')
                raise NotImplementedError(
                    'Loss type for code not implemented yet.')

            loss = 1. * hmap_loss + 1. * reg_loss + 0.1 * w_h_loss + 0.1 * offsets_loss + \
                   cfg.code_loss_weight * codes_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % cfg.log_interval == 0:
                duration = time.perf_counter() - tic
                tic = time.perf_counter()
                print_log(
                    '[%d/%d-%d/%d] ' %
                    (epoch, cfg.num_epochs, batch_idx, len(train_loader)) +
                    'Loss: hmap = %.3f reg = %.3f w_h = %.3f code = %.3f offsets = %.3f'
                    % (hmap_loss.item(), reg_loss.item(), w_h_loss.item(),
                       codes_loss.item(), offsets_loss.item()) +
                    ' (%d samples/sec)' %
                    (cfg.batch_size * cfg.log_interval / duration))

                step = len(train_loader) * epoch + batch_idx
                summary_writer.add_scalar('hmap_loss', hmap_loss.item(), step)
                summary_writer.add_scalar('reg_loss', reg_loss.item(), step)
                summary_writer.add_scalar('w_h_loss', w_h_loss.item(), step)
                summary_writer.add_scalar('offset_loss', offsets_loss.item(),
                                          step)
                summary_writer.add_scalar('code_loss', codes_loss.item(), step)
        return

    def val_map(epoch):
        """Evaluate on the validation set and return ``eval_results[0]``."""
        print_log('\n Val@Epoch: %d' % epoch)
        model.eval()
        torch.cuda.empty_cache()
        max_per_image = 100

        results = {}
        speed_list = []
        # Index of the score column: it follows the vertex coordinates and
        # two further coordinate pairs.
        score_col = cfg.n_vertices * 2 + 4
        with torch.no_grad():
            # Upload the dictionary once per validation pass rather than once
            # per image.
            dict_tensor = torch.from_numpy(dictionary.astype(np.float32)).to(
                cfg.device)
            for inputs in val_loader:
                # The validation loader uses batch_size=1.
                img_id, inputs = inputs[0]
                start_image_time = time.time()
                segmentations = []
                for scale in inputs:
                    inputs[scale]['image'] = inputs[scale]['image'].to(
                        cfg.device)

                    # Only the last entry of the model output is decoded.
                    hmap, regs, w_h_, codes, offsets = model(
                        inputs[scale]['image'])[-1]

                    segms = ctsegm_inmodal_code_decode(hmap,
                                                       regs,
                                                       w_h_,
                                                       codes,
                                                       offsets,
                                                       dict_tensor,
                                                       K=cfg.test_topk)

                    segms = segms.detach().cpu().numpy().reshape(
                        1, -1, segms.shape[2])[0]

                    # Map every (x, y) pair back through transform_preds: the
                    # n_vertices polygon points plus the two pairs after them
                    # (presumably bounding-box corners - TODO confirm).
                    center = inputs[scale]['center']
                    img_scale = inputs[scale]['scale']
                    fmap_wh = (inputs[scale]['fmap_w'],
                               inputs[scale]['fmap_h'])
                    for j in range(cfg.n_vertices + 2):
                        segms[:, 2 * j:2 * j + 2] = transform_preds(
                            segms[:, 2 * j:2 * j + 2], center, img_scale,
                            fmap_wh)

                    # Split by the class id in the last column; classes are
                    # keyed from 1 in the results.
                    top_preds = {}
                    clses = segms[:, -1]
                    for j in range(val_dataset.num_classes):
                        inds = (clses == j)
                        top_preds[j + 1] = segms[inds, :score_col +
                                                 1].astype(np.float32)
                        top_preds[j + 1][:, :score_col] /= scale

                    segmentations.append(top_preds)

                end_image_time = time.time()
                segms_and_scores = {
                    j: np.concatenate([d[j] for d in segmentations], axis=0)
                    for j in range(1, val_dataset.num_classes + 1)
                }
                # Keep at most max_per_image predictions across all classes.
                scores = np.hstack([
                    segms_and_scores[j][:, score_col]
                    for j in range(1, val_dataset.num_classes + 1)
                ])
                if len(scores) > max_per_image:
                    kth = len(scores) - max_per_image
                    thresh = np.partition(scores, kth)[kth]
                    for j in range(1, val_dataset.num_classes + 1):
                        keep_inds = (segms_and_scores[j][:, score_col] >=
                                     thresh)
                        segms_and_scores[j] = segms_and_scores[j][keep_inds]

                results[img_id] = segms_and_scores
                speed_list.append(end_image_time - start_image_time)

        eval_results = val_dataset.run_eval(results, save_dir=cfg.ckpt_dir)
        print_log(eval_results)
        summary_writer.add_scalar('val_mAP/mAP', eval_results[0], epoch)
        print_log('Average speed on val set:{:.2f}'.format(
            1. / np.mean(speed_list)))

        return eval_results[0]

    print_log('Starting training...')
    try:
        for epoch in range(1, cfg.num_epochs + 1):
            start = time.time()
            if train_sampler is not None:
                # Gives the distributed sampler a different shuffle per epoch.
                train_sampler.set_epoch(epoch)
            train(epoch)
            # Validation also runs once at epoch 2 regardless of the interval.
            if (cfg.val_interval > 0
                    and epoch % cfg.val_interval == 0) or epoch == 2:
                stat = val_map(epoch)
                if stat > best_mAP:
                    print_log(
                        'Overall mAP {:.3f} is improving ...'.format(stat))
                    print_log(
                        saver.save(model.module.state_dict(), 'checkpoint'))
                    best_mAP = stat

            lr_scheduler.step()  # move to here after pytorch1.1.0

            # Duration of this epoch expressed in days.
            epoch_time = (time.time() - start) / 3600. / 24.
            print_log('ETA:{:.2f} Days'.format(
                (cfg.num_epochs - epoch) * epoch_time))
    finally:
        # Close the summary writer even if training aborts.
        summary_writer.close()
def main():
    """Train a shift-code segmentation model and periodically evaluate it.

    All settings are read from the module-level ``cfg``, including the path
    of the dictionary array loaded with ``np.load``. The model is validated
    and a checkpoint saved every ``cfg.val_interval`` epochs (and additionally
    at epoch 3).

    Raises:
        NotImplementedError: if ``cfg.arch`` contains neither ``'hourglass'``
            nor ``'resdcn'``.
    """
    saver = create_saver(cfg.local_rank, save_dir=cfg.ckpt_dir)
    logger = create_logger(cfg.local_rank, save_dir=cfg.log_dir)
    summary_writer = create_summary(cfg.local_rank, log_dir=cfg.log_dir)
    # Use a dedicated name instead of shadowing the builtin ``print``.
    log = logger.info
    log(cfg)

    torch.manual_seed(319)
    torch.backends.cudnn.benchmark = True  # disable this if OOM at beginning of training

    num_gpus = torch.cuda.device_count()
    if cfg.dist:
        cfg.device = torch.device('cuda:%d' % cfg.local_rank)
        torch.cuda.set_device(cfg.local_rank)
        dist.init_process_group(backend='nccl',
                                init_method='env://',
                                world_size=num_gpus,
                                rank=cfg.local_rank)
    else:
        cfg.device = torch.device('cuda:%d' % cfg.device_id)

    log('Setting up data...')
    dictionary = np.load(cfg.dictionary_file)
    # NOTE(review): the non-COCO fallbacks (PascalVOC / PascalVOC_eval) are
    # given the dictionary file as an extra positional argument - confirm
    # those classes accept it.
    Dataset = COCOSEGMSHIFT if cfg.dataset == 'coco' else PascalVOC
    train_dataset = Dataset(cfg.data_dir,
                            cfg.dictionary_file,
                            'train',
                            split_ratio=cfg.split_ratio,
                            img_size=cfg.img_size)
    # The distributed sampler is only needed (and only used) when cfg.dist.
    train_sampler = None
    if cfg.dist:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset, num_replicas=num_gpus, rank=cfg.local_rank)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        # In distributed mode cfg.batch_size is split across the GPUs.
        batch_size=cfg.batch_size // num_gpus if cfg.dist else cfg.batch_size,
        shuffle=not cfg.dist,
        num_workers=cfg.num_workers,
        pin_memory=False,
        drop_last=True,
        sampler=train_sampler)

    Dataset_eval = COCO_eval_segm_shift if cfg.dataset == 'coco' else PascalVOC_eval
    val_dataset = Dataset_eval(cfg.data_dir,
                               cfg.dictionary_file,
                               'val',
                               test_scales=[1.],
                               test_flip=False)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=1,
                                             pin_memory=False,
                                             collate_fn=val_dataset.collate_fn)

    log('Creating model...')
    if 'hourglass' in cfg.arch:
        model = get_hourglass[cfg.arch]
    elif 'resdcn' in cfg.arch:
        # The layer count is the suffix after the last '_' in cfg.arch.
        model = get_pose_net(num_layers=int(cfg.arch.split('_')[-1]),
                             num_classes=train_dataset.num_classes)
    else:
        raise NotImplementedError

    if cfg.dist:
        model = model.to(cfg.device)
        model = nn.parallel.DistributedDataParallel(
            model, device_ids=[
                cfg.local_rank,
            ], output_device=cfg.local_rank)
    else:
        # NOTE(review): device_ids uses cfg.local_rank while cfg.device was
        # built from cfg.device_id - confirm these always agree.
        model = nn.DataParallel(model, device_ids=[
            cfg.local_rank,
        ]).to(cfg.device)

    if cfg.pretrain_checkpoint is not None and os.path.isfile(
            cfg.pretrain_checkpoint):
        log('Load pretrain model from ' + cfg.pretrain_checkpoint)
        model = load_model(model, cfg.pretrain_checkpoint, cfg.device_id)
        torch.cuda.empty_cache()

    optimizer = torch.optim.Adam(model.parameters(), cfg.lr)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        cfg.lr_step,
                                                        gamma=0.1)

    def train(epoch):
        """Run one training epoch, logging losses every cfg.log_interval batches."""
        log('\n Epoch: %d' % epoch)
        model.train()
        tic = time.perf_counter()
        for batch_idx, batch in enumerate(train_loader):
            for k in batch:
                # The 'meta' entry is left where it is; everything else is
                # moved to the training device.
                if k != 'meta':
                    batch[k] = batch[k].to(device=cfg.device,
                                           non_blocking=True)

            outputs = model(batch['image'])
            # Regroup the per-output tuples into one sequence per head.
            hmap, regs, w_h_, codes_ = zip(*outputs)

            # Gather head predictions at the indices supplied by the batch.
            regs = [
                _tranpose_and_gather_feature(r, batch['inds']) for r in regs
            ]
            w_h_ = [
                _tranpose_and_gather_feature(r, batch['inds']) for r in w_h_
            ]
            codes_ = [
                _tranpose_and_gather_feature(r, batch['inds']) for r in codes_
            ]

            hmap_loss = _neg_loss(hmap, batch['hmap'])
            reg_loss = _reg_loss(regs, batch['regs'], batch['ind_masks'])
            w_h_loss = _reg_loss(w_h_, batch['w_h_'], batch['ind_masks'])
            codes_loss = norm_reg_loss(codes_, batch['codes'],
                                       batch['ind_masks'])
            loss = hmap_loss + 1 * reg_loss + 0.1 * w_h_loss + cfg.code_loss_weight * codes_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % cfg.log_interval == 0:
                duration = time.perf_counter() - tic
                tic = time.perf_counter()
                log(
                    '[%d/%d-%d/%d] ' %
                    (epoch, cfg.num_epochs, batch_idx, len(train_loader)) +
                    ' hmap_loss= %.3f reg_loss= %.3f w_h_loss= %.3f  code_loss= %.3f'
                    % (hmap_loss.item(), reg_loss.item(), w_h_loss.item(),
                       codes_loss.item()) + ' (%d samples/sec)' %
                    (cfg.batch_size * cfg.log_interval / duration))

                step = len(train_loader) * epoch + batch_idx
                summary_writer.add_scalar('hmap_loss', hmap_loss.item(), step)
                summary_writer.add_scalar('reg_loss', reg_loss.item(), step)
                summary_writer.add_scalar('w_h_loss', w_h_loss.item(), step)
                summary_writer.add_scalar('code_loss', codes_loss.item(), step)
        return

    def val_map(epoch):
        """Run the model over the validation set and log the evaluation result."""
        log('\n Val@Epoch: %d' % epoch)
        model.eval()
        torch.cuda.empty_cache()
        max_per_image = 100

        results = {}
        input_scales = {}
        speed_list = []
        # Index of the score column: it follows the vertex coordinates and
        # two further coordinate pairs.
        score_col = cfg.n_vertices * 2 + 4
        with torch.no_grad():
            # Upload the dictionary once per validation pass rather than once
            # per image.
            dict_tensor = torch.from_numpy(dictionary.astype(np.float32)).to(
                cfg.device)
            for inputs in val_loader:
                # The validation loader uses batch_size=1.
                img_id, inputs = inputs[0]
                start_image_time = time.time()
                segmentations = []
                for scale in inputs:
                    inputs[scale]['image'] = inputs[scale]['image'].to(
                        cfg.device)
                    # keep track of the input image Sizes
                    if scale == 1. and img_id not in input_scales:
                        _, _, input_h, input_w = inputs[scale]['image'].shape
                        input_scales[img_id] = {'h': input_h, 'w': input_w}

                    # Only the last entry of the model output is decoded.
                    output = model(inputs[scale]['image'])[-1]

                    segms = ctsegm_shift_decode(*output,
                                                dict_tensor,
                                                K=cfg.test_topk)
                    segms = segms.detach().cpu().numpy().reshape(
                        1, -1, segms.shape[2])[0]

                    # Map every (x, y) pair back through transform_preds: the
                    # n_vertices polygon points plus the two pairs after them
                    # (presumably bounding-box corners - TODO confirm).
                    center = inputs[scale]['center']
                    img_scale = inputs[scale]['scale']
                    fmap_wh = (inputs[scale]['fmap_w'],
                               inputs[scale]['fmap_h'])
                    for j in range(cfg.n_vertices + 2):
                        segms[:, 2 * j:2 * j + 2] = transform_preds(
                            segms[:, 2 * j:2 * j + 2], center, img_scale,
                            fmap_wh)

                    # Split by the class id in the last column; classes are
                    # keyed from 1 in the results.
                    top_preds = {}
                    clses = segms[:, -1]
                    for j in range(val_dataset.num_classes):
                        inds = (clses == j)
                        top_preds[j + 1] = segms[inds, :score_col +
                                                 1].astype(np.float32)
                        top_preds[j + 1][:, :score_col] /= scale

                    segmentations.append(top_preds)

                end_image_time = time.time()
                segms_and_scores = {
                    j: np.concatenate([d[j] for d in segmentations], axis=0)
                    for j in range(1, val_dataset.num_classes + 1)
                }
                # Keep at most max_per_image predictions across all classes.
                scores = np.hstack([
                    segms_and_scores[j][:, score_col]
                    for j in range(1, val_dataset.num_classes + 1)
                ])
                if len(scores) > max_per_image:
                    kth = len(scores) - max_per_image
                    thresh = np.partition(scores, kth)[kth]
                    for j in range(1, val_dataset.num_classes + 1):
                        keep_inds = (segms_and_scores[j][:, score_col] >=
                                     thresh)
                        segms_and_scores[j] = segms_and_scores[j][keep_inds]

                results[img_id] = segms_and_scores
                speed_list.append(end_image_time - start_image_time)

        eval_results = val_dataset.run_eval(results,
                                            input_scales,
                                            save_dir=cfg.ckpt_dir)
        log(eval_results)
        summary_writer.add_scalar('val_mAP/mAP', eval_results[0], epoch)
        log('Average speed on val set:{:.2f}'.format(1. /
                                                     np.mean(speed_list)))

    log('Starting training...')
    try:
        for epoch in range(1, cfg.num_epochs + 1):
            start = time.time()
            if train_sampler is not None:
                # Gives the distributed sampler a different shuffle per epoch.
                train_sampler.set_epoch(epoch)
            train(epoch)
            # Validation also runs once at epoch 3 regardless of the interval.
            if (cfg.val_interval > 0
                    and epoch % cfg.val_interval == 0) or epoch == 3:
                val_map(epoch)
                log(saver.save(model.module.state_dict(), 'checkpoint'))
            # Passing the epoch to step() is deprecated; the scheduler keeps
            # its own counter, which matches the 1-based epoch here.
            lr_scheduler.step()

            # Duration of this epoch expressed in days.
            epoch_time = (time.time() - start) / 3600. / 24.
            log('ETA:{:.2f} Days'.format(
                (cfg.num_epochs - epoch) * epoch_time))
    finally:
        # Close the summary writer even if training aborts.
        summary_writer.close()
Ejemplo n.º 5
0
def main():
    """Train the network selected by ``cfg.arch`` and validate it periodically.

    Every setting is read from the module-level ``cfg`` object. The function
    builds the saver/logger/summary writer, picks the CUDA device (optionally
    initialising an NCCL process group), creates the COCO or Pascal VOC
    train/eval datasets and loaders, wraps the model for (distributed) data
    parallelism, then runs ``cfg.num_epochs`` epochs with an Adam optimizer
    and a ``MultiStepLR`` schedule. A checkpoint is saved after every epoch.

    Raises:
        NotImplementedError: if ``cfg.arch`` contains neither ``'hourglass'``
            nor ``'resdcn'``.
    """
    saver = create_saver(cfg.local_rank, save_dir=cfg.ckpt_dir)
    logger = create_logger(cfg.local_rank, save_dir=cfg.log_dir)
    summary_writer = create_summary(cfg.local_rank, log_dir=cfg.log_dir)
    # Shadow the builtin locally so every print() below goes to the logger.
    print = logger.info
    print(cfg)

    torch.manual_seed(317)
    # disable this if OOM at beginning of training
    torch.backends.cudnn.benchmark = True

    num_gpus = torch.cuda.device_count()
    if cfg.dist:
        # One process per GPU: bind this process to its own device and join
        # the NCCL group (rendezvous details come from the environment).
        cfg.device = torch.device('cuda:%d' % cfg.local_rank)
        torch.cuda.set_device(cfg.local_rank)
        dist.init_process_group(backend='nccl',
                                init_method='env://',
                                world_size=num_gpus,
                                rank=cfg.local_rank)
    else:
        cfg.device = torch.device('cuda')

    print('Setting up data...')
    Dataset = COCO if cfg.dataset == 'coco' else PascalVOC

    train_dataset = Dataset(cfg.data_dir,
                            'train',
                            split_ratio=cfg.split_ratio,
                            img_size=cfg.img_size)

    # The sampler is created unconditionally (set_epoch() is called on it in
    # the epoch loop below) but is only handed to the loader when cfg.dist.
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=num_gpus, rank=cfg.local_rank)

    # In distributed mode cfg.batch_size is split across the GPUs and
    # shuffle is turned off because a sampler is supplied instead.
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.batch_size // num_gpus if cfg.dist else cfg.batch_size,
        shuffle=not cfg.dist,
        num_workers=cfg.num_workers,
        pin_memory=True,
        drop_last=True,
        sampler=train_sampler if cfg.dist else None)

    Dataset_eval = COCO_eval if cfg.dataset == 'coco' else PascalVOC_eval

    # Validation uses a single scale (1.0) and no flip augmentation.
    val_dataset = Dataset_eval(cfg.data_dir,
                               'test',
                               test_scales=[1.],
                               test_flip=False)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=1,
                                             pin_memory=True,
                                             collate_fn=val_dataset.collate_fn)

    print('Creating model...')
    if 'hourglass' in cfg.arch:
        # get_hourglass is indexed, not called: a lookup keyed by arch name.
        model = get_hourglass[cfg.arch]
    elif 'resdcn' in cfg.arch:
        # The depth is taken from the text after the last '_' in cfg.arch.
        model = get_pose_net(num_layers=int(cfg.arch.split('_')[-1]),
                             num_classes=train_dataset.num_classes)
    else:
        raise NotImplementedError

    if cfg.dist:
        # model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = model.to(cfg.device)
        model = nn.parallel.DistributedDataParallel(
            model, device_ids=[
                cfg.local_rank,
            ], output_device=cfg.local_rank)
    else:
        model = nn.DataParallel(model).to(cfg.device)

    # Pretrained weights are optional: loaded only if the path is a file.
    if os.path.isfile(cfg.pretrain_dir):
        model = load_model(model, cfg.pretrain_dir)

    optimizer = torch.optim.Adam(model.parameters(), cfg.lr)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        cfg.lr_step,
                                                        gamma=0.1)

    def train(epoch):
        """Run one training epoch, logging losses every cfg.log_interval batches."""
        print('\n Epoch: %d' % epoch)
        model.train()
        tic = time.perf_counter()
        for batch_idx, batch in enumerate(train_loader):
            # Move every entry except 'meta' to the training device.
            for k in batch:
                if k != 'meta':
                    batch[k] = batch[k].to(device=cfg.device,
                                           non_blocking=True)

            outputs = model(batch['image'])

            # Unpack the three outputs: heat map, reg and wh
            # (zip(*outputs) regroups the per-output tuples by kind).
            hmap, regs, w_h_ = zip(*outputs)

            # Pick out the predictions at the indices in batch['inds'].
            regs = [
                _tranpose_and_gather_feature(r, batch['inds']) for r in regs
            ]
            w_h_ = [
                _tranpose_and_gather_feature(r, batch['inds']) for r in w_h_
            ]

            # Compute each loss separately
            hmap_loss = _neg_loss(hmap, batch['hmap'])
            reg_loss = _reg_loss(regs, batch['regs'], batch['ind_masks'])
            w_h_loss = _reg_loss(w_h_, batch['w_h_'], batch['ind_masks'])

            # Weight the losses to obtain the final loss
            loss = hmap_loss + 1 * reg_loss + 0.1 * w_h_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % cfg.log_interval == 0:
                # duration covers the batches since the previous log line.
                duration = time.perf_counter() - tic
                tic = time.perf_counter()
                print('[%d/%d-%d/%d] ' %
                      (epoch, cfg.num_epochs, batch_idx, len(train_loader)) +
                      ' hmap_loss= %.5f reg_loss= %.5f w_h_loss= %.5f' %
                      (hmap_loss.item(), reg_loss.item(), w_h_loss.item()) +
                      ' (%d samples/sec)' %
                      (cfg.batch_size * cfg.log_interval / duration))

                # Global step used as the x-axis of the summary scalars.
                step = len(train_loader) * epoch + batch_idx
                summary_writer.add_scalar('hmap_loss', hmap_loss.item(), step)
                summary_writer.add_scalar('reg_loss', reg_loss.item(), step)
                summary_writer.add_scalar('w_h_loss', w_h_loss.item(), step)
        return

    def val_map(epoch):
        """Run the model over val_loader and log the first value of run_eval()."""
        print('\n Val@Epoch: %d' % epoch)
        model.eval()
        torch.cuda.empty_cache()
        max_per_image = 100

        results = {}
        with torch.no_grad():
            for inputs in val_loader:
                # batch_size is 1, so the collated batch holds a single
                # (img_id, per-scale inputs) pair.
                img_id, inputs = inputs[0]

                detections = []
                for scale in inputs:
                    inputs[scale]['image'] = inputs[scale]['image'].to(
                        cfg.device)
                    # Only the last element of the model output is decoded.
                    output = model(inputs[scale]['image'])[-1]

                    dets = ctdet_decode(*output, K=cfg.test_topk)
                    # Flatten to a 2-D array: one row per detection.
                    dets = dets.detach().cpu().numpy().reshape(
                        1, -1, dets.shape[2])[0]

                    top_preds = {}
                    # Columns 0:2 and 2:4 are transformed as two separate
                    # point pairs (presumably the two box corners, mapped
                    # from feature-map to image coordinates - TODO confirm).
                    dets[:, :2] = transform_preds(
                        dets[:, 0:2], inputs[scale]['center'],
                        inputs[scale]['scale'],
                        (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    dets[:, 2:4] = transform_preds(
                        dets[:, 2:4], inputs[scale]['center'],
                        inputs[scale]['scale'],
                        (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    # The last column is compared against the class index;
                    # results are keyed by class index + 1.
                    clses = dets[:, -1]
                    for j in range(val_dataset.num_classes):
                        inds = (clses == j)
                        top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                        # Undo the test-time scale on the first four columns.
                        top_preds[j + 1][:, :4] /= scale

                    detections.append(top_preds)

                # Merge the per-scale detections class by class.
                bbox_and_scores = {
                    j: np.concatenate([d[j] for d in detections], axis=0)
                    for j in range(1, val_dataset.num_classes + 1)
                }
                scores = np.hstack([
                    bbox_and_scores[j][:, 4]
                    for j in range(1, val_dataset.num_classes + 1)
                ])
                # Keep at most max_per_image detections per image: thresh is
                # the max_per_image-th highest score over all classes.
                if len(scores) > max_per_image:
                    kth = len(scores) - max_per_image
                    thresh = np.partition(scores, kth)[kth]
                    for j in range(1, val_dataset.num_classes + 1):
                        keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                        bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

                results[img_id] = bbox_and_scores

        eval_results = val_dataset.run_eval(results, save_dir=cfg.ckpt_dir)
        print(eval_results)
        summary_writer.add_scalar('val_mAP/mAP', eval_results[0], epoch)

    print('Starting training...')
    for epoch in range(1, cfg.num_epochs + 1):
        train_sampler.set_epoch(epoch)
        train(epoch)
        if cfg.val_interval > 0 and epoch % cfg.val_interval == 0:
            val_map(epoch)
        # Save the unwrapped module's weights after every epoch.
        print(saver.save(model.module.state_dict(), 'checkpoint'))
        lr_scheduler.step(epoch)  # move to here after pytorch1.1.0

    summary_writer.close()
Ejemplo n.º 6
0
def main():
    """Evaluate a pretrained hourglass model on the KAIST test split.

    Settings come from the module-level ``cfg``. For every image the model is
    run on each test scale with an RGB and an IR input, the decoded
    detections are rescaled and merged, split per class, passed through
    ``soft_nms_merge`` and capped at ``dataset.max_objs`` per image. The
    collected results are handed to ``dataset.run_eval`` and the returned
    value is logged as the log-average miss rate.

    Raises:
        NotImplementedError: if ``cfg.arch`` does not contain ``'hourglass'``.
    """
    logger = create_logger(save_dir=cfg.log_dir)
    # Shadow the builtin locally so every print() below goes to the logger.
    print = logger.info
    print(cfg)

    torch.manual_seed(317)
    torch.backends.cudnn.benchmark = False
    cfg.device = torch.device('cuda')

    print('Setting up data...')
    Dataset_eval = KAIST_eval
    dataset = Dataset_eval(cfg.data_dir,
                           'test',
                           test_scales=cfg.test_scales,
                           test_flip=cfg.test_flip)
    val_loader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=1,
                                             pin_memory=True,
                                             collate_fn=dataset.collate_fn)

    print('Creating model...')
    if 'hourglass' in cfg.arch:
        # get_hourglass is indexed, not called: a lookup keyed by arch name.
        model = get_hourglass[cfg.arch]
    else:
        raise NotImplementedError

    model = nn.DataParallel(model).to(cfg.device)

    # The state dict is loaded into the DataParallel wrapper itself, so the
    # checkpoint keys must match the wrapped model's key names.
    if (os.path.exists(cfg.pretrain_dir)):
        model.load_state_dict(torch.load(cfg.pretrain_dir))
        print('loaded pretrained model from %s !' % cfg.pretrain_dir)

    print('test starts at %s' % datetime.now())
    model.eval()
    results = {}
    with torch.no_grad():
        for inputs in val_loader:
            # batch_size is 1, so the collated batch holds a single
            # (img_id, per-scale inputs) pair.
            img_id, inputs = inputs[0]
            detections = []
            for scale in inputs:
                inputs[scale]['img_rgb'] = inputs[scale]['img_rgb'].to(
                    cfg.device)
                inputs[scale]['img_ir'] = inputs[scale]['img_ir'].to(
                    cfg.device)
                # The model takes the (rgb, ir) pair; only the last element
                # of its output is decoded.
                output = model(
                    (inputs[scale]['img_rgb'], inputs[scale]['img_ir']))[-1]
                dets = _decode(*output,
                               ae_threshold=cfg.ae_threshold,
                               K=cfg.topk,
                               kernel=3)
                # Each detection is a row of 8 values.
                dets = dets.reshape(dets.shape[0], -1,
                                    8).detach().cpu().numpy()
                # A leading dimension of 2 presumably means the second entry
                # is the horizontally flipped image (test_flip) - TODO
                # confirm. Its columns 0 and 2 are mirrored about
                # fmap_size[0, 1] and swapped.
                if dets.shape[0] == 2:
                    dets[1, :,
                         [0, 2]] = inputs[scale]['fmap_size'][0,
                                                              1] - dets[1, :,
                                                                        [2, 0]]
                dets = dets.reshape(1, -1, 8)

                # _rescale_dets' return value is ignored, so it is expected
                # to modify dets in place.
                _rescale_dets(dets, inputs[scale]['ratio'],
                              inputs[scale]['border'], inputs[scale]['size'])
                # Undo the test-time scale on the first four columns.
                dets[:, :, 0:4] /= scale
                detections.append(dets)

            # Merge all scales into one 2-D array of detections.
            detections = np.concatenate(detections, axis=1)[0]
            # reject detections with negative scores
            detections = detections[detections[:, 4] > -1]
            # The last column is compared against the class index below.
            classes = detections[..., -1]

            results[img_id] = {}
            for j in range(dataset.num_classes):
                keep_inds = (classes == j)
                # Results are keyed by class index + 1.
                results[img_id][j + 1] = detections[keep_inds][:, 0:7].astype(
                    np.float32)
                bboxes = results[img_id][j + 1]
                # Boolean-mask indexing returns a copy, so from here on
                # `bboxes` no longer aliases results[img_id][j + 1].
                bboxes = bboxes[(bboxes[:, 5] != 0.0) & (bboxes[:, 6] != 0.0)]
                # Logged once per class, i.e. num_classes times per image.
                print(img_id)

                # NOTE(review): soft_nms_merge receives the filtered copy and
                # its return value is discarded, while the line that stores
                # the result below slices the unfiltered array - confirm
                # whether the NMS output was meant to be stored instead.
                soft_nms_merge(bboxes,
                               Nt=cfg.nms_threshold,
                               method=2,
                               weight_exp=cfg.w_exp)
                # soft_nms(results[img_id][j + 1], Nt=0.5, method=2)
                # Keep only the first five columns (the last one is used as
                # the score below).
                results[img_id][j + 1] = results[img_id][j + 1][:, 0:5]

            scores = np.hstack([
                results[img_id][j][:, -1]
                for j in range(1, dataset.num_classes + 1)
            ])
            # Keep at most dataset.max_objs detections per image: thresh is
            # the max_objs-th highest score over all classes.
            if len(scores) > dataset.max_objs:
                kth = len(scores) - dataset.max_objs
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, dataset.num_classes + 1):
                    keep_inds = (results[img_id][j][:, -1] >= thresh)
                    results[img_id][j] = results[img_id][j][keep_inds]

    lamr = dataset.run_eval(results, run_dir=cfg.ckpt_dir)
    print('log-average miss rate = {}'.format(lamr))
    print('test ends at %s' % datetime.now())
Ejemplo n.º 7
0
def main():
    """Evaluate every per-epoch checkpoint and report the best mAP seen.

    Settings come from the module-level ``cfg``. For each epoch in
    ``1..num_epochs`` the checkpoint ``checkpoint_epoch<N>.t7`` inside
    ``cfg.ckpt_dir`` is loaded into the model selected by ``cfg.arch``, the
    model is run over the evaluation dataset, and the first value returned
    by ``dataset.run_eval`` is tracked as the mAP. Timing and throughput
    (FPS) of the inference loop are logged for every checkpoint.

    Raises:
        NotImplementedError: if ``cfg.arch`` matches none of the supported
            architectures.
    """
    logger = create_logger(save_dir=cfg.log_dir)
    # Shadow the builtin locally so every print() below goes to the logger.
    print = logger.info
    print(cfg)

    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    Dataset_eval = Damage_eval
    # NOTE(review): the original trailing comment read "split test" although
    # the 'train' split is passed - confirm which split is intended.
    dataset = Dataset_eval(cfg.data_dir,
                           split='train',
                           test_scales=cfg.test_scales,
                           test_flip=cfg.test_flip)

    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=1,
                                              pin_memory=True,
                                              collate_fn=dataset.collate_fn)

    print('Creating model...')
    if 'hourglass' in cfg.arch:
        # get_hourglass is indexed, not called: a lookup keyed by arch name.
        model = get_hourglass[cfg.arch]
    elif 'resdcn' in cfg.arch:
        model = get_pose_net_resdcn(num_layers=18, head_conv=64, num_classes=3)
    elif cfg.arch == 'resnet':
        model = get_pose_net(num_layers=18, head_conv=64, num_classes=3)
    elif cfg.arch == 'res_CBAM':
        model = get_pose_net_resnet_CBAM(num_layers=18,
                                         head_conv=64,
                                         num_classes=3)
    elif cfg.arch == 'resnet_PAM':
        model = get_pose_net_resnet_PAM(num_layers=18,
                                        head_conv=64,
                                        num_classes=3)
    elif cfg.arch == 'resnet_SE':
        model = get_pose_net_resnet_SE(num_layers=18,
                                       head_conv=64,
                                       num_classes=3)
    else:
        # Fail here with a clear error instead of hitting an unbound
        # `model` further down.
        raise NotImplementedError('unsupported arch: %s' % cfg.arch)

    def Evaluate(epoch, model):
        """Run `model` over data_loader; return the first run_eval() value."""
        print('\n Evaluate@Epoch: %d' % epoch)

        # time.clock() was removed in Python 3.8; perf_counter() is the
        # recommended monotonic timer for measuring elapsed time.
        start_time = time.perf_counter()
        print('Start time %s Seconds' % start_time)

        model.eval()
        torch.cuda.empty_cache()
        max_per_image = 100

        results = {}
        num_images = 0
        with torch.no_grad():
            for inputs in data_loader:
                # batch_size is 1, so the collated batch holds one sample.
                img_id, inputs, _img_path = inputs[0]
                num_images += 1

                detections = []
                for scale in inputs:
                    inputs[scale]['image'] = inputs[scale]['image'].to(
                        cfg.device)
                    # Only the last element of the model output is decoded.
                    output = model(inputs[scale]['image'])[-1]
                    # Per the original author's note, ctdet_decode returns
                    # torch.cat([bboxes, scores, clses], dim=2).
                    dets = ctdet_decode(*output, K=cfg.test_topk)
                    # Flatten to a 2-D array: one row per detection.
                    dets = dets.detach().cpu().numpy().reshape(
                        1, -1, dets.shape[2])[0]

                    top_preds = {}
                    fmap_size = (inputs[scale]['fmap_w'],
                                 inputs[scale]['fmap_h'])
                    # Columns 0:2 and 2:4 are transformed as two separate
                    # point pairs.
                    dets[:, :2] = transform_preds(dets[:, 0:2],
                                                  inputs[scale]['center'],
                                                  inputs[scale]['scale'],
                                                  fmap_size)
                    dets[:, 2:4] = transform_preds(dets[:, 2:4],
                                                   inputs[scale]['center'],
                                                   inputs[scale]['scale'],
                                                   fmap_size)
                    clses = dets[:, -1]
                    # Results are keyed by class index + 1.
                    for j in range(dataset.num_classes):
                        inds = (clses == j)
                        top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                        # Undo the test-time scale on the box coordinates.
                        top_preds[j + 1][:, :4] /= scale

                    detections.append(top_preds)

                # Merge the per-scale detections class by class.
                bbox_and_scores = {
                    j: np.concatenate([d[j] for d in detections], axis=0)
                    for j in range(1, dataset.num_classes + 1)
                }
                scores = np.hstack([
                    bbox_and_scores[j][:, 4]
                    for j in range(1, dataset.num_classes + 1)
                ])
                # Keep at most max_per_image detections per image: thresh is
                # the max_per_image-th highest score over all classes.
                if len(scores) > max_per_image:
                    kth = len(scores) - max_per_image
                    thresh = np.partition(scores, kth)[kth]
                    for j in range(1, dataset.num_classes + 1):
                        keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                        bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

                results[img_id] = bbox_and_scores

        # Stop the clock before run_eval so only inference is timed.
        end_time = time.perf_counter()

        eval_results = dataset.run_eval(results, save_dir=cfg.ckpt_dir)
        print(eval_results)

        print('End time %s Seconds' % end_time)
        Run_time = end_time - start_time
        # Use the number of images actually processed rather than a
        # hard-coded count.
        FPS = num_images / Run_time
        print('FPS %s ' % FPS)

        #summary_writer.add_scalar('Evaluate_mAP/mAP', eval_results[0], epoch)
        return eval_results[0]

    num_epochs = 60  # replace 60 with the number of epoch
    Max_mAP = 0

    for epoch in range(1, num_epochs + 1):
        # Path of the checkpoint written for this epoch.
        cfg.pretrain_dir = os.path.join(cfg.ckpt_dir, 'checkpoint_epoch' +
                                        str(epoch) + '.t7')
        model = load_model(model, cfg.pretrain_dir)
        model = model.to(cfg.device)

        mAP = Evaluate(epoch, model)
        if mAP > Max_mAP:
            Max_mAP = mAP
            print('Max_AP=%s' % Max_mAP)
Ejemplo n.º 8
0
def main():
  """Train an ImageNet network built from a saved genotype.

  Settings come from the module-level ``cfg``. The genotype is read from
  ``genotype.pickle`` inside ``cfg.ckpt_dir``; the network is trained with
  SGD and a label-smoothed cross entropy for ``cfg.max_epochs`` epochs,
  validated after every epoch (top-1 / top-5 precision), and rank 0 writes
  ``checkpoint.t7`` after every epoch.
  """
  logger = create_logger(cfg.local_rank, save_dir=cfg.log_dir)
  summary_writer = create_summary(cfg.local_rank, log_dir=cfg.log_dir)
  # Shadow the builtin locally so every print() below goes to the logger.
  print = logger.info

  print(cfg)
  num_gpus = torch.cuda.device_count()
  if cfg.dist:
    # One process per GPU: bind to the local device and join the NCCL group.
    device = torch.device('cuda:%d' % cfg.local_rank)
    torch.cuda.set_device(cfg.local_rank)
    dist.init_process_group(backend='nccl', init_method='env://',
                            world_size=num_gpus, rank=cfg.local_rank)
  else:
    device = torch.device('cuda')

  print('==> Preparing data..')
  train_dataset = ImgNet_split(root=os.path.join(cfg.data_dir, 'train'),
                               transform=imgnet_transform(is_training=True))
  # The sampler is created unconditionally (set_epoch() is called on it in
  # the epoch loop) but is only handed to the loader when cfg.dist.
  train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset,
                                                                  num_replicas=num_gpus,
                                                                  rank=cfg.local_rank)
  # In distributed mode cfg.batch_size is split across the GPUs and shuffle
  # is turned off because a sampler is supplied instead.
  train_loader = torch.utils.data.DataLoader(train_dataset,
                                             batch_size=cfg.batch_size // num_gpus if cfg.dist
                                             else cfg.batch_size,
                                             shuffle=not cfg.dist,
                                             num_workers=cfg.num_workers,
                                             sampler=train_sampler if cfg.dist else None)

  val_dataset = ImgNet_split(root=os.path.join(cfg.data_dir, 'val'),
                             transform=imgnet_transform(is_training=False))
  val_loader = torch.utils.data.DataLoader(val_dataset,
                                           batch_size=cfg.batch_size,
                                           shuffle=False,
                                           num_workers=cfg.num_workers)

  print('==> Building model..')
  genotype = torch.load(os.path.join(cfg.ckpt_dir, 'genotype.pickle'))['genotype']
  model = NetworkImageNet(genotype, cfg.init_ch, cfg.num_cells, cfg.auxiliary, num_classes=1000)

  if not cfg.dist:
    model = nn.DataParallel(model).to(device)
  else:
    # model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
    model = model.to(device)
    model = nn.parallel.DistributedDataParallel(model,
                                                device_ids=[cfg.local_rank, ],
                                                output_device=cfg.local_rank)

  optimizer = torch.optim.SGD(model.parameters(), cfg.lr, momentum=0.9, weight_decay=cfg.wd)
  criterion = CrossEntropyLabelSmooth(num_classes=1000, epsilon=cfg.label_smooth).to(device)
  # Two schedulers share the optimizer: `warmup` doubles the learning rate
  # per step and is stepped for the first five epochs, `scheduler` decays it
  # by 0.97 per step afterwards (see the epoch loop below).
  scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.97)
  warmup = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=2)

  # Training
  def train(epoch):
    """Run one training epoch, logging every cfg.log_interval batches."""
    model.train()

    start_time = time.time()
    for batch_idx, (inputs, targets) in enumerate(train_loader):
      inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)

      # The model returns the main logits and the auxiliary-head logits;
      # the auxiliary loss is weighted by cfg.auxiliary.
      outputs, outputs_aux = model(inputs)
      loss = criterion(outputs, targets)
      loss_aux = criterion(outputs_aux, targets)
      loss += cfg.auxiliary * loss_aux

      optimizer.zero_grad()
      loss.backward()
      nn.utils.clip_grad_norm_(model.parameters(), 5.0)
      optimizer.step()

      if batch_idx % cfg.log_interval == 0:
        # Global step used as the x-axis of the summary scalars.
        step = len(train_loader) * epoch + batch_idx
        # duration covers the batches since the previous log line.
        duration = time.time() - start_time

        print('[%d/%d - %d/%d] cls_loss= %.5f (%d samples/sec)' %
              (epoch, cfg.max_epochs, batch_idx, len(train_loader),
               loss.item(), cfg.batch_size * cfg.log_interval / duration))

        start_time = time.time()
        summary_writer.add_scalar('cls_loss', loss.item(), step)
        summary_writer.add_scalar('learning rate', optimizer.param_groups[0]['lr'], step)

  def val(epoch):
    """Compute top-1 / top-5 precision (in percent) over val_loader and log it."""
    # switch to evaluate mode
    model.eval()
    top1 = 0
    top5 = 0
    with torch.no_grad():
      for inputs, targets in val_loader:
        inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)

        output, _ = model(inputs)

        # Rows of `correct` are ranks 1..5, columns are samples.
        _, pred = output.data.topk(5, dim=1, largest=True, sorted=True)
        pred = pred.t()
        correct = pred.eq(targets.view(1, -1).expand_as(pred))

        # reshape() instead of view(): `correct` derives from a transposed
        # tensor and may be non-contiguous, which view(-1) rejects.
        top1 += correct[:1].reshape(-1).float().sum().item()
        top5 += correct[:5].reshape(-1).float().sum().item()

    top1 *= 100 / len(val_dataset)
    top5 *= 100 / len(val_dataset)
    # The second value is the top-5 precision (the label used to say @1).
    print(' Precision@1 ==> %.2f%%  Precision@5: %.2f%%\n' % (top1, top5))
    summary_writer.add_scalar('Precision@1', top1, epoch)
    summary_writer.add_scalar('Precision@5', top5, epoch)

  for epoch in range(cfg.max_epochs):
    # Drop-path probability grows linearly from 0 with the epoch index.
    print('\nEpoch: %d lr: %.5f drop_path_prob: %.3f' %
          (epoch, scheduler.get_lr()[0], cfg.drop_path_prob * epoch / cfg.max_epochs))
    model.module.drop_path_prob = cfg.drop_path_prob * epoch / cfg.max_epochs
    train_sampler.set_epoch(epoch)
    train(epoch)
    val(epoch)
    if epoch < 5:
      warmup.step(epoch)
    else:
      scheduler.step(epoch)  # move to here after pytorch1.1.0
    print(model.module.genotype())
    # Only rank 0 writes the checkpoint (wrapper state dict, overwritten
    # every epoch).
    if cfg.local_rank == 0:
      torch.save(model.state_dict(), os.path.join(cfg.ckpt_dir, 'checkpoint.t7'))

  summary_writer.close()
  count_parameters(model)
  count_flops(model, input_size=224)
Ejemplo n.º 9
0
def main():
    """Train the hourglass model named by ``cfg.arch`` on the KAIST train split.

    Settings come from the module-level ``cfg``. Sets up saver, logger and
    summary writer, selects the CUDA device (joining an NCCL process group
    when ``cfg.dist``), builds the data loader and the (distributed) data
    parallel model, optionally loads pretrained weights, then trains for
    ``cfg.num_epochs`` epochs with Adam and a ``MultiStepLR`` schedule,
    saving a checkpoint after each epoch.

    Raises:
        NotImplementedError: if ``cfg.arch`` does not contain ``'hourglass'``.
    """
    saver = create_saver(cfg.local_rank, save_dir=cfg.ckpt_dir)
    logger = create_logger(cfg.local_rank, save_dir=cfg.log_dir)
    summary_writer = create_summary(cfg.local_rank, log_dir=cfg.log_dir)
    # Route every print() in this function through the logger.
    print = logger.info
    print(cfg)

    torch.manual_seed(317)
    # disable this if OOM at beginning of training
    torch.backends.cudnn.benchmark = True

    num_gpus = torch.cuda.device_count()
    if not cfg.dist:
        cfg.device = torch.device('cuda')
    else:
        # One process per GPU: bind to the local device, then join the group.
        cfg.device = torch.device('cuda:%d' % cfg.local_rank)
        torch.cuda.set_device(cfg.local_rank)
        dist.init_process_group(backend='nccl',
                                init_method='env://',
                                world_size=num_gpus,
                                rank=cfg.local_rank)

    print('Setting up data...')
    train_dataset = KAIST(cfg.data_dir, 'train', img_size=cfg.img_size)
    # Always built (set_epoch() is called on it every epoch) but only given
    # to the loader in distributed mode.
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=num_gpus, rank=cfg.local_rank)
    loader_kwargs = dict(
        batch_size=(cfg.batch_size // num_gpus
                    if cfg.dist else cfg.batch_size),
        shuffle=not cfg.dist,
        num_workers=cfg.num_workers,
        pin_memory=True,
        drop_last=True,
        sampler=train_sampler if cfg.dist else None,
    )
    train_loader = torch.utils.data.DataLoader(train_dataset, **loader_kwargs)

    print('Creating model...')
    if 'hourglass' not in cfg.arch:
        raise NotImplementedError
    model = get_hourglass[cfg.arch]

    if cfg.dist:
        # model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = nn.parallel.DistributedDataParallel(
            model.to(cfg.device),
            device_ids=[cfg.local_rank],
            output_device=cfg.local_rank)
    else:
        model = nn.DataParallel(model).to(cfg.device)

    if os.path.exists(cfg.pretrain_dir):
        # Weights are loaded into the wrapped module, not the wrapper.
        pretrained_state = torch.load(cfg.pretrain_dir)
        model.module.load_state_dict(pretrained_state)
        print('loaded pretrained model from %s !' % cfg.pretrain_dir)

    optimizer = torch.optim.Adam(model.parameters(), cfg.lr)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        cfg.lr_step,
                                                        gamma=0.1)

    def _gather_all(feature_maps, indices):
        """Apply _tranpose_and_gather_feature to every map in the sequence."""
        return [
            _tranpose_and_gather_feature(fmap, indices)
            for fmap in feature_maps
        ]

    def train(epoch):
        """Run one training epoch, logging every cfg.log_interval batches."""
        print('\n%s Epoch: %d' % (datetime.now(), epoch))
        model.train()
        tic = time.perf_counter()
        batches_per_epoch = len(train_loader)

        for batch_idx, batch in enumerate(train_loader):
            # Move the whole batch to the training device.
            for key in batch:
                batch[key] = batch[key].to(device=cfg.device,
                                           non_blocking=True)

            outputs = model((batch['img_rgb'], batch["img_ir"]))
            # Regroup the per-output tuples by kind.
            (hmap_tl, hmap_br, embd_tl, embd_br, regs_tl,
             regs_br) = zip(*outputs)

            embd_tl = _gather_all(embd_tl, batch['inds_tl'])
            embd_br = _gather_all(embd_br, batch['inds_br'])
            regs_tl = _gather_all(regs_tl, batch['inds_tl'])
            regs_br = _gather_all(regs_br, batch['inds_br'])

            mask = batch['ind_masks']
            focal_loss = (_neg_loss(hmap_tl, batch['hmap_tl']) +
                          _neg_loss(hmap_br, batch['hmap_br']))
            reg_loss = (_reg_loss(regs_tl, batch['regs_tl'], mask) +
                        _reg_loss(regs_br, batch['regs_br'], mask))
            pull_loss, push_loss = _ae_loss(embd_tl, embd_br, mask)

            # Pull and push terms are each weighted by 0.1.
            loss = focal_loss + 0.1 * pull_loss + 0.1 * push_loss + reg_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % cfg.log_interval != 0:
                continue

            # duration covers the batches since the previous log line.
            duration = time.perf_counter() - tic
            tic = time.perf_counter()
            progress = '[%d/%d-%d/%d] ' % (epoch, cfg.num_epochs, batch_idx,
                                           batches_per_epoch)
            losses = (
                ' focal_loss= %.5f pull_loss= %.5f push_loss= %.5f reg_loss= %.5f'
                % (focal_loss.item(), pull_loss.item(), push_loss.item(),
                   reg_loss.item()))
            speed = ' (%d samples/sec)' % (cfg.batch_size * cfg.log_interval /
                                           duration)
            print(progress + losses + speed)

            # Global step used as the x-axis of the summary scalars.
            step = batches_per_epoch * epoch + batch_idx
            for tag, value in (('focal_loss', focal_loss),
                               ('pull_loss', pull_loss),
                               ('push_loss', push_loss),
                               ('reg_loss', reg_loss)):
                summary_writer.add_scalar(tag, value.item(), step)

    print('Starting training...')
    for epoch in range(1, cfg.num_epochs + 1):
        train_sampler.set_epoch(epoch)
        train(epoch)
        # The wrapper's state dict is saved (not model.module's).
        print(saver.save(model.state_dict(), 'checkpoint'))
        lr_scheduler.step(epoch)

    summary_writer.close()
Ejemplo n.º 10
0
def main():
    """Train a CIFAR network built from the ``seed14880`` genotype.

    Settings come from the module-level ``cfg``. CIFAR-100 is used when
    ``'cifar100'`` appears in ``cfg.log_name``, CIFAR-10 otherwise. The
    network is trained with SGD and cosine-annealed learning rate for
    ``cfg.max_epochs`` epochs, tested after every epoch, and rank 0 writes
    ``checkpoint.t7`` after every epoch.
    """
    logger = create_logger(cfg.local_rank, save_dir=cfg.log_dir)
    summary_writer = create_summary(cfg.local_rank, log_dir=cfg.log_dir)
    # Route every print() in this function through the logger.
    print = logger.info

    print(cfg)
    num_gpus = torch.cuda.device_count()
    if not cfg.dist:
        device = torch.device('cuda')
    else:
        # One process per GPU: bind to the local device, then join the group.
        device = torch.device('cuda:%d' % cfg.local_rank)
        torch.cuda.set_device(cfg.local_rank)
        dist.init_process_group(backend='nccl',
                                init_method='env://',
                                world_size=num_gpus,
                                rank=cfg.local_rank)

    print('==> Preparing data..')
    if 'cifar100' in cfg.log_name:
        cifar = 100
    else:
        cifar = 10

    train_transform = cifar_search_transform(is_training=True,
                                             cutout=cfg.cutout)
    train_dataset = CIFAR_split(cifar=cifar,
                                root=cfg.data_dir,
                                split='train',
                                ratio=1.0,
                                transform=train_transform)
    # Always built (set_epoch() is called on it every epoch) but only given
    # to the loader in distributed mode.
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=num_gpus, rank=cfg.local_rank)
    train_loader_kwargs = dict(
        batch_size=(cfg.batch_size // num_gpus
                    if cfg.dist else cfg.batch_size),
        shuffle=not cfg.dist,
        num_workers=cfg.num_workers,
        sampler=train_sampler if cfg.dist else None,
    )
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               **train_loader_kwargs)

    test_transform = cifar_search_transform(is_training=False)
    test_dataset = CIFAR_split(cifar=cifar,
                               root=cfg.data_dir,
                               split='test',
                               transform=test_transform)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=cfg.batch_size,
                                              shuffle=False,
                                              num_workers=cfg.num_workers)

    print('==> Building model..')
    # The path is only logged; the genotype itself comes from `seed14880`.
    print(os.path.join(cfg.ckpt_dir, 'seed-14880-best-genotype.pth'))
    genotype = seed14880
    model = NetworkCIFAR(genotype,
                         cfg.init_ch,
                         cfg.num_cells,
                         cfg.auxiliary,
                         num_classes=cifar)

    if cfg.dist:
        # model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = nn.parallel.DistributedDataParallel(
            model.to(device),
            device_ids=[cfg.local_rank],
            output_device=cfg.local_rank)
    else:
        model = nn.DataParallel(model).to(device)

    optimizer = torch.optim.SGD(model.parameters(),
                                cfg.lr,
                                momentum=0.9,
                                weight_decay=cfg.wd)
    criterion = nn.CrossEntropyLoss().to(device)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, cfg.max_epochs)

    def train(epoch):
        """Run one training epoch, logging every cfg.log_interval batches."""
        model.train()
        batches_per_epoch = len(train_loader)

        start_time = time.time()
        for batch_idx, (inputs, targets) in enumerate(train_loader):
            inputs = inputs.to(device)
            targets = targets.to(device, non_blocking=True)

            # The model returns the main logits and the auxiliary-head
            # logits; the auxiliary loss is weighted by cfg.auxiliary.
            outputs, outputs_aux = model(inputs)
            loss = criterion(outputs, targets)
            loss_aux = criterion(outputs_aux, targets)
            loss += cfg.auxiliary * loss_aux

            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 5.0)
            optimizer.step()

            if batch_idx % cfg.log_interval != 0:
                continue

            # Global step used as the x-axis of the summary scalars.
            step = batches_per_epoch * epoch + batch_idx
            # duration covers the batches since the previous log line.
            duration = time.time() - start_time

            print('[%d/%d - %d/%d] cls_loss= %.5f (%d samples/sec)' %
                  (epoch, cfg.max_epochs, batch_idx, batches_per_epoch,
                   loss.item(),
                   cfg.batch_size * cfg.log_interval / duration))

            start_time = time.time()
            summary_writer.add_scalar('cls_loss', loss.item(), step)
            current_lr = optimizer.param_groups[0]['lr']
            summary_writer.add_scalar('learning rate', current_lr, step)

    def test(epoch):
        """Compute top-1 precision (in percent) over test_loader and log it."""
        model.eval()
        correct = 0
        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs = inputs.to(device)
                targets = targets.to(device, non_blocking=True)

                outputs, _ = model(inputs)
                # Index of the highest logit per sample.
                predicted = torch.max(outputs.data, 1)[1]
                hits = predicted.eq(targets.data)
                correct += hits.cpu().sum().item()

            acc = 100. * correct / len(test_loader.dataset)
            print(' Precision@1 ==> %.2f%% \n' % acc)
            summary_writer.add_scalar('Precision@1', acc, global_step=epoch)

    for epoch in range(cfg.max_epochs):
        # Drop-path probability grows linearly from 0 with the epoch index.
        print('\nEpoch: %d lr: %.5f drop_path_prob: %.3f' %
              (epoch, scheduler.get_lr()[0],
               cfg.drop_path_prob * epoch / cfg.max_epochs))
        wrapped = model._modules['module']
        wrapped.drop_path_prob = cfg.drop_path_prob * epoch / cfg.max_epochs
        train_sampler.set_epoch(epoch)
        train(epoch)
        test(epoch)
        scheduler.step(epoch)  # move to here after pytorch1.1.0
        # Only rank 0 writes the checkpoint (wrapper state dict, overwritten
        # every epoch).
        if cfg.local_rank == 0:
            checkpoint_path = os.path.join(cfg.ckpt_dir, 'checkpoint.t7')
            torch.save(model.state_dict(), checkpoint_path)

    summary_writer.close()
    count_parameters(model)
    count_flops(model, input_size=32)
Ejemplo n.º 11
0
def main():
  """Run the detector on the 'val' split, save annotated images, and evaluate.

  For every image the model is run once per test scale, the decoded
  detections are passed through ``transform_preds``, split per class and
  divided by the scale. Per-class detections from all scales are merged
  (with ``soft_nms`` when more than one test scale is configured) and at most
  ``max_per_image`` highest-scoring boxes are kept. Boxes scoring above 0.2
  are drawn on the image, which is written to ``data/damage/Predict_images``.
  Finally ``dataset.run_eval`` is called on all results and its return value
  is logged.

  Raises:
    NotImplementedError: if ``cfg.arch`` matches none of the known
      architectures.
  """
  logger = create_logger(save_dir=cfg.log_dir)
  print = logger.info
  print(cfg)

  cfg.device = torch.device('cuda')
  torch.backends.cudnn.benchmark = False

  max_per_image = 100

  # Create the output directory up front; otherwise plt.savefig fails on the
  # first image when the directory does not exist yet.
  save_root = 'data/damage/Predict_images'
  os.makedirs(save_root, exist_ok=True)

  Dataset_eval = Damage_eval  # your own data set

  # Crack RE Spalling
  dataset = Dataset_eval(cfg.data_dir, split='val',
                         test_scales=cfg.test_scales, test_flip=cfg.test_flip)

  data_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False,
                                            num_workers=1, pin_memory=True,
                                            collate_fn=dataset.collate_fn)

  print('Creating model...')
  if 'hourglass' in cfg.arch:
    model = get_hourglass[cfg.arch]
  elif 'resdcn' in cfg.arch:
    model = get_pose_net_resdcn(num_layers=18, head_conv=64, num_classes=3)
  elif cfg.arch == 'resnet':
    model = get_pose_net(num_layers=18, head_conv=64, num_classes=3)
  elif cfg.arch in ('res_CBAM', 'resnet_CBAM'):
    # 'resnet_CBAM' is the spelling used by the training script in this file;
    # the original 'res_CBAM' spelling is still accepted.
    model = get_pose_net_resnet_CBAM(num_layers=18, head_conv=64, num_classes=3)
  elif cfg.arch == 'resnet_PAM':
    model = get_pose_net_resnet_PAM(num_layers=18, head_conv=64, num_classes=3)
  elif cfg.arch == 'resnet_SE':
    model = get_pose_net_resnet_SE(num_layers=18, head_conv=64, num_classes=3)
  else:
    # Fail here with a clear error instead of an unbound `model` below.
    raise NotImplementedError

  model = load_model(model, cfg.pretrain_dir)
  model = model.to(cfg.device)
  model.eval()

  results = {}
  with torch.no_grad():
    for inputs in tqdm(data_loader):
      # batch_size is 1, so the collated batch holds a single sample.
      img_id, inputs, img_path = inputs[0]
      print('id%s ',img_id)

      detections = []
      for scale in inputs:
        inputs[scale]['image'] = inputs[scale]['image'].to(cfg.device)

        output = model(inputs[scale]['image'])[-1]
        dets = ctdet_decode(*output, K=cfg.test_topk)
        dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])[0]

        top_preds = {}
        fmap_size = (inputs[scale]['fmap_w'], inputs[scale]['fmap_h'])
        # Columns 0:2 and 2:4 are transformed separately with the same
        # center/scale/feature-map size.
        dets[:, :2] = transform_preds(dets[:, 0:2],
                                      inputs[scale]['center'],
                                      inputs[scale]['scale'],
                                      fmap_size)
        dets[:, 2:4] = transform_preds(dets[:, 2:4],
                                       inputs[scale]['center'],
                                       inputs[scale]['scale'],
                                       fmap_size)
        cls = dets[:, -1]
        for j in range(dataset.num_classes):
          inds = (cls == j)
          # Result keys are 1-based class ids; keep the first five columns.
          top_preds[j + 1] = dets[inds, :5].astype(np.float32)
          top_preds[j + 1][:, :4] /= scale

        detections.append(top_preds)

      bbox_and_scores = {}
      for j in range(1, dataset.num_classes + 1):
        bbox_and_scores[j] = np.concatenate([d[j] for d in detections], axis=0)
        if len(dataset.test_scales) > 1:
          soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
      scores = np.hstack([bbox_and_scores[j][:, 4]
                          for j in range(1, dataset.num_classes + 1)])

      if len(scores) > max_per_image:
        # Keep only detections at or above the max_per_image-th best score.
        kth = len(scores) - max_per_image
        thresh = np.partition(scores, kth)[kth]
        for j in range(1, dataset.num_classes + 1):
          keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
          bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

      images_test = cv2.imread(img_path)
      fig = plt.figure(0)
      colors = COCO_COLORS
      names = COCO_NAMES

      plt.imshow(cv2.cvtColor(images_test, cv2.COLOR_BGR2RGB))
      for lab in bbox_and_scores:
        for boxes in bbox_and_scores[lab]:
          x1, y1, x2, y2, score = boxes
          # NOTE(review): the 511 limit presumably assumes 512x512 images;
          # confirm against the dataset.
          if (x1 < 0):
            x1 = 0
          if (y1 < 0):
            y1 = 0
          if (x2 > 511):
            x2 = 511
          if (y2 > 511):
            y2 = 511

          if score > 0.2:
            plt.gca().add_patch(Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                                          edgecolor=colors[lab], facecolor='none'))
            plt.text(x1 - 12, y1 - 12, names[lab],
                     bbox=dict(facecolor=colors[lab], alpha=0.5), fontsize=7, color='k')

      fig.patch.set_visible(False)
      # Use the real file name rather than the last 10 characters of the
      # path, which truncates long names and can include a path separator.
      save_path = os.path.join(save_root, os.path.basename(img_path))
      plt.axis('off')
      plt.savefig(save_path, dpi=400, transparent=True, bbox_inches="tight", pad_inches=0.1)  # save
      plt.close(0)

      results[img_id] = bbox_and_scores

  eval_results = dataset.run_eval(results, cfg.ckpt_dir)
  print(eval_results)
Ejemplo n.º 12
0
def main():
    """Search an architecture by alternating alpha and weight updates on CIFAR.

    The training split (``ratio=0.5``) drives the weight updates and the
    'val' split drives the architecture-parameter ("alpha") updates. After
    every epoch the model is evaluated on the 'val' split, the genotype is
    logged, and on local rank 0 the alpha history, model state and genotype
    are saved under ``cfg.ckpt_dir``.
    """
    logger = create_logger(cfg.local_rank, save_dir=cfg.log_dir)
    summary_writer = create_summary(cfg.local_rank, log_dir=cfg.log_dir)
    print = logger.info

    print(cfg)
    num_gpus = torch.cuda.device_count()
    if cfg.dist:
        device = torch.device('cuda:%d' % cfg.local_rank)
        torch.cuda.set_device(cfg.local_rank)
        dist.init_process_group(backend='nccl',
                                init_method='env://',
                                world_size=num_gpus,
                                rank=cfg.local_rank)
    else:
        device = torch.device('cuda')

    print('==> Preparing data..')
    # The dataset variant is chosen from the log name.
    cifar = 100 if 'cifar100' in cfg.log_name else 10

    train_dataset = CIFAR_split(
        cifar=cifar,
        root=cfg.data_dir,
        split='train',
        ratio=0.5,
        transform=cifar_search_transform(is_training=True))
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=num_gpus, rank=cfg.local_rank)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.batch_size // num_gpus if cfg.dist else cfg.batch_size,
        shuffle=not cfg.dist,
        num_workers=cfg.num_workers,
        sampler=train_sampler if cfg.dist else None)

    val_dataset = CIFAR_split(
        cifar=cifar,
        root=cfg.data_dir,
        split='val',
        ratio=0.5,
        transform=cifar_search_transform(is_training=False))
    val_sampler = torch.utils.data.distributed.DistributedSampler(
        val_dataset, num_replicas=num_gpus, rank=cfg.local_rank)
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=cfg.batch_size // num_gpus if cfg.dist else cfg.batch_size,
        shuffle=not cfg.dist,
        num_workers=cfg.num_workers,
        sampler=val_sampler if cfg.dist else None)

    print('==> Building model..')
    model = Network(C=cfg.init_ch,
                    num_cells=cfg.num_cells,
                    num_nodes=cfg.num_nodes,
                    multiplier=cfg.num_nodes,
                    num_classes=cifar)

    if not cfg.dist:
        model = nn.DataParallel(model).to(device)
    else:
        # model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = model.to(device)
        model = nn.parallel.DistributedDataParallel(
            model, device_ids=[
                cfg.local_rank,
            ], output_device=cfg.local_rank)

    # proxy_model is used for 2nd order update
    # NOTE(review): it is built without the multiplier/num_classes arguments
    # given to the main model; confirm Network's defaults match.
    if cfg.order == '2nd':
        proxy_model = Network(cfg.init_ch, cfg.num_cells, cfg.num_nodes).cuda()

    count_parameters(model)

    # Parameters whose name contains 'alpha' get their own optimizer.
    weights = [v for k, v in model.named_parameters() if 'alpha' not in k]
    alpha_params = [v for k, v in model.named_parameters() if 'alpha' in k]
    optimizer_w = optim.SGD(weights,
                            cfg.w_lr,
                            momentum=0.9,
                            weight_decay=cfg.w_wd)
    optimizer_a = optim.Adam(alpha_params,
                             lr=cfg.a_lr,
                             betas=(0.5, 0.999),
                             weight_decay=cfg.a_wd)
    criterion = nn.CrossEntropyLoss().cuda()
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_w,
                                                     cfg.max_epochs,
                                                     eta_min=cfg.w_min_lr)

    # One list per epoch holding snapshots of alpha_normal / alpha_reduce.
    alpha_history = []

    def train(epoch):
        """Run one epoch of alternating alpha and weight updates."""
        model.train()
        # Read the lr from the optimizer: calling scheduler.get_lr() outside
        # of step() warns on recent PyTorch and may not return the lr in use.
        print('\nEpoch: %d lr: %f' %
              (epoch, optimizer_w.param_groups[0]['lr']))
        alpha_history.append([])
        start_time = time.time()

        for batch_idx, ((inputs_w, targets_w), (inputs_a, targets_a)) \
                in enumerate(zip(train_loader, val_loader)):

            inputs_w, targets_w = inputs_w.to(device), targets_w.to(
                device, non_blocking=True)
            inputs_a, targets_a = inputs_a.to(device), targets_a.to(
                device, non_blocking=True)

            # 1. update alpha (only once epoch exceeds cfg.a_start)
            if epoch > cfg.a_start:
                optimizer_a.zero_grad()

                if cfg.order == '1st':
                    # using 1st order update
                    outputs = model(inputs_a)
                    val_loss = criterion(outputs, targets_a)
                    val_loss.backward()
                else:
                    # using 2nd order update
                    val_loss = update(model, proxy_model, criterion,
                                      optimizer_w, inputs_a, targets_a,
                                      inputs_w, targets_w)

                optimizer_a.step()
            else:
                # Placeholder so the logging below always has a value.
                val_loss = torch.tensor([0]).cuda()

            # 2. update weights
            outputs = model(inputs_w)
            cls_loss = criterion(outputs, targets_w)

            optimizer_w.zero_grad()
            cls_loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 5.0)
            optimizer_w.step()

            if batch_idx % cfg.log_interval == 0:
                step = len(train_loader) * epoch + batch_idx
                duration = time.time() - start_time

                print(
                    '[%d/%d - %d/%d] cls_loss: %5f val_loss: %5f (%d samples/sec)'
                    % (epoch, cfg.max_epochs, batch_idx, len(train_loader),
                       cls_loss.item(), val_loss.item(),
                       cfg.batch_size * cfg.log_interval / duration))

                start_time = time.time()
                summary_writer.add_scalar('cls_loss', cls_loss.item(), step)
                summary_writer.add_scalar('val_loss', val_loss.item(), step)
                summary_writer.add_scalar('learning rate',
                                          optimizer_w.param_groups[0]['lr'],
                                          step)

                alpha_history[-1].append(
                    model.module.alpha_normal.detach().cpu().numpy())
                alpha_history[-1].append(
                    model.module.alpha_reduce.detach().cpu().numpy())

    def evaluate(epoch):
        """Log mean loss and top-1 accuracy over this process's val batches."""
        model.eval()

        correct = 0
        seen = 0
        total_loss = 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs, targets = inputs.to(device), targets.to(
                    device, non_blocking=True)

                outputs = model(inputs)
                total_loss += criterion(outputs, targets).item()
                _, predicted = torch.max(outputs.data, 1)
                correct += predicted.eq(targets.data).cpu().sum().item()
                seen += targets.size(0)

            # Divide by the samples actually iterated: with a
            # DistributedSampler each rank only sees its shard, so dividing
            # by len(val_loader.dataset) would understate the accuracy.
            acc = 100. * correct / seen
            total_loss = total_loss / len(val_loader)
            print('Val_loss==> %.5f Precision@1 ==> %.2f%% \n' %
                  (total_loss, acc))
            summary_writer.add_scalar('Precision@1', acc, global_step=epoch)
            summary_writer.add_scalar('val_loss_per_epoch',
                                      total_loss,
                                      global_step=epoch)

    for epoch in range(cfg.max_epochs):
        train_sampler.set_epoch(epoch)
        val_sampler.set_epoch(epoch)
        train(epoch)
        evaluate(epoch)
        # NOTE(review): passing `epoch` to step() is deprecated in recent
        # PyTorch; kept as-is to preserve the existing schedule.
        scheduler.step(epoch)  # move to here after pytorch1.1.0
        print(model.module.genotype())
        if cfg.local_rank == 0:
            torch.save(alpha_history, os.path.join(cfg.ckpt_dir, 'alphas.t7'))
            torch.save(model.state_dict(),
                       os.path.join(cfg.ckpt_dir, 'search_checkpoint.t7'))
            torch.save({'genotype': model.module.genotype()},
                       os.path.join(cfg.ckpt_dir, 'genotype.t7'))

    summary_writer.close()
def main():
    """Evaluate a pretrained detector on the 'val' split of COCO or Pascal VOC.

    Each image is run through the model once per test scale. Decoded
    detections are passed through ``transform_preds``, grouped under 1-based
    class ids and divided by the scale. The per-scale results are then merged
    (with ``soft_nms`` when several test scales are configured) and capped at
    ``max_per_image`` boxes before ``dataset.run_eval`` is called.

    Raises:
        NotImplementedError: if ``cfg.arch`` is neither an hourglass nor a
            resdcn variant.
    """
    logger = create_logger(save_dir=cfg.log_dir)
    print = logger.info
    print(cfg)

    torch.backends.cudnn.benchmark = False
    cfg.device = torch.device('cuda')

    max_per_image = 100

    if cfg.dataset == 'coco':
        Dataset_eval = COCO_eval
    else:
        Dataset_eval = PascalVOC_eval
    dataset = Dataset_eval(cfg.data_dir,
                           split='val',
                           img_size=cfg.img_size,
                           test_scales=cfg.test_scales,
                           test_flip=cfg.test_flip)
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=1,
                                              pin_memory=False,
                                              collate_fn=dataset.collate_fn)

    print('Creating model...')
    if 'hourglass' in cfg.arch:
        model = get_hourglass[cfg.arch]
    elif 'resdcn' in cfg.arch:
        # The layer count is the suffix after the last underscore.
        depth = int(cfg.arch.split('_')[-1])
        model = get_pose_net(num_layers=depth,
                             num_classes=dataset.num_classes)
    else:
        raise NotImplementedError

    model = load_model(model, cfg.pretrain_dir).to(cfg.device)
    model.eval()

    class_ids = range(1, dataset.num_classes + 1)
    multi_scale = len(dataset.test_scales) > 1

    results = {}
    with torch.no_grad():
        for batch in data_loader:
            # batch_size is 1, so the collated batch holds a single sample.
            img_id, inputs = batch[0]

            detections = []
            for scale in inputs:
                meta = inputs[scale]
                meta['image'] = meta['image'].to(cfg.device)

                output = model(meta['image'])[-1]
                dets = ctdet_decode(*output, K=cfg.test_topk)
                dets = dets.detach().cpu().numpy().reshape(
                    1, -1, dets.shape[2])[0]

                # Columns 0:2 and 2:4 go through the same transform.
                fmap_size = (meta['fmap_w'], meta['fmap_h'])
                for cols in (slice(0, 2), slice(2, 4)):
                    dets[:, cols] = transform_preds(dets[:, cols],
                                                    meta['center'],
                                                    meta['scale'], fmap_size)

                labels = dets[:, -1]
                top_preds = {}
                for j in range(dataset.num_classes):
                    per_class = dets[labels == j, :5].astype(np.float32)
                    per_class[:, :4] /= scale
                    top_preds[j + 1] = per_class

                detections.append(top_preds)

            bbox_and_scores = {}
            for j in class_ids:
                merged = np.concatenate([d[j] for d in detections], axis=0)
                bbox_and_scores[j] = merged
                if multi_scale:
                    soft_nms(merged, Nt=0.5, method=2)

            scores = np.hstack([bbox_and_scores[j][:, 4] for j in class_ids])

            # Drop everything below the max_per_image-th best score.
            excess = len(scores) - max_per_image
            if excess > 0:
                thresh = np.partition(scores, excess)[excess]
                for j in class_ids:
                    boxes = bbox_and_scores[j]
                    bbox_and_scores[j] = boxes[boxes[:, 4] >= thresh]

            results[img_id] = bbox_and_scores

    eval_results = dataset.run_eval(results, cfg.ckpt_dir)
    print(eval_results)
Ejemplo n.º 14
0
def main():
  """Set up data, model and optimizer for detector training on COCO / Pascal VOC.

  Builds the saver, logger and summary writer, configures the device
  (optionally initialising an NCCL process group when ``cfg.dist`` is set),
  creates the train/val loaders, instantiates the model selected by
  ``cfg.arch``, wraps it for (distributed) data parallelism, optionally loads
  pretrained weights, and creates an Adam optimizer with a MultiStepLR
  schedule.

  NOTE(review): this snippet ends inside the nested ``train`` loop; the loss
  computation and the epoch loop are not part of the visible code.

  Raises:
    NotImplementedError: if ``cfg.arch`` is neither an hourglass nor a resdcn
      variant.
  """
  saver = create_saver(cfg.local_rank, save_dir=cfg.ckpt_dir)
  logger = create_logger(cfg.local_rank, save_dir=cfg.log_dir)
  summary_writer = create_summary(cfg.local_rank, log_dir=cfg.log_dir)
  # From here on `print` is the logger's info method, not the builtin.
  print = logger.info
  print(cfg)

  torch.manual_seed(317)
  torch.backends.cudnn.benchmark = True  # disable this if OOM at beginning of training

  num_gpus = torch.cuda.device_count()
  if cfg.dist:
    # One process per GPU: bind this process to its local rank's device.
    cfg.device = torch.device('cuda:%d' % cfg.local_rank)
    torch.cuda.set_device(cfg.local_rank)
    dist.init_process_group(backend='nccl', init_method='env://',
                            world_size=num_gpus, rank=cfg.local_rank)
  else:
    cfg.device = torch.device('cuda')

  print('Setting up data...')
  Dataset = COCO if cfg.dataset == 'coco' else PascalVOC
  train_dataset = Dataset(cfg.data_dir, 'train', split_ratio=cfg.split_ratio, img_size=cfg.img_size)
  # The sampler is always constructed but only handed to the loader when
  # cfg.dist is set (see `sampler=` below).
  train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset,
                                                                  num_replicas=num_gpus,
                                                                  rank=cfg.local_rank)
  # In distributed mode cfg.batch_size is split across the GPUs and shuffling
  # is left to the sampler.
  train_loader = torch.utils.data.DataLoader(train_dataset,
                                             batch_size=cfg.batch_size // num_gpus
                                             if cfg.dist else cfg.batch_size,
                                             shuffle=not cfg.dist,
                                             num_workers=cfg.num_workers,
                                             pin_memory=True,
                                             drop_last=True,
                                             sampler=train_sampler if cfg.dist else None)

  Dataset_eval = COCO_eval if cfg.dataset == 'coco' else PascalVOC_eval
  # Validation uses a single scale (1.0) without flipping, one image per batch.
  val_dataset = Dataset_eval(cfg.data_dir, 'val', test_scales=[1.], test_flip=False)
  val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1,
                                           shuffle=False, num_workers=1, pin_memory=True,
                                           collate_fn=val_dataset.collate_fn)

  print('Creating model...')
  if 'hourglass' in cfg.arch:
    model = get_hourglass[cfg.arch]
  elif 'resdcn' in cfg.arch:
    # The number of layers is the suffix after the last underscore in cfg.arch.
    model = get_pose_net(num_layers=int(cfg.arch.split('_')[-1]), num_classes=train_dataset.num_classes)
  else:
    raise NotImplementedError

  if cfg.dist:
    # model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
    model = model.to(cfg.device)
    model = nn.parallel.DistributedDataParallel(model,
                                                device_ids=[cfg.local_rank, ],
                                                output_device=cfg.local_rank)
  else:
    model = nn.DataParallel(model).to(cfg.device)

  # Pretrained weights are loaded only when cfg.pretrain_dir is an existing file.
  if os.path.isfile(cfg.pretrain_dir):
    model = load_model(model, cfg.pretrain_dir)

  optimizer = torch.optim.Adam(model.parameters(), cfg.lr)
  # The learning rate is multiplied by 0.1 at each milestone in cfg.lr_step.
  lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, cfg.lr_step, gamma=0.1)

  def train(epoch):
    """Move each batch of ``train_loader`` to ``cfg.device`` (snippet ends here)."""
    print('\n Epoch: %d' % epoch)
    model.train()
    tic = time.perf_counter()
    for batch_idx, batch in enumerate(train_loader):
      # Every entry except 'meta' is moved to the target device.
      for k in batch:
        if k != 'meta':
          batch[k] = batch[k].to(device=cfg.device, non_blocking=True)
Ejemplo n.º 15
0
def main():
    """Train the damage detector and track the best test / validation mAP.

    After every training epoch (from ``flag_epoch`` on) the model is evaluated
    on the 'test' and 'val' splits. A checkpoint named
    ``checkpoint_MaxAP_epoch<N>`` is saved whenever the validation mAP
    improves, and the latest weights are saved as ``checkpoint`` after every
    epoch.

    Raises:
        NotImplementedError: if ``cfg.arch`` matches none of the known
            architectures.
    """
    saver = create_saver(cfg.local_rank, save_dir=cfg.ckpt_dir)
    logger = create_logger(cfg.local_rank, save_dir=cfg.log_dir)
    summary_writer = create_summary(cfg.local_rank, log_dir=cfg.log_dir)
    print = logger.info
    print(cfg)

    torch.manual_seed(300)
    torch.backends.cudnn.benchmark = True
    # Alternative setup suggested by the original author:
    #   torch.manual_seed(350)
    #   torch.backends.cudnn.benchmark = False
    #   torch.backends.cudnn.deterministic = True

    num_gpus = torch.cuda.device_count()
    if cfg.dist:
        cfg.device = torch.device('cuda:%d' % cfg.local_rank)
        torch.cuda.set_device(cfg.local_rank)
        dist.init_process_group(backend='nccl',
                                init_method='env://',
                                world_size=num_gpus,
                                rank=cfg.local_rank)
    else:
        cfg.device = torch.device('cuda')

    print('Setting up data...')
    Dataset = Damage
    train_dataset = Dataset(cfg.data_dir,
                            'train',
                            split_ratio=cfg.split_ratio,
                            img_size=cfg.img_size)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=num_gpus, rank=cfg.local_rank)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.batch_size // num_gpus if cfg.dist else cfg.batch_size,
        shuffle=not cfg.dist,
        num_workers=cfg.num_workers,
        pin_memory=True,
        drop_last=True,
        sampler=train_sampler if cfg.dist else None)

    Dataset_eval = Damage_eval

    # Test and validation loaders: one image per batch, single scale, no flip.
    test_dataset = Dataset_eval(cfg.data_dir,
                                'test',
                                test_scales=[1.],
                                test_flip=False)
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=1,
        pin_memory=True,
        collate_fn=test_dataset.collate_fn)

    val_dataset = Dataset_eval(cfg.data_dir,
                               'val',
                               test_scales=[1.],
                               test_flip=False)
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=1,
        pin_memory=True,
        collate_fn=val_dataset.collate_fn)

    print('Creating model...')
    if 'hourglass' in cfg.arch:
        model = get_hourglass[cfg.arch]
    elif 'resdcn' in cfg.arch:
        model = get_pose_net_resdcn(num_layers=18, head_conv=64, num_classes=3)
    elif cfg.arch == 'resnet':
        model = get_pose_net(num_layers=18, head_conv=64, num_classes=3)
    elif cfg.arch == 'resnet_CBAM':
        model = get_pose_net_resnet_CBAM(num_layers=18,
                                         head_conv=64,
                                         num_classes=3)
    elif cfg.arch == 'resnet_PAM':
        model = get_pose_net_resnet_PAM(num_layers=18,
                                        head_conv=64,
                                        num_classes=3)
    elif cfg.arch == 'resnet_SE':
        model = get_pose_net_resnet_SE(num_layers=18,
                                       head_conv=64,
                                       num_classes=3)
    else:
        # Fail here with a clear error instead of an unbound `model` below.
        raise NotImplementedError

    if cfg.dist:
        # model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = model.to(cfg.device)
        model = nn.parallel.DistributedDataParallel(
            model, device_ids=[
                cfg.local_rank,
            ], output_device=cfg.local_rank)
    else:
        model = nn.DataParallel(model).to(cfg.device)

    # Pretrained weights are intentionally not loaded here.
    #if os.path.isfile(cfg.pretrain_dir):
    #  model = load_model(model, cfg.pretrain_dir)

    optimizer = torch.optim.Adam(model.parameters(), cfg.lr)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        cfg.lr_step,
                                                        gamma=0.1)  # adjust lr

    def train(epoch):
        """Run one training epoch and log the individual loss terms."""
        print('\n Epoch: %d' % epoch)
        model.train()
        tic = time.perf_counter()
        for batch_idx, batch in enumerate(train_loader):
            # Every entry except 'meta' is moved to the target device.
            for k in batch:
                if k != 'meta':
                    batch[k] = batch[k].to(device=cfg.device,
                                           non_blocking=True)

            outputs = model(batch['image'])
            hmap, regs, w_h_, pxpy = zip(*outputs)
            # Gather the regression outputs at the indices in batch['inds'].
            regs = [
                _tranpose_and_gather_feature(r, batch['inds']) for r in regs
            ]
            pxpy = [
                _tranpose_and_gather_feature(r, batch['inds']) for r in pxpy
            ]
            w_h_ = [
                _tranpose_and_gather_feature(r, batch['inds']) for r in w_h_
            ]

            hmap_loss = _neg_loss(hmap, batch['hmap'])
            reg_loss = _SmoothL1Loss(regs, batch['regs'], batch['ind_masks'])
            pxpy_loss = _reg_loss(pxpy, batch['pxpy'], batch['ind_masks'])
            w_h_loss = _SmoothL1Loss(w_h_, batch['w_h_'], batch['ind_masks'])
            loss = hmap_loss + 10 * reg_loss + 0.1 * w_h_loss + 0.1 * pxpy_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % cfg.log_interval == 0:
                duration = time.perf_counter() - tic
                tic = time.perf_counter()
                print(
                    '[%d/%d-%d/%d] ' %
                    (epoch, cfg.num_epochs, batch_idx, len(train_loader)) +
                    ' hmap_loss= %.5f reg_loss= %.5f w_h_loss= %.5f pxpy_loss= %.5f'
                    % (hmap_loss.item(), reg_loss.item(), w_h_loss.item(),
                       pxpy_loss.item()) + ' (%d samples/sec)' %
                    (cfg.batch_size * cfg.log_interval / duration))

                step = len(train_loader) * epoch + batch_idx
                summary_writer.add_scalar('hmap_loss', hmap_loss.item(), step)
                summary_writer.add_scalar('reg_loss', reg_loss.item(), step)
                summary_writer.add_scalar('w_h_loss', w_h_loss.item(), step)
                summary_writer.add_scalar('pxpy_loss', pxpy_loss.item(), step)

    def _evaluate_map(split_label, scalar_tag, loader, dataset, epoch):
        """Run detection over ``loader``, evaluate it, and return the first metric.

        Shared implementation of ``test_map`` and ``val_map``: only the log
        label, the summary tag, the loader and the dataset differ.
        """
        print('\n %s@Epoch: %d' % (split_label, epoch))

        # time.clock() was removed in Python 3.8; perf_counter() is the
        # replacement and is already used by train().
        start_time = time.perf_counter()
        print('Start time %s Seconds' % start_time)

        model.eval()
        torch.cuda.empty_cache()
        max_per_image = 100

        results = {}
        with torch.no_grad():
            for inputs in loader:
                # batch_size is 1, so the batch holds a single sample.
                img_id, inputs, _ = inputs[0]

                detections = []
                for scale in inputs:
                    inputs[scale]['image'] = inputs[scale]['image'].to(
                        cfg.device)
                    output = model(inputs[scale]['image'])[-1]
                    dets = ctdet_decode(*output, K=cfg.test_topk)
                    dets = dets.detach().cpu().numpy().reshape(
                        1, -1, dets.shape[2])[0]

                    top_preds = {}
                    fmap_size = (inputs[scale]['fmap_w'],
                                 inputs[scale]['fmap_h'])
                    dets[:, :2] = transform_preds(dets[:, 0:2],
                                                  inputs[scale]['center'],
                                                  inputs[scale]['scale'],
                                                  fmap_size)
                    dets[:, 2:4] = transform_preds(dets[:, 2:4],
                                                   inputs[scale]['center'],
                                                   inputs[scale]['scale'],
                                                   fmap_size)
                    clses = dets[:, -1]
                    for j in range(dataset.num_classes):
                        inds = (clses == j)
                        # Result keys are 1-based class ids.
                        top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                        top_preds[j + 1][:, :4] /= scale

                    detections.append(top_preds)

                bbox_and_scores = {
                    j: np.concatenate([d[j] for d in detections], axis=0)
                    for j in range(1, dataset.num_classes + 1)
                }
                scores = np.hstack([
                    bbox_and_scores[j][:, 4]
                    for j in range(1, dataset.num_classes + 1)
                ])
                if len(scores) > max_per_image:
                    # Keep only detections at or above the
                    # max_per_image-th best score.
                    kth = len(scores) - max_per_image
                    thresh = np.partition(scores, kth)[kth]
                    for j in range(1, dataset.num_classes + 1):
                        keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                        bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

                results[img_id] = bbox_and_scores

        end_time = time.perf_counter()

        eval_results = dataset.run_eval(results, save_dir=cfg.ckpt_dir)
        print(eval_results)

        print('End time %s Seconds' % end_time)
        Run_time = end_time - start_time
        # Use the number of images actually processed instead of a
        # hard-coded 100.
        FPS = len(results) / Run_time
        print('FPS %s ' % FPS)

        summary_writer.add_scalar(scalar_tag, eval_results[0], epoch)
        return eval_results[0]

    def test_map(epoch):
        """Evaluate on the test split and return the first evaluation metric."""
        return _evaluate_map('Test', 'test_mAP/mAP', test_loader,
                             test_dataset, epoch)

    def val_map(epoch):
        """Evaluate on the validation split and return the first evaluation metric."""
        return _evaluate_map('Val', 'val_mAP/mAP', val_loader, val_dataset,
                             epoch)

    print('Starting training...')
    Max_test_AP = 0  # max test AP
    Max_val_AP = 0  # max validation AP
    flag_epoch = 1  # first epoch at which evaluation is run

    for epoch in range(1, cfg.num_epochs + 1):
        train_sampler.set_epoch(epoch)
        train(epoch)

        if epoch >= flag_epoch:
            test_mAP = test_map(epoch)
            val_mAP = val_map(epoch)
            if (test_mAP > Max_test_AP):
                Max_test_AP = test_mAP
            if (val_mAP > Max_val_AP):
                # A new best validation mAP gets its own checkpoint.
                print(
                    saver.save(model.module.state_dict(),
                               'checkpoint_MaxAP_epoch' + str(epoch)))
                Max_val_AP = val_mAP
        print(saver.save(model.module.state_dict(),
                         'checkpoint'))  # save current epoch

        total = sum(param.nelement()
                    for param in model.parameters())  # calculate parameters
        print("Number of parameter: %.2fM" % (total / 1e6))

        print('Max_test_AP=%s' % Max_test_AP)
        print('Max_val_AP=%s' % Max_val_AP)
        lr_scheduler.step(epoch)  # move to here after pytorch1.1.0

    summary_writer.close()
Ejemplo n.º 16
0
def main():
    """Run multi-scale evaluation of a pretrained hourglass model.

    All settings are read from the module-level ``cfg``. The function
    builds the ``COCO_eval`` dataset for the ``'val'`` split, loads the
    weights stored at ``cfg.pretrain_dir``, decodes detections and center
    keypoints for every test scale of every image, matches them with
    ``center_match``, applies ``soft_nms_merge`` per class, caps the number
    of detections per image at ``val_dataset.max_objs`` and finally hands
    the collected results to ``val_dataset.run_eval``.

    Raises:
        NotImplementedError: if ``cfg.arch`` does not contain 'hourglass'.
        ValueError: if no center keypoints were collected for an image
            (centers are only kept for the scale equal to 1).
    """
    logger = create_logger(save_dir=cfg.log_dir)
    log = logger.info  # local alias; avoids shadowing the builtin print
    log(cfg)

    torch.manual_seed(317)
    torch.backends.cudnn.benchmark = False
    cfg.device = torch.device('cuda')

    log('Setting up data...')
    val_dataset = COCO_eval(cfg.data_dir,
                            'val',
                            test_scales=cfg.test_scales,
                            test_flip=cfg.test_flip)
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=1,
        pin_memory=True,
        collate_fn=val_dataset.collate_fn)

    log('Creating model...')
    if 'hourglass' not in cfg.arch:
        raise NotImplementedError('unsupported arch: %s' % (cfg.arch,))
    model = get_hourglass[cfg.arch]

    model = model.to(cfg.device)
    # map_location keeps loading independent of the device the checkpoint
    # was saved from (e.g. another CUDA index or the CPU).
    # NOTE(review): torch.load unpickles the file; only load trusted
    # checkpoints.
    state_dict = torch.load(cfg.pretrain_dir, map_location=cfg.device)
    model.load_state_dict(state_dict)
    log('loaded pretrained model from %s !' % cfg.pretrain_dir)

    log('validation starts at %s' % datetime.now())
    model.eval()

    num_classes = val_dataset.num_classes
    max_objs = val_dataset.max_objs
    results = {}

    with torch.no_grad():
        for batch in val_loader:
            # batch_size is 1, so only the first collated item is used.
            img_id, scaled_inputs = batch[0]

            detections, centers = [], []
            for scale in scaled_inputs:
                entry = scaled_inputs[scale]
                entry['image'] = entry['image'].to(cfg.device)

                # Only the last element of the model output is decoded.
                output = model(entry['image'])[-1]
                dets, cts = _decode(*output,
                                    ae_threshold=cfg.ae_threshold,
                                    K=cfg.topk,
                                    kernel=3)
                dets = dets.reshape(dets.shape[0], -1,
                                    8).detach().cpu().numpy()
                cts = cts.reshape(cts.shape[0], -1, 4).detach().cpu().numpy()

                if dets.shape[0] == 2:
                    # Presumably row 1 is the horizontally flipped copy
                    # produced by test_flip and fmap_size[0, 1] is the
                    # feature-map width -- TODO confirm against COCO_eval.
                    # Columns 0 and 2 of the detections are mirrored and
                    # swapped; column 0 of the centers is mirrored.
                    fmap_w = entry['fmap_size'][0, 1]
                    dets[1, :, [0, 2]] = fmap_w - dets[1, :, [2, 0]]
                    cts[1, :, [0]] = fmap_w - cts[1, :, [0]]

                # Merge all rows into a single batch entry.
                dets = dets.reshape(1, -1, 8)
                cts = cts.reshape(1, -1, 4)

                # Return value unused: assumed to update dets/cts in place.
                _rescale_dets(dets, cts, entry['ratio'], entry['border'],
                              entry['size'])
                dets[:, :, 0:4] /= scale
                cts[:, :, 0:2] /= scale

                detections.append(dets)
                # Center keypoints are only taken from the scale equal to 1.
                if scale == 1:
                    centers.append(cts)

            if not centers:
                # np.concatenate would fail with an opaque message here.
                raise ValueError(
                    'no center keypoints collected for image %s; '
                    'the test scales must include 1' % (img_id,))

            detections = np.concatenate(detections, axis=1)[0]
            centers = np.concatenate(centers, axis=1)[0]
            detections, classes = center_match(detections, centers)

            # Class ids in the results are 1-based.
            per_class = {}
            for j in range(num_classes):
                class_dets = detections[classes == j][:, 0:7].astype(
                    np.float32)
                # Return value unused: assumed to modify class_dets in
                # place -- TODO confirm against the soft_nms_merge kernel.
                soft_nms_merge(class_dets,
                               Nt=cfg.nms_threshold,
                               method=2,
                               weight_exp=cfg.w_exp)
                # Keep the first five columns; the last of them is used as
                # the score below.
                per_class[j + 1] = class_dets[:, 0:5]

            scores = np.hstack([
                per_class[j][:, -1] for j in range(1, num_classes + 1)
            ])
            if len(scores) > max_objs:
                # Score of the max_objs-th best detection; everything below
                # it is dropped (ties at the threshold are all kept).
                kth = len(scores) - max_objs
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, num_classes + 1):
                    keep_inds = (per_class[j][:, -1] >= thresh)
                    per_class[j] = per_class[j][keep_inds]

            results[img_id] = per_class

    eval_results = val_dataset.run_eval(results, save_dir=cfg.ckpt_dir)
    log(eval_results)
    log('validation ends at %s' % datetime.now())