Code Example #1
def train(train_dataloader, model, optimizer, lr_scheduler):
    def is_valid_number(x):
        # reject NaN/Inf losses and losses that have exploded beyond 1e4
        return not (math.isnan(x) or math.isinf(x) or x > 1e4)

    logger.info("model\n{}".format(describe(model)))
    tb_writer = SummaryWriter(cfg.PRUNING.FINETUNE.LOG_DIR)
    average_meter = AverageMeter()
    start_epoch = cfg.PRUNING.FINETUNE.START_EPOCH
    num_per_epoch = len(
        train_dataloader.dataset) // (cfg.PRUNING.FINETUNE.BATCH_SIZE)
    iter = 0
    if not os.path.exists(cfg.PRUNING.FINETUNE.SNAPSHOT_DIR):
        os.makedirs(cfg.PRUNING.FINETUNE.SNAPSHOT_DIR)
    for epoch in range(cfg.PRUNING.FINETUNE.START_EPOCH,
                       cfg.PRUNING.FINETUNE.EPOCHS):
        train_dataloader.dataset.shuffle()
        lr_scheduler.step(epoch)
        # log for lr
        for idx, pg in enumerate(optimizer.param_groups):
            tb_writer.add_scalar('lr/group{}'.format(idx + 1), pg['lr'], iter)
        cur_lr = lr_scheduler.get_cur_lr()
        for data in train_dataloader:
            begin = time.time()
            examplar_img = data['examplar_img'].cuda()
            search_img = data['search_img'].cuda()
            gt_cls = data['gt_cls'].cuda()
            gt_delta = data['gt_delta'].cuda()
            delta_weight = data['delta_weight'].cuda()
            data_time = time.time() - begin
            losses = model.forward(examplar_img, search_img, gt_cls, gt_delta,
                                   delta_weight)
            cls_loss = losses['cls_loss']
            loc_loss = losses['loc_loss']
            loss = losses['total_loss']

            if is_valid_number(loss.item()):
                optimizer.zero_grad()
                loss.backward()
                if cfg.PRUNING.FINETUNE.LOG_GRAD:
                    log_grads(model.module, tb_writer, iter)
                clip_grad_norm_(model.parameters(),
                                cfg.PRUNING.FINETUNE.GRAD_CLIP)
                optimizer.step()

            batch_time = time.time() - begin
            batch_info = {}
            batch_info['data_time'] = data_time
            batch_info['batch_time'] = batch_time
            for k, v in losses.items():
                batch_info[k] = v
            average_meter.update(**batch_info)
            for k, v in batch_info.items():
                tb_writer.add_scalar(k, v, iter)

            if iter % cfg.TRAIN.PRINT_EVERY == 0:
                logger.info(
                    'epoch: {}, iter: {}, cur_lr:{}, cls_loss: {}, loc_loss: {}, loss: {}'
                    .format(epoch + 1, iter, cur_lr, cls_loss.item(),
                            loc_loss.item(), loss.item()))
                print_speed(iter + 1 + start_epoch * num_per_epoch,
                            average_meter.batch_time.avg,
                            cfg.PRUNING.FINETUNE.EPOCHS * num_per_epoch)
            iter += 1
        # save model
        state = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'epoch': epoch + 1,
            'mask': model.mask,
            'mask_scores': model.mask_scores
        }
        logger.info('save snapshot to {}/checkpoint_e{}.pth'.format(
            cfg.PRUNING.FINETUNE.SNAPSHOT_DIR, epoch + 1))
        torch.save(
            state,
            '{}/checkpoint_e{}.pth'.format(cfg.PRUNING.FINETUNE.SNAPSHOT_DIR,
                                           epoch + 1))
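
All of these snippets lean on a few small utilities that are defined elsewhere in their projects: an AverageMeter that keeps running averages per named quantity, a print_speed progress/ETA logger, and a logger registered under the name 'global'. The sketch below is purely illustrative and only shows interfaces compatible with the calls in these snippets; the real implementations will differ in formatting and details.

import logging


class Meter(object):
    """Tracks the latest value and running average of one scalar (sketch)."""

    def __init__(self):
        self.val, self.sum, self.count, self.avg = 0.0, 0.0, 0, 0.0

    def update(self, val):
        self.val = float(val)
        self.sum += self.val
        self.count += 1
        self.avg = self.sum / self.count

    def __format__(self, spec):
        # lets the snippets write '{batch_time:s}'.format(batch_time=meter)
        return format('%.4f (%.4f)' % (self.val, self.avg), spec)


class AverageMeter(object):
    """Keeps one Meter per keyword, e.g. avg.update(batch_time=0.3, loss=1.2)."""

    def update(self, **kwargs):
        for name, value in kwargs.items():
            if not hasattr(self, name):
                setattr(self, name, Meter())
            getattr(self, name).update(value)


def print_speed(i, i_time, n, logger_name='global'):
    """Log progress i/n and a rough ETA, given i_time seconds per iteration."""
    logger = logging.getLogger(logger_name)
    remaining = (n - i) * i_time
    days, rest = divmod(remaining, 86400)
    hours, secs = divmod(rest, 3600)
    logger.info('Progress: %d / %d [%d%%], %.3f s/iter, ETA: %dd %dh %dm' %
                (i, n, i / max(n, 1) * 100, i_time, days, hours, secs / 60))
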
Code Example #2
def train(train_loader, model, optimizer, lr_scheduler, epoch, cfg):
    """
    模型训练
    :param train_loader:训练数据
    :param model:
    :param optimizer:
    :param lr_scheduler:
    :param epoch:
    :param cfg:
    :return:
    """

    global tb_index, best_acc, cur_lr, logger
    # get the current learning rate
    cur_lr = lr_scheduler.get_cur_lr()
    logger = logging.getLogger('global')
    #
    avg = AverageMeter()
    model.train()
    # GPU
    #  model = model.cuda()
    end = time.time()

    def is_valid_number(x):
        return not (math.isnan(x) or math.isinf(x) or x > 1e4)

    num_per_epoch = len(train_loader.dataset) // args.epochs // args.batch
    print("num_per_epoch", num_per_epoch)
    start_epoch = epoch
    epoch = epoch
    # get the input of each batch
    for iter, input in enumerate(train_loader):
        if epoch != iter // num_per_epoch + start_epoch:  # next epoch
            epoch = iter // num_per_epoch + start_epoch
            # create the checkpoint directory
            if not os.path.exists(args.save_dir):  # makedir/save model
                os.makedirs(args.save_dir)
            # save a training checkpoint
            save_checkpoint(
                {
                    'epoch': epoch,
                    'arch': args.arch,
                    'state_dict': model.module.state_dict(),
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                    'anchor_cfg': cfg['anchors']
                }, False,
                os.path.join(args.save_dir, 'checkpoint_e%d.pth' % (epoch)),
                os.path.join(args.save_dir, 'best.pth'))

            if epoch == args.epochs:
                return
            # update the optimizer and learning-rate scheduler
            if model.module.features.unfix(epoch / args.epochs):
                logger.info('unfix part model.')
                optimizer, lr_scheduler = build_opt_lr(model.module, cfg, args,
                                                       epoch)
            # get the current learning rate
            lr_scheduler.step(epoch)
            cur_lr = lr_scheduler.get_cur_lr()

            logger.info('epoch:{}'.format(epoch))
        # update the log
        tb_index = iter
        if iter % num_per_epoch == 0 and iter != 0:
            for idx, pg in enumerate(optimizer.param_groups):
                logger.info("epoch {} lr {}".format(epoch, pg['lr']))
                tb_writer.add_scalar('lr/group%d' % (idx + 1), pg['lr'],
                                     tb_index)

        data_time = time.time() - end
        avg.update(data_time=data_time)
        # input data
        x = {
            # GPU
            # 'cfg': cfg,
            # 'template': torch.autograd.Variable(input[0]).cuda(),
            # 'search': torch.autograd.Variable(input[1]).cuda(),
            # 'label_cls': torch.autograd.Variable(input[2]).cuda(),
            # 'label_loc': torch.autograd.Variable(input[3]).cuda(),
            # 'label_loc_weight': torch.autograd.Variable(input[4]).cuda(),
            # 'label_mask': torch.autograd.Variable(input[6]).cuda(),
            # 'label_mask_weight': torch.autograd.Variable(input[7]).cuda(),
            'cfg': cfg,
            'template': torch.autograd.Variable(input[0]),
            'search': torch.autograd.Variable(input[1]),
            'label_cls': torch.autograd.Variable(input[2]),
            'label_loc': torch.autograd.Variable(input[3]),
            'label_loc_weight': torch.autograd.Variable(input[4]),
            'label_mask': torch.autograd.Variable(input[6]),
            'label_mask_weight': torch.autograd.Variable(input[7]),
        }
        # model outputs
        outputs = model(x)

        # compute the losses
        rpn_cls_loss, rpn_loc_loss, rpn_mask_loss = torch.mean(
            outputs['losses'][0]), torch.mean(
                outputs['losses'][1]), torch.mean(outputs['losses'][2])
        # compute the accuracy
        mask_iou_mean, mask_iou_at_5, mask_iou_at_7 = torch.mean(
            outputs['accuracy'][0]), torch.mean(
                outputs['accuracy'][1]), torch.mean(outputs['accuracy'][2])
        # weights of the classification, regression and mask losses
        cls_weight, reg_weight, mask_weight = cfg['loss']['weight']
        # compute the total loss
        loss = rpn_cls_loss * cls_weight + rpn_loc_loss * reg_weight + rpn_mask_loss * mask_weight
        # zero the gradients
        optimizer.zero_grad()
        # backpropagation
        loss.backward()

        if cfg['clip']['split']:
            torch.nn.utils.clip_grad_norm_(model.module.features.parameters(),
                                           cfg['clip']['feature'])
            torch.nn.utils.clip_grad_norm_(model.module.rpn_model.parameters(),
                                           cfg['clip']['rpn'])
            torch.nn.utils.clip_grad_norm_(
                model.module.mask_model.parameters(), cfg['clip']['mask'])
        else:
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           args.clip)  # gradient clip

        if is_valid_number(loss.item()):
            optimizer.step()

        siammask_loss = loss.item()

        batch_time = time.time() - end
        # update running averages
        avg.update(batch_time=batch_time,
                   rpn_cls_loss=rpn_cls_loss,
                   rpn_loc_loss=rpn_loc_loss,
                   rpn_mask_loss=rpn_mask_loss,
                   siammask_loss=siammask_loss,
                   mask_iou_mean=mask_iou_mean,
                   mask_iou_at_5=mask_iou_at_5,
                   mask_iou_at_7=mask_iou_at_7)
        # write metrics to tensorboard
        tb_writer.add_scalar('loss/cls', rpn_cls_loss, tb_index)
        tb_writer.add_scalar('loss/loc', rpn_loc_loss, tb_index)
        tb_writer.add_scalar('loss/mask', rpn_mask_loss, tb_index)
        tb_writer.add_scalar('mask/mIoU', mask_iou_mean, tb_index)
        tb_writer.add_scalar('mask/mIoU@0.5', mask_iou_at_5, tb_index)
        tb_writer.add_scalar('mask/mIoU@0.7', mask_iou_at_7, tb_index)
        end = time.time()
        # log output
        if (iter + 1) % args.print_freq == 0:
            logger.info(
                'Epoch: [{0}][{1}/{2}] lr: {lr:.6f}\t{batch_time:s}\t{data_time:s}'
                '\t{rpn_cls_loss:s}\t{rpn_loc_loss:s}\t{rpn_mask_loss:s}\t{siammask_loss:s}'
                '\t{mask_iou_mean:s}\t{mask_iou_at_5:s}\t{mask_iou_at_7:s}'.
                format(epoch + 1, (iter + 1) % num_per_epoch,
                       num_per_epoch,
                       lr=cur_lr,
                       batch_time=avg.batch_time,
                       data_time=avg.data_time,
                       rpn_cls_loss=avg.rpn_cls_loss,
                       rpn_loc_loss=avg.rpn_loc_loss,
                       rpn_mask_loss=avg.rpn_mask_loss,
                       siammask_loss=avg.siammask_loss,
                       mask_iou_mean=avg.mask_iou_mean,
                       mask_iou_at_5=avg.mask_iou_at_5,
                       mask_iou_at_7=avg.mask_iou_at_7))
            print_speed(iter + 1, avg.batch_time.avg,
                        args.epochs * num_per_epoch)
Code Example #3
def train(train_loader, model, optimizer, lr_scheduler, epoch, cfg):
    global tb_index, best_acc, cur_lr, logger
    cur_lr = lr_scheduler.get_cur_lr()
    logger = logging.getLogger('global')
    avg = AverageMeter()
    model.module.features.eval()
    model.module.rpn_model.eval()
    model.module.mask_model.eval()
    # model.train()
    model = model.cuda()
    end = time.time()

    def is_valid_number(x):
        return not (math.isnan(x) or math.isinf(x) or x > 1e4)

    num_per_epoch = len(train_loader.dataset) // args.epochs // args.batch
    start_epoch = epoch
    epoch = epoch
    for iter, input in enumerate(train_loader):

        if epoch != iter // num_per_epoch + start_epoch:  # next epoch
            epoch = iter // num_per_epoch + start_epoch

            if not os.path.exists(args.save_dir):  # makedir/save model
                os.makedirs(args.save_dir)

            save_checkpoint(
                {
                    'epoch': epoch,
                    'arch': args.arch,
                    'state_dict': model.module.state_dict(),
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                    'anchor_cfg': cfg['anchors']
                }, False,
                os.path.join(args.save_dir, 'checkpoint_e%d.pth' % (epoch)),
                os.path.join(args.save_dir, 'best.pth'))

            if epoch == args.epochs:
                return

            if model.module.features.unfix(epoch / args.epochs):
                logger.info('unfix part model.')
                optimizer, lr_scheduler = build_opt_lr(model.module, cfg, args,
                                                       epoch)

            lr_scheduler.step(epoch)
            cur_lr = lr_scheduler.get_cur_lr()

            logger.info('epoch:{}'.format(epoch))

        tb_index = iter
        if iter % num_per_epoch == 0 and iter != 0:
            for idx, pg in enumerate(optimizer.param_groups):
                logger.info("epoch {} lr {}".format(epoch, pg['lr']))
                tb_writer.add_scalar('lr/group%d' % (idx + 1), pg['lr'],
                                     tb_index)

        data_time = time.time() - end
        avg.update(data_time=data_time)
        x = {
            'cfg': cfg,
            'template': torch.autograd.Variable(input[0]).cuda(),
            'search': torch.autograd.Variable(input[1]).cuda(),
            'label_cls': torch.autograd.Variable(input[2]).cuda(),
            'label_loc': torch.autograd.Variable(input[3]).cuda(),
            'label_loc_weight': torch.autograd.Variable(input[4]).cuda(),
            'label_mask': torch.autograd.Variable(input[6]).cuda(),
            'label_kp_weight': torch.autograd.Variable(input[7]).cuda(),
            'label_mask_weight': torch.autograd.Variable(input[8]).cuda(),
            'label_kp': torch.autograd.Variable(input[9]).cuda()
        }

        outputs = model(x)

        rpn_cls_loss, rpn_loc_loss, rpn_kp_loss, rpn_heatmap_loss = torch.mean(outputs['losses'][0]),\
                                                                    torch.mean(outputs['losses'][1]),\
                                                                    torch.mean(outputs['losses'][2]),\
                                                                    torch.mean(outputs['losses'][3])

        # mask_iou_mean, mask_iou_at_5, mask_iou_at_7 = torch.mean(outputs['accuracy'][0]), torch.mean(outputs['accuracy'][1]), torch.mean(outputs['accuracy'][2])
        htmap_pred = outputs['predict'][-1]
        kp_pred = outputs['predict'][-2]
        htmap_pred = htmap_pred.squeeze(1)
        kp_pred = kp_pred.squeeze(1)
        # htmap_pred = htmap_pred.permute(0, 2, 3, 1)
        # kp_pred = kp_pred.permute(0, 2, 3, 1)
        # debug: visualize the predicted heatmap and keypoint map of the first sample
        htmap_pred = htmap_pred.cpu().detach().numpy()
        kp_pred = kp_pred.cpu().detach().numpy()
        f, (ax1, ax2) = plt.subplots(1, 2)
        print(htmap_pred[0].shape)
        print(kp_pred[0].shape)
        ax1.imshow(htmap_pred[0])
        ax2.imshow(kp_pred[0])
        plt.show()

        cls_weight, reg_weight, kp_weight, heatmap_weight = cfg['loss'][
            'weight']

        loss = rpn_cls_loss * cls_weight + rpn_loc_loss * reg_weight + rpn_kp_loss * kp_weight + rpn_heatmap_loss * heatmap_weight

        optimizer.zero_grad()
        loss.backward()

        if cfg['clip']['split']:
            torch.nn.utils.clip_grad_norm_(model.module.features.parameters(),
                                           cfg['clip']['feature'])
            torch.nn.utils.clip_grad_norm_(model.module.rpn_model.parameters(),
                                           cfg['clip']['rpn'])
            torch.nn.utils.clip_grad_norm_(
                model.module.mask_model.parameters(), cfg['clip']['mask'])
        else:
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           args.clip)  # gradient clip

        if is_valid_number(loss.item()):
            optimizer.step()

        siammask_loss = loss.item()

        batch_time = time.time() - end

        avg.update(batch_time=batch_time,
                   rpn_cls_loss=rpn_cls_loss,
                   rpn_loc_loss=rpn_loc_loss,
                   rpn_kp_loss=rpn_kp_loss * kp_weight,
                   rpn_heatmap_loss=rpn_heatmap_loss * heatmap_weight,
                   siammask_loss=siammask_loss)
        # mask_iou_mean=mask_iou_mean, mask_iou_at_5=mask_iou_at_5, mask_iou_at_7=mask_iou_at_7)

        tb_writer.add_scalar('loss/cls', rpn_cls_loss, tb_index)
        tb_writer.add_scalar('loss/loc', rpn_loc_loss, tb_index)
        tb_writer.add_scalar('loss/kp_reg', rpn_kp_loss * kp_weight, tb_index)
        tb_writer.add_scalar('loss/heatmap', rpn_heatmap_loss * heatmap_weight,
                             tb_index)
        # tb_writer.add_scalar('mask/mIoU', mask_iou_mean, tb_index)
        # tb_writer.add_scalar('mask/mIoU@0.5', mask_iou_at_5, tb_index)
        # tb_writer.add_scalar('mask/mIoU@0.7', mask_iou_at_7, tb_index)
        end = time.time()

        if (iter + 1) % args.print_freq == 0:
            logger.info(
                'Epoch: [{0}][{1}/{2}] lr: {lr:.6f}\t{batch_time:s}\t{data_time:s}'
                '\t{rpn_cls_loss:s}\t{rpn_loc_loss:s}\t{rpn_kp_loss:s}\t{rpn_heatmap_loss:s}\t{siammask_loss:s}'
                .format(epoch + 1, (iter + 1) % num_per_epoch,
                        num_per_epoch,
                        lr=cur_lr,
                        batch_time=avg.batch_time,
                        data_time=avg.data_time,
                        rpn_cls_loss=avg.rpn_cls_loss,
                        rpn_loc_loss=avg.rpn_loc_loss,
                        rpn_kp_loss=avg.rpn_kp_loss,
                        rpn_heatmap_loss=avg.rpn_heatmap_loss,
                        siammask_loss=avg.siammask_loss))
            # mask_iou_mean=avg.mask_iou_mean,
            # mask_iou_at_5=avg.mask_iou_at_5,mask_iou_at_7=avg.mask_iou_at_7))
            print_speed(iter + 1, avg.batch_time.avg,
                        args.epochs * num_per_epoch)
Code Example #4
                             epoch * train_lenth + step + 1)
        writer.add_scalars(
            'loss/merge', {
                "train_loss": train_loss,
                "test_loss": test_loss,
                "train_metric": train_metric,
                "test_metric": test_metric
            }, epoch * train_lenth + step + 1)

        # update the averager
        avg.update(step_time=step_time,
                   train_loss=train_loss,
                   test_loss=test_loss,
                   train_metric=train_metric)  # compute running averages

        # print the results
        if (step + 1) % print_freq == 0:
            global_logger.info(
                'Epoch: [{0}][{1}/{2}] {step_time:s}\t{train_loss:s}\t{test_loss:s}\t{train_metric:s}'
                .format(epoch + 1, (step + 1) % train_lenth,
                        train_lenth,
                        step_time=avg.step_time,
                        train_loss=avg.train_loss,
                        test_loss=avg.test_loss,
                        train_metric=avg.train_metric))
            print_speed(epoch * train_lenth + step + 1, avg.step_time.avg,
                        epoches * train_lenth)

    # step the scheduler
    scheduler.step()
Code Example #5
def train(train_loader, model, optimizer, lr_scheduler, epoch, cfg):
    global tb_index, best_acc, cur_lr, logger
    cur_lr = lr_scheduler.get_cur_lr()
    logger = logging.getLogger('global')
    avg = AverageMeter()
    model.train()
    model.module.features.eval()
    model.module.rpn_model.eval()
    model.module.features.apply(BNtoFixed)
    model.module.rpn_model.apply(BNtoFixed)

    model.module.mask_model.train()
    model.module.refine_model.train()
    model = model.cuda()
    end = time.time()

    def is_valid_number(x):
        return not (math.isnan(x) or math.isinf(x) or x > 1e4)

    num_per_epoch = len(train_loader.dataset) // args.epochs // args.batch
    start_epoch = epoch
    epoch = epoch
    for iter, input in enumerate(train_loader):

        if epoch != iter // num_per_epoch + start_epoch:  # next epoch
            epoch = iter // num_per_epoch + start_epoch

            if not os.path.exists(args.save_dir):  # makedir/save model
                os.makedirs(args.save_dir)

            save_checkpoint(
                {
                    'epoch': epoch,
                    'arch': args.arch,
                    'state_dict': model.module.state_dict(),
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                    'anchor_cfg': cfg['anchors']
                }, False,
                os.path.join(args.save_dir, 'checkpoint_e%d.pth' % (epoch)),
                os.path.join(args.save_dir, 'best.pth'))

            if epoch == args.epochs:
                return

            optimizer, lr_scheduler = build_opt_lr(model.module, cfg, args,
                                                   epoch)

            lr_scheduler.step(epoch)
            cur_lr = lr_scheduler.get_cur_lr()

            logger.info('epoch:{}'.format(epoch))

        tb_index = iter
        if iter % num_per_epoch == 0 and iter != 0:
            for idx, pg in enumerate(optimizer.param_groups):
                logger.info("epoch {} lr {}".format(epoch, pg['lr']))
                tb_writer.add_scalar('lr/group%d' % (idx + 1), pg['lr'],
                                     tb_index)

        data_time = time.time() - end
        avg.update(data_time=data_time)
        track12 = {
            'cfg': cfg,
            'template': torch.autograd.Variable(input[0][0]).cuda(),
            'search': torch.autograd.Variable(input[0][1]).cuda(),
            'label_cls': torch.autograd.Variable(input[0][2]).cuda(),
            'label_loc': torch.autograd.Variable(input[0][3]).cuda(),
            'label_loc_weight': torch.autograd.Variable(input[0][4]).cuda(),
            'template_bbox': torch.autograd.Variable(input[0][5]).cuda(),
            'label_mask': torch.autograd.Variable(input[0][6]).cuda(),
            'label_mask_weight': torch.autograd.Variable(input[0][7]).cuda(),
        }
        track21 = {
            'cfg': cfg,
            'template': torch.autograd.Variable(input[1][0]).cuda(),
            'search': torch.autograd.Variable(input[1][1]).cuda(),
            'label_cls': torch.autograd.Variable(input[1][2]).cuda(),
            'label_loc': torch.autograd.Variable(input[1][3]).cuda(),
            'label_loc_weight': torch.autograd.Variable(input[1][4]).cuda(),
            'template_bbox': torch.autograd.Variable(input[1][5]).cuda(),
            'label_mask': torch.autograd.Variable(input[1][6]).cuda(),
            'label_mask_weight': torch.autograd.Variable(input[1][7]).cuda(),
        }

        # ========================== cycle forward frame1 -> frame2 ===================================
        outputs12 = model(track12, softmax=False)
        out_patch12 = trackres(cfg, outputs12, track12)
        track21['template'] = torch.autograd.Variable(
            torch.from_numpy(out_patch12).float()).cuda()
        # ========================== cycle backward frame2 -> frame1 ===================================
        outputs = model(track21, softmax=True)

        rpn_cls_loss, rpn_loc_loss, rpn_mask_loss = torch.mean(
            outputs['losses'][0]), torch.mean(
                outputs['losses'][1]), torch.mean(outputs['losses'][2])
        mask_iou_mean, mask_iou_at_5, mask_iou_at_7 = torch.mean(
            outputs['accuracy'][0]), torch.mean(
                outputs['accuracy'][1]), torch.mean(outputs['accuracy'][2])

        cls_weight, reg_weight, mask_weight = cfg['loss']['weight']

        loss = rpn_cls_loss * cls_weight + rpn_loc_loss * reg_weight + rpn_mask_loss * mask_weight

        optimizer.zero_grad()
        loss.backward()

        if cfg['clip']['split']:
            torch.nn.utils.clip_grad_norm_(model.module.features.parameters(),
                                           cfg['clip']['feature'])
            torch.nn.utils.clip_grad_norm_(model.module.rpn_model.parameters(),
                                           cfg['clip']['rpn'])
            torch.nn.utils.clip_grad_norm_(
                model.module.mask_model.parameters(), cfg['clip']['mask'])
            torch.nn.utils.clip_grad_norm_(
                model.module.refine_model.parameters(), cfg['clip']['mask'])
        else:
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           args.clip)  # gradient clip

        if is_valid_number(loss.item()):
            optimizer.step()

        siammask_loss = loss.item()

        batch_time = time.time() - end

        rpn_cls_loss = min(rpn_cls_loss.item(), 1)
        rpn_loc_loss = min(rpn_loc_loss.item(), 1)
        rpn_mask_loss = min(rpn_mask_loss.item(), 1)
        mask_iou_mean = mask_iou_mean.item()
        mask_iou_at_5 = mask_iou_at_5.item()
        mask_iou_at_7 = mask_iou_at_7.item()

        avg.update(batch_time=batch_time,
                   rpn_cls_loss=rpn_cls_loss,
                   rpn_loc_loss=rpn_loc_loss,
                   rpn_mask_loss=rpn_mask_loss,
                   siammask_loss=siammask_loss,
                   mask_iou_mean=mask_iou_mean,
                   mask_iou_at_5=mask_iou_at_5,
                   mask_iou_at_7=mask_iou_at_7)

        tb_writer.add_scalar('loss/cls', rpn_cls_loss, tb_index)
        tb_writer.add_scalar('loss/loc', rpn_loc_loss, tb_index)
        tb_writer.add_scalar('loss/mask', rpn_mask_loss, tb_index)
        tb_writer.add_scalar('mask/mIoU', mask_iou_mean, tb_index)
        tb_writer.add_scalar('mask/mIoU@0.5', mask_iou_at_5, tb_index)
        tb_writer.add_scalar('mask/mIoU@0.7', mask_iou_at_7, tb_index)
        end = time.time()

        if (iter + 1) % args.print_freq == 0:
            logger.info(
                'Epoch: [{0}][{1}/{2}] lr: {lr:.6f}\t{batch_time:s}\t{data_time:s}'
                '\t{rpn_cls_loss:s}\t{rpn_loc_loss:s}\t{rpn_mask_loss:s}\t{siammask_loss:s}'
                '\t{mask_iou_mean:s}\t{mask_iou_at_5:s}\t{mask_iou_at_7:s}'.
                format(epoch + 1, (iter + 1) % num_per_epoch,
                       num_per_epoch,
                       lr=cur_lr,
                       batch_time=avg.batch_time,
                       data_time=avg.data_time,
                       rpn_cls_loss=avg.rpn_cls_loss,
                       rpn_loc_loss=avg.rpn_loc_loss,
                       rpn_mask_loss=avg.rpn_mask_loss,
                       siammask_loss=avg.siammask_loss,
                       mask_iou_mean=avg.mask_iou_mean,
                       mask_iou_at_5=avg.mask_iou_at_5,
                       mask_iou_at_7=avg.mask_iou_at_7))
            print_speed(iter + 1, avg.batch_time.avg,
                        args.epochs * num_per_epoch)
Code Example #6
def train_one_epoch(train_loader, model, optimizer, device, epoch):
    logger = logging.getLogger('global')
    model.train()

    lossitem, abs_diff, abs_rel, sq_rel, a1, a2, a3 = 0, 0, 0, 0, 0, 0, 0
    num_batches = 0.0

    for i_batch, sample_batched in enumerate(train_loader):
        t0 = time.time()

        rgb = sample_batched['rgb'].type(torch.FloatTensor)
        depth = sample_batched['depth']
        s1 = sample_batched['s1'].type(torch.FloatTensor)
        s2 = sample_batched['s2'].type(torch.FloatTensor)

        b = rgb.shape[0]

        rgb = rgb.to(device)
        depth = depth.to(device)
        s1 = s1.to(device)
        s2 = s2.to(device)

        s1.unsqueeze_(-1)
        s2.unsqueeze_(-1)

        s1s2 = torch.cat((s1, s2), 3)

        rgb = rgb.permute(0, 3, 1, 2)
        s1s2 = s1s2.permute(0, 3, 1, 2)

        # zero the parameter gradients
        optimizer.zero_grad()

        depth_predict = model(rgb, s1s2)

        depth_predict.squeeze_(1)
        loss = l2_loss(depth, depth_predict)

        # backward + optimize
        loss.backward()
        optimizer.step()

        lossitem0 = loss.item()
        abs_diff0, abs_rel0, sq_rel0, a10, a20, a30 = compute_errors(
            depth, depth_predict)

        num_batches += 1
        lossitem += lossitem0
        abs_diff += abs_diff0
        abs_rel += abs_rel0
        sq_rel += sq_rel0
        a1 += a10
        a2 += a20
        a3 += a30

        t1 = time.time()
        print_speed(i_batch, t1 - t0, len(train_loader))

    lossitem = lossitem / num_batches
    abs_diff = abs_diff / num_batches
    abs_rel = abs_rel / num_batches
    sq_rel = sq_rel / num_batches
    a1 = a1 / num_batches
    a2 = a2 / num_batches
    a3 = a3 / num_batches

    logger.info('Train Loss: {:.4f},'.format(lossitem))
    logger.info('                    abs_diff: {:.4f}'.format(abs_diff))
    logger.info('                    abs_rel: {:.4f}'.format(abs_rel))
    logger.info('                    sq_rel: {:.4f}'.format(sq_rel))
    logger.info('                    a1: {:.4f}'.format(a1))
    logger.info('                    a2: {:.4f}'.format(a2))
    logger.info('                    a3: {:.4f}'.format(a3))

    writer.add_scalar('train/loss', lossitem, epoch)
    writer.add_scalar('train/abs_diff', abs_diff, epoch)
    writer.add_scalar('train/abs_rel', abs_rel, epoch)
    writer.add_scalar('train/sq_rel', sq_rel, epoch)
    writer.add_scalar('train/a1', a1, epoch)
    writer.add_scalar('train/a2', a2, epoch)
    writer.add_scalar('train/a3', a3, epoch)
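
Code Example #6 calls a compute_errors helper that is not shown. The sketch below computes the usual monocular-depth metrics (mean absolute and relative error, squared relative error, and the delta < 1.25^k threshold accuracies) and is only an assumption about what that helper does; the original may mask or clamp the depths differently.

import torch


def compute_errors(gt, pred, eps=1e-6):
    """Standard depth-estimation metrics; a sketch, not the original helper.

    gt, pred: tensors of the same shape holding ground-truth and predicted depth.
    Returns (abs_diff, abs_rel, sq_rel, a1, a2, a3) as Python floats.
    """
    valid = gt > eps                          # ignore empty / zero-depth pixels
    gt, pred = gt[valid], pred[valid].clamp(min=eps)

    abs_diff = (gt - pred).abs().mean()
    abs_rel = ((gt - pred).abs() / gt).mean()
    sq_rel = ((gt - pred) ** 2 / gt).mean()

    ratio = torch.max(gt / pred, pred / gt)   # threshold accuracy: ratio < 1.25^k
    a1 = (ratio < 1.25).float().mean()
    a2 = (ratio < 1.25 ** 2).float().mean()
    a3 = (ratio < 1.25 ** 3).float().mean()

    return (abs_diff.item(), abs_rel.item(), sq_rel.item(),
            a1.item(), a2.item(), a3.item())
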
Code Example #7
def validate(val_loader, model, cfg):
    logger = logging.getLogger('global')
    try:
        rank = dist.get_rank()
        world_size = dist.get_world_size()
    except Exception as e:
        print(e)
        rank, world_size = 0, 1

    # switch to evaluate mode
    model.eval()

    total_rc = 0
    total_gt = 0

    logger.info('start validate')
    if not os.path.exists(args.results_dir):
        try:
            os.makedirs(args.results_dir)
        except Exception as e:
            print(e)
    fout = open(os.path.join(args.results_dir, 'results.json.rank%d' % rank),
                'w')

    for iter, input in enumerate(val_loader):
        img = torch.autograd.Variable(input[0]).cuda()
        img_info = input[1]
        gt_boxes = input[2]
        filenames = input[-1]
        x = {
            'cfg': cfg,
            'image': img,
            'image_info': img_info[:, :3],
            'ground_truth_bboxes': gt_boxes,
            'ignore_regions': None,
            'ground_truth_keypoints': None,
            'ground_truth_masks': None
        }
        batch_size = img.shape[0]
        t0 = time.time()
        outputs = model(x)['predict']
        t2 = time.time()

        proposals = outputs[0].data.cpu().numpy()
        bboxes = outputs[1].data.cpu().numpy()
        #keypoints = outputs[2].data.cpu().numpy()
        if isinstance(outputs[2], torch.autograd.Variable):
            keypoints = outputs[2].data.cpu().numpy()
            masks = None
        else:
            keypoints = None
            masks = outputs[2]
        # heatmap = outputs[3].data.cpu().numpy()
        if torch.is_tensor(gt_boxes):
            gt_boxes = gt_boxes.cpu().numpy()

        image_info = img_info
        img_ids = [
            _.split('/')[-1].split('_')[-1].split('.')[0] for _ in filenames
        ]
        image_info = [list(x) + [y] for x, y in zip(image_info, img_ids)]

        # visualize results
        #vis_helper.vis_results(args.results_dir, image_info, bboxes, keypoints, masks, heatmap, cfg['shared']['class_names'])
        write_results_to_file(fout,
                              image_info,
                              bboxes,
                              keypoints,
                              masks,
                              mask_thresh=0.5,
                              keep_num=100)

        # rpn recall
        for b_ix in range(batch_size):
            rois_per_image = proposals[proposals[:, 0] == b_ix]
            gts_per_image = gt_boxes[b_ix]
            num_rc, num_gt = bbox_helper.compute_recall(
                rois_per_image[:, 1:1 + 4], gts_per_image)
            total_gt += num_gt
            total_rc += num_rc
        logger.info('Test: [%d/%d] Time: %.3f %d/%d' %
                    (iter, len(val_loader), t2 - t0, total_rc, total_gt))
        print_speed(iter + 1, t2 - t0, len(val_loader))
    logger.info('rpn300 recall=%f' % (total_rc / total_gt))
    fout.close()
    return total_rc / total_gt
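
Code Example #7 (and #12 below) counts RPN recall via bbox_helper.compute_recall, which is not shown. The sketch below is a guess at what such a helper computes, i.e. how many ground-truth boxes are matched by at least one proposal at some IoU threshold; the project's own box format and threshold may differ.

import numpy as np


def compute_recall(rois, gts, iou_thresh=0.5):
    """Count ground-truth boxes covered by at least one proposal (sketch).

    rois: (N, 4) proposals as [x1, y1, x2, y2]; gts: (M, 4+) ground-truth boxes.
    Returns (num_recalled, num_gt).
    """
    rois = np.asarray(rois, dtype=np.float32)
    gts = np.asarray(gts, dtype=np.float32)[:, :4]
    if len(gts) == 0:
        return 0, 0
    if len(rois) == 0:
        return 0, len(gts)
    recalled = 0
    for gt in gts:
        # intersection of every proposal with this ground-truth box
        ix1 = np.maximum(rois[:, 0], gt[0]); iy1 = np.maximum(rois[:, 1], gt[1])
        ix2 = np.minimum(rois[:, 2], gt[2]); iy2 = np.minimum(rois[:, 3], gt[3])
        inter = np.clip(ix2 - ix1, 0, None) * np.clip(iy2 - iy1, 0, None)
        area_r = (rois[:, 2] - rois[:, 0]) * (rois[:, 3] - rois[:, 1])
        area_g = (gt[2] - gt[0]) * (gt[3] - gt[1])
        iou = inter / np.maximum(area_r + area_g - inter, 1e-9)
        recalled += int((iou >= iou_thresh).any())
    return recalled, len(gts)
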
Code Example #8
def validation(epoch, log_interval, test_dataloader, model, loss, writer,
               device):
    """Validate on test dataset.

    Current validation is only for loss, pos|neg_distance.
    In future, we will add more validation like MAP5|10|50|100. 
    (maybe in another file.)

    Args:
        log_interval:
            How many time will the logger log once.
        test_dataloader:
            It should not be none! A Triplet dataloader to validate data.
        model:
            The model that used to test on dataset.
        loss: 
            Loss metric.
        writer:
            Tensorboard writer
        device: 
            Device that model compute on

    Return:
        epoch avrage value:
            triplet_loss, pos_dists, neg_dists
    
    """
    logger.info(
        "\n------------------------- Start validation -------------------------\n"
    )
    # epoch average meter
    avg_test = AverageMeter()

    # get test batch count
    current_test_batch = 0
    total_test_batch = len(test_dataloader)

    # check dataloader is not None
    assert test_dataloader is not None, "test_dataloader should not be None."

    for batch_idx, batch_sample in enumerate(test_dataloader):
        # Skip last iteration to avoid the problem of having different number of tensors while calculating
        # averages (sizes of tensors must be the same for pairwise distance calculation)
        if batch_idx + 1 == len(test_dataloader):
            continue

        # switch to evaluation mode.
        for param in model.parameters():
            param.requires_grad = False
        model.eval()

        # start time counting
        batch_start_time_test = time.time()

        # Forward pass - compute embeddings
        anc_imgs = batch_sample['anchor_img']
        pos_imgs = batch_sample['pos_img']
        neg_imgs = batch_sample['neg_img']

        pos_cls = batch_sample['pos_cls']
        neg_cls = batch_sample['neg_cls']

        # move to device
        anc_imgs = anc_imgs.to(device)
        pos_imgs = pos_imgs.to(device)
        neg_imgs = neg_imgs.to(device)
        pos_cls = pos_cls.to(device)
        neg_cls = neg_cls.to(device)

        # forward
        output = model.forward_triplet(anc_imgs, pos_imgs, neg_imgs)

        # get output
        anc_emb = output['anchor_map']
        pos_emb = output['pos_map']
        neg_emb = output['neg_map']

        pos_dists = torch.mean(output['dist_pos'])
        neg_dists = torch.mean(output['dist_neg'])

        # loss compute
        loss_value = loss(anc_emb, pos_emb, neg_emb)

        # batch time & batch count
        current_test_batch += 1
        batch_time = time.time() - batch_start_time_test

        # update avg
        avg_test.update(time=batch_time,
                        triplet_loss=loss_value,
                        pos_dists=pos_dists,
                        neg_dists=neg_dists)
        if current_test_batch % log_interval == 0:
            print_speed(current_test_batch, batch_time, total_test_batch,
                        "global")
            logger.info(
                "\n current global average information:\n batch_time {0:.5f} | triplet_loss: {1:.5f} | pos_dists: {2:.5f} | neg_dists: {3:.5f} \n"
                .format(avg_test.time.avg, avg_test.triplet_loss.avg,
                        avg_test.pos_dists.avg, avg_test.neg_dists.avg))
    else:
        # for/else: runs once after the loop finishes without a break
        writer.add_scalar("Validate/Loss/train",
                          avg_test.triplet_loss.avg,
                          global_step=epoch)
        writer.add_scalar("Validate/Other/pos_dists",
                          avg_test.pos_dists.avg,
                          global_step=epoch)
        writer.add_scalar("Validate/Other/neg_dists",
                          avg_test.neg_dists.avg,
                          global_step=epoch)

    return avg_test.triplet_loss.avg, avg_test.pos_dists.avg, avg_test.neg_dists.avg
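
The loss callable passed into validation() only needs to accept (anchor, positive, negative) embedding batches. As an illustration (not necessarily what the original project uses), PyTorch's built-in nn.TripletMarginLoss fits that interface:

import torch
import torch.nn as nn

# Hypothetical stand-in for the `loss` argument: the built-in triplet margin loss.
triplet_loss = nn.TripletMarginLoss(margin=0.2, p=2)

anchor = torch.randn(8, 128)    # dummy embedding batches (batch 8, dim 128)
positive = torch.randn(8, 128)
negative = torch.randn(8, 128)
print(triplet_loss(anchor, positive, negative).item())
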
Code Example #9
            writer.add_scalar("Train_Batch/Distance/neg_dists",
                              neg_dists,
                              global_step=current_batch)
            writer.add_scalar("Train_Batch_Global_AVG/loss",
                              avg.triplet_loss.avg,
                              global_step=current_batch)
            writer.add_scalar("Train_Batch_Global_AVG/pos_dists",
                              avg.pos_dists.avg,
                              global_step=current_batch)
            writer.add_scalar("Train_Batch_Global_AVG/neg_dists",
                              avg.neg_dists.avg,
                              global_step=current_batch)

            # log to logger
            if current_batch % log_interval == 0:
                print_speed(current_batch, batch_time, total_batch, "global")
                logger.info(
                    "\n current batch information:\n epoch: {0} | batch_time {1:5f} | triplet_loss: {2:.5f} | pos_dists: {3:.5f} | neg_dists: {4:.5f} \n"
                    .format(epoch + 1, avg.time.val, avg.triplet_loss.val,
                            avg.pos_dists.val, avg.neg_dists.val))
                logger.info(
                    "\n current global average information:\n epoch: {0} | batch_time {1:5f} | triplet_loss: {2:.5f} | pos_dists: {3:.5f} | neg_dists: {4:.5f} \n"
                    .format(epoch + 1, avg.time.avg, avg.triplet_loss.avg,
                            avg.pos_dists.avg, avg.neg_dists.avg))
        else:
            # add epoch avg
            writer.add_scalar("Train/Loss/train",
                              avg.triplet_loss.avg,
                              global_step=epoch)
            writer.add_scalar("Train/Other/train_pos_dists",
                              avg.pos_dists.avg,
Code Example #10
def train(train_loader, model, lr_scheduler, epoch, cfg, warmup=False):
    logger = logging.getLogger('global')

    model.cuda()
    model.train()
    world_size = 1
    rank = 0
    if args.dist:
        rank = dist.get_rank()
        world_size = dist.get_world_size()

    def freeze_bn(m):
        classname = m.__class__.__name__
        if classname.find('BatchNorm') != -1:
            m.eval()

    model.apply(freeze_bn)
    logger.info('freeze bn')

    t0 = time.time()

    if args.dist:
        # update random seed
        train_loader.sampler.set_epoch(epoch)

    t0 = time.time()
    for iter, input in enumerate(train_loader):
        #torch.cuda.empty_cache()
        if warmup:
            # update lr for each iteration
            lr_scheduler.step()
        x = {
            'cfg': cfg,
            'image': torch.autograd.Variable(input[0]).cuda(),
            'image_info': input[1][:, :3],
            'ground_truth_bboxes': input[2],
            'ignore_regions': None,  # input[3],
            'ground_truth_keypoints': input[4],
            'ground_truth_masks': input[5]
        }
        # for debug
        #debugger.store_tensor_as_image(input[0])
        #debugger.store_filenames(input[-1])
        t1 = time.time()

        outputs = model(x)
        t11 = time.time()

        rpn_cls_loss, rpn_loc_loss, rcnn_cls_loss, rcnn_loc_loss, keypoint_loss = outputs[
            'losses']
        # gradient is averaged by normalizing the loss with world_size
        #loss = (rpn_cls_loss + rpn_loc_loss + rcnn_cls_loss + rcnn_loc_loss + keypoint_loss) / world_size
        loss = sum(outputs['losses']) / world_size
        '''
        if args.dist == 0 or dist.get_rank() == 0:
            graph = vis_helper.make_dot(loss, dict(model.named_parameters()))
            logger.info('PATH:{}'.format(os.environ['PATH']))
            graph.render(filename = 'graph', directory='graph', view=False)
        exit()
        '''
        t12 = time.time()
        lr_scheduler.optimizer.zero_grad()
        loss.backward()
        t13 = time.time()
        if args.dist:
            average_gradients(model)
        t14 = time.time()
        lr_scheduler.optimizer.step()
        t15 = time.time()

        rpn_accuracy = outputs['accuracy'][0][0] / 100.
        rcnn_accuracy = outputs['accuracy'][1][0] / 100.
        # .item() extracts the Python scalar; the old `.data.cpu()[0]` idiom
        # fails on recent PyTorch versions (invalid index of a 0-dim tensor)
        loss = loss.item()
        rpn_cls_loss = rpn_cls_loss.item()
        rpn_loc_loss = rpn_loc_loss.item()
        rcnn_cls_loss = rcnn_cls_loss.item()
        rcnn_loc_loss = rcnn_loc_loss.item()
        if keypoint_loss is not None:
            keypoint_loss = keypoint_loss.item()

        t2 = time.time()
        lr = lr_scheduler.get_lr()[0]
        logger.info(
            'Epoch: [%d][%d/%d] LR:%f Time: %.3f Loss: %.5f (rpn_cls: %.5f rpn_loc: %.5f rpn_acc: %.5f'
            ' rcnn_cls: %.5f, rcnn_loc: %.5f rcnn_acc:%.5f kpt:%.5f)' %
            (epoch, iter, len(train_loader), lr, t2 - t0, loss * world_size,
             rpn_cls_loss, rpn_loc_loss, rpn_accuracy, rcnn_cls_loss,
             rcnn_loc_loss, rcnn_accuracy, keypoint_loss))
        t3 = time.time()
        #logger.info('data:{0}, forward:{1}, bp:{2}, sync:{3}, upd:{4}, loss:{5}, prt:{6}'.format(t1-t0, t11-t1, t13-t12, t14-t13, t15-t14, t2-t15, t3-t2))
        #logger.info('data:%f, ' % (t1-t0) +
        #            'forward:%f, ' % (t11-t1) +
        #            'sum_loss:%f, ' % (t12-t11) +
        #            'bp:%f, ' % (t13-t12) +
        #            'sync:%f, ' % (t14-t13) +
        #            'upd:%f, ' % (t15-t14) +
        #            'loss:%f, ' % (t2-t15) +
        #            'prt:%f, ' % (t3-t2))
        print_speed((epoch - 1) * len(train_loader) + iter + 1, t2 - t0,
                    args.epochs * len(train_loader))
        t0 = t2
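
When args.dist is set, Code Example #10 (and #11 below) synchronizes workers through average_gradients, which is not shown. A common way to implement such a helper with torch.distributed is sketched here; the original may instead use bucketed or asynchronous reduction.

import torch.distributed as dist


def average_gradients(model):
    """All-reduce gradients across workers and divide by the world size (sketch)."""
    world_size = float(dist.get_world_size())
    for param in model.parameters():
        if param.grad is not None:
            dist.all_reduce(param.grad.data, op=dist.ReduceOp.SUM)
            param.grad.data /= world_size
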
Code Example #11
def train(train_loader,
          target_loader,
          val_loader,
          model,
          dec_model,
          dis_model,
          dis_model_patch,
          lr_scheduler,
          lr_scheduler_dec,
          lr_scheduler_dis,
          lr_scheduler_dis_patch,
          epoch,
          cfg,
          warmup=False):
    logger = logging.getLogger('global')
    model.cuda()
    model.train()
    dis_model.cuda()
    dis_model.train()
    dec_model.cuda()
    dec_model.train()
    dis_model_patch.cuda()
    dis_model_patch.train()

    if args.dist:
        rank = dist.get_rank()
        world_size = dist.get_world_size()
    else:
        world_size = 1
        rank = 0

    def freeze_bn(m):
        classname = m.__class__.__name__
        if classname.find('Norm') != -1:
            m.eval()

    model.apply(freeze_bn)
    fix_num = args.fix_num
    count = 1
    for mm in model.modules():
        if count > fix_num:
            break
        if isinstance(mm, torch.nn.Conv2d) and count <= fix_num:
            mm.eval()
            count += 1

    # dec_model.apply(freeze_bn)
    logger.info('freeze bn')

    end = time.time()

    t0 = time.time()
    l1_loss = torch.nn.L1Loss()
    if args.dist:
        # update random seed
        train_loader.sampler.set_epoch(epoch)
        target_loader.sampler.set_epoch(epoch)

    for iter, (input, target) in enumerate(zip(train_loader, target_loader)):
        # torch.cuda.empty_cache()
        if warmup:
            # update lr for each iteration
            lr_scheduler.step()
            lr_scheduler_dis.step()
            lr_scheduler_dec.step()
            lr_scheduler_dis_patch.step()
        x = {
            'cfg': cfg,
            'image': (input[0]).cuda(),
            'image_info': input[1],
            'ground_truth_bboxes': input[2],
            'ignore_regions': None,
            'cluster_num': args.cluster_num,
            'threshold': args.threshold
            # 'ignore_regions': input[3] if args.dataset == 'coco' else None
        }
        target = (target).cuda()
        outputs = model(x, target)

        centers_source, centers_target = outputs['cluster_centers']

        corners_source = get_corner_from_center(centers_source)
        corners_target = get_corner_from_center(centers_target)

        x_small = []
        target_small = []
        for corners_idx in range(0, len(corners_source)):
            x1 = corners_source[corners_idx][0]
            y1 = corners_source[corners_idx][1]
            x2 = corners_source[corners_idx][2]
            y2 = corners_source[corners_idx][3]
            assert (
                x2 -
                x1 == args.recon_size), "x size does not match 256 in source "
            assert (
                y2 -
                y1 == args.recon_size), "y size does not match 256 in source "
            x_small_tmp = x['image'][:, :, y1:y2, x1:x2]
            x_small.append(x_small_tmp)

        x_small = torch.cat(x_small, 0)

        for corners_idx in range(0, len(corners_target)):
            x1 = corners_target[corners_idx][0]
            y1 = corners_target[corners_idx][1]
            x2 = corners_target[corners_idx][2]
            y2 = corners_target[corners_idx][3]
            assert (
                x2 -
                x1 == args.recon_size), "x size does not match 256 in target "
            assert (
                y2 -
                y1 == args.recon_size), "y size does not match 256 in target "
            target_small_tmp = target[:, :, y1:y2, x1:x2]
            target_small.append(target_small_tmp)

        target_small = torch.cat(target_small, 0)  # Size(4, 3, 256, 256)

        x_source_patch, x_target_patch = outputs[
            'cluster_features']  # Size(4, 128, 4096)
        x_source_recon, x_target_recon = dec_model(
            x_source_patch, x_target_patch)  # Size(4, 3, 256, 256)

        ##########################################################################
        ######################### (1): start dis_update ##########################
        ##########################################################################

        lr_scheduler_dis.optimizer.zero_grad()
        x_source_dis, x_target_dis = dis_model(x_source_recon,
                                               x_target_recon)  # (4, 256)
        x_source_real, x_target_real = dis_model(x_small,
                                                 target_small)  # (4, 256)
        x_source_dis = torch.sigmoid(x_source_dis)  # (4, dim)
        x_target_dis = torch.sigmoid(x_target_dis)  # (4, dim)
        x_source_real = torch.sigmoid(x_source_real)  # (4, dim)
        x_target_real = torch.sigmoid(x_target_real)

        x_source_dis_cluster = torch.split(x_source_dis, 1, dim=0)
        x_source_real_cluster = torch.split(x_source_real, 1, dim=0)
        score_1_cluster = generate_soft_label(1, x_source_real_cluster[0])
        score_0_cluster = generate_soft_label(0, x_source_dis_cluster[0])

        adloss_source = 0.0

        #################### (1.1): for source clusters############################
        for clu_idx in range(0, len(x_source_dis_cluster)):
            adloss_source += (
                F.binary_cross_entropy(x_source_dis_cluster[clu_idx],
                                       score_1_cluster) +
                F.binary_cross_entropy(x_source_real_cluster[clu_idx],
                                       score_0_cluster))

        #################### (1.2): for target clusters############################

        x_target_patch_pro = dis_model_patch(x_target_patch)
        x_target_patch_pro_mean = torch.mean(x_target_patch_pro,
                                             1)  # Size(4,1)
        x_source_patch_pro = dis_model_patch(x_source_patch)  # (4, 512)

        x_target_dis_cluster = torch.split(x_target_dis, 1, dim=0)
        x_target_real_cluster = torch.split(x_target_real, 1, dim=0)

        adloss_target = 0.0
        for clu_idx in range(0, len(x_target_dis_cluster)):
            adloss_target += (
                x_target_patch_pro_mean[clu_idx] * F.binary_cross_entropy(
                    x_target_dis_cluster[clu_idx], score_0_cluster) +
                F.binary_cross_entropy(x_target_real_cluster[clu_idx],
                                       score_1_cluster))

        adloss = (adloss_source + adloss_target) / world_size
        adloss.backward(retain_graph=True)

        max_grad3 = 0.0
        for pp in dis_model.parameters():
            tmp = torch.max(pp.grad.data)
            if max_grad3 < tmp:
                max_grad3 = tmp

        if args.dist:
            average_gradients(dis_model)

        lr_scheduler_dis.optimizer.step()

        ##########################################################################
        ####################### (2): start dis_patch_update ######################
        ##########################################################################

        lr_scheduler_dis_patch.optimizer.zero_grad()
        score_0_patch = generate_soft_label(0, x_target_patch_pro)
        score_1_patch = generate_soft_label(1, x_source_patch_pro)

        patch_loss_target = F.binary_cross_entropy(x_target_patch_pro,
                                                   score_0_patch)
        patch_loss_source = F.binary_cross_entropy(x_source_patch_pro,
                                                   score_1_patch)

        dis_patch_loss = (patch_loss_source + patch_loss_target) / world_size
        dis_patch_loss.backward(retain_graph=True)
        if args.dist:
            average_gradients(dis_model_patch)

        lr_scheduler_dis_patch.optimizer.step()

        ##########################################################################
        ########################## (3): start decoder_update #####################
        ##########################################################################

        lr_scheduler_dec.optimizer.zero_grad()

        # x_source_recon, x_target_recon = dec_model(x_target_patch, x_source_patch)
        x_source_dis, x_target_dis = dis_model(x_source_recon, x_target_recon)
        x_source_dis = torch.sigmoid(x_source_dis)  # (4, dim)

        x_source_real, x_target_real = dis_model(x_small,
                                                 target_small)  # (4, 256)
        x_source_real = torch.sigmoid(x_source_real)
        x_target_real = torch.sigmoid(x_target_real)
        """
        for the patch loss of Target image
        """
        x_target_patch_pro = dis_model_patch(x_target_patch)  # size(4, dim)
        gtav_dis_sigmoid_target = torch.sigmoid(x_target_dis)
        """
        obtain the weighting factor of target patches and calculate the target loss
        """
        x_target_patch_pro_mean2 = torch.mean(x_target_patch_pro,
                                              1)  # Size(4,1)
        fake_loss1_target = 0.0
        gtav_dis_sigmoid_target = torch.split(gtav_dis_sigmoid_target,
                                              1,
                                              dim=0)
        # allone_target_1 = (torch.ones(gtav_dis_sigmoid_target[0].size()).float().cuda())
        all_target_1 = generate_hard_label(1, gtav_dis_sigmoid_target[0])

        gtav_real_sigmoid_target = torch.split(x_target_real, 1, dim=0)
        all_target_0 = generate_hard_label(0, gtav_real_sigmoid_target[0])

        for clu_idx in range(0, len(gtav_dis_sigmoid_target)):
            fake_loss1_target += x_target_patch_pro_mean2[clu_idx] * (
                F.binary_cross_entropy(gtav_dis_sigmoid_target[clu_idx],
                                       all_target_1) +
                F.binary_cross_entropy(gtav_real_sigmoid_target[clu_idx],
                                       all_target_0))

        fake_loss1_source = 0.0
        x_source_fake_cluster2 = torch.split(x_source_dis, 1, dim=0)
        all_source_1 = generate_hard_label(1, x_source_fake_cluster2[0])
        x_source_real_cluster2 = torch.split(x_source_real, 1, dim=0)
        all_source_0 = generate_hard_label(0, x_source_real_cluster2[0])

        for clu_idx in range(0, len(x_source_fake_cluster2)):
            fake_loss1_source += (
                F.binary_cross_entropy(x_source_fake_cluster2[clu_idx],
                                       all_source_1) +
                F.binary_cross_entropy(x_source_real_cluster2[clu_idx],
                                       all_source_0))

        recon_loss = (fake_loss1_source + fake_loss1_target
                      ) / world_size  # no-discriminator in the Decoder

        # recon_loss = recon_loss
        recon_loss.backward(retain_graph=True)

        max_grad2 = 0.0
        for pp in dec_model.parameters():
            tmp = torch.max(pp.grad.data)
            if max_grad2 < tmp:
                max_grad2 = tmp

        if args.dist:
            average_gradients(dec_model)
        # torch.nn.utils.clip_grad_norm(dec_model.parameters(), 10.0)
        lr_scheduler_dec.optimizer.step()

        ##########################################################################
        ########################### (4): start detection_update ##################
        ##########################################################################
        """
        target feature maps --> source reconstruction
        for cross-domain alignment
        """
        x_source_recon, x_target_recon = dec_model(x_target_patch,
                                                   x_source_patch)
        x_source_dis, x_target_dis = dis_model(x_source_recon,
                                               x_target_recon)  # (4, dim)
        """
        weight of target patches
        """
        x_fake_dis_sigmoid = torch.sigmoid(x_target_dis)  #
        allone_11 = generate_hard_label(1, x_fake_dis_sigmoid)
        fake_loss_source = F.binary_cross_entropy(
            x_fake_dis_sigmoid, allone_11)  # NO discriminator in Detection
        x_fake_dis_sigmoid2 = torch.sigmoid(x_source_dis)

        x_fake_dis_sigmoid2_cluster = torch.split(x_fake_dis_sigmoid2,
                                                  1,
                                                  dim=0)
        allone_11_cluster = (torch.ones(
            x_fake_dis_sigmoid2_cluster[0].size()).float().cuda())

        fake_loss_target = 0.0
        for clu_idx in range(0, len(x_fake_dis_sigmoid2_cluster)):
            fake_loss_target += x_target_patch_pro_mean2[clu_idx] * (
                F.binary_cross_entropy(x_fake_dis_sigmoid2_cluster[clu_idx],
                                       allone_11_cluster))

        rpn_cls_loss, rpn_loc_loss, rcnn_cls_loss, rcnn_loc_loss = outputs[
            'losses']
        # gradient is averaged by normalizing the loss with world_size
        loss = (rpn_cls_loss + rpn_loc_loss + rcnn_cls_loss + rcnn_loc_loss +
                0.1 * (fake_loss_source + fake_loss_target)) / world_size

        lr_scheduler.optimizer.zero_grad()
        loss.backward()

        max_grad1 = 0.0
        for pp in model.parameters():
            tmp = torch.max(pp.grad.data)
            if max_grad1 < tmp:
                max_grad1 = tmp

        if args.dist:
            average_gradients(model)
        # torch.nn.utils.clip_grad_norm(model.parameters(), 1.0)
        lr_scheduler.optimizer.step()

        ##########################################################################
        ################################ Output information ######################
        ##########################################################################

        rpn_accuracy = outputs['accuracy'][0][0] / 100.
        rcnn_accuracy = outputs['accuracy'][1][0] / 100.

        t2 = time.time()
        lr = lr_scheduler.get_lr()[0]
        logger.info(
            'Epoch: [%d][%d/%d] LR:%f Time: %.3f Loss: %.5f (rpn_cls: %.5f rpn_loc: %.5f rpn_acc: %.5f'
            ' rcnn_cls: %.5f, rcnn_loc: %.5f rcnn_acc:%.5f fake_loss: %.5f dec_loss: %.5f  dis_loss: %.5f fake_loss1: %.5f)'
            % (epoch, iter, len(train_loader), lr, t2 - t0,
               loss.item() * world_size, rpn_cls_loss.item(),
               rpn_loc_loss.item(), rpn_accuracy, rcnn_cls_loss.item(),
               rcnn_loc_loss.item(), rcnn_accuracy, fake_loss_target.item(),
               recon_loss.item(), adloss.item(), fake_loss1_source.item()))
        print_speed((epoch - 1) * len(train_loader) + iter + 1, t2 - t0,
                    args.epochs * len(train_loader))
        t0 = t2
        logger.info("Max Grad, Det: %5f, Dec: %5f, Dis: %5f" %
                    (max_grad1, max_grad2, max_grad3))
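
Code Example #11 builds its adversarial targets with generate_soft_label and generate_hard_label, which are not shown. The snippet below is only a guess at their intent based on common GAN-style label smoothing: hard labels are plain 0/1 tensors shaped like the reference, soft labels add a little noise around 0 or 1. Treat it as a hypothetical sketch, not the project's actual helpers.

import torch


def generate_hard_label(value, like):
    # A 0/1 tensor with the same shape, dtype and device as `like`.
    return torch.full_like(like, float(value))


def generate_soft_label(value, like, noise=0.1):
    # GAN-style label smoothing: 1 -> roughly U(0.9, 1.0), 0 -> roughly U(0.0, 0.1).
    jitter = torch.rand_like(like) * noise
    base = torch.full_like(like, float(value))
    return base + jitter if value == 0 else base - jitter
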
Code Example #12
def validate_single(val_loader, model, cfg):
    global best_map
    logger = logging.getLogger('global')

    rank, world_size = 0, 1

    # switch to evaluate mode
    model.eval()
    total_rc = 0
    total_gt = 0

    logger.info('start validate')
    if not os.path.exists(args.results_dir):
        try:
            os.makedirs(args.results_dir)
        except Exception as e:
            print(e)
    # remove the original results file
    # if rank == 0:
    for f in os.listdir(args.results_dir):
        if 'results.txt.rank' in f and int(f.split('k')[-1]) >= world_size:
            logger.info("remove %s" % f)
            os.remove(os.path.join(args.results_dir, f))

    fout = open(os.path.join(args.results_dir, 'results.txt.rank%d' % rank),
                'w')

    for iter, input in enumerate(val_loader):
        img = (input[0]).cuda()
        img_info = input[1]
        gt_boxes = input[2]
        filenames = input[-1]
        x = {
            'cfg': cfg,
            'image': img,
            'image_info': img_info,
            'ground_truth_bboxes': gt_boxes,
            'ignore_regions': None
        }
        batch_size = img.shape[0]
        t0 = time.time()
        outputs = model(x)['predict']
        t2 = time.time()

        proposals = outputs[0].data.cpu().numpy()
        bboxes = outputs[1].data.cpu().numpy()
        if torch.is_tensor(gt_boxes):
            gt_boxes = gt_boxes.cpu().numpy()
        for b_ix in range(batch_size):
            img_id = filenames[b_ix].rsplit('/', 1)[-1].rsplit('.', 1)[0]
            img_resize_scale = img_info[b_ix, -1]
            if args.dataset == 'coco':
                img_resize_scale = img_info[b_ix, 2]
            rois_per_image = proposals[proposals[:, 0] == b_ix]
            dts_per_image = bboxes[bboxes[:, 0] == b_ix]
            gts_per_image = gt_boxes[b_ix]
            # rpn recall
            num_rc, num_gt = bbox_helper.compute_recall(
                rois_per_image[:, 1:1 + 4], gts_per_image)
            total_gt += num_gt
            total_rc += num_rc
            order = dts_per_image[:, -2].argsort()[::-1][:100]
            dts_per_image = dts_per_image[order]

            # faster-rcnn eval
            for cls in range(1, cfg['shared']['num_classes']):
                dts_per_cls = dts_per_image[dts_per_image[:, -1] == cls]
                gts_per_cls = gts_per_image[gts_per_image[:, -1] == cls]
                dts_per_cls = dts_per_cls[:, 1:-1]
                # dts_per_cls = dts_per_cls[dts_per_cls[:, -1] > 0.05]
                gts_per_cls = gts_per_cls[:, :-1]
                dts_per_cls = bbox_helper.clip_bbox(dts_per_cls,
                                                    img_info[b_ix, :2])
                if len(dts_per_cls) > 0:
                    dts_per_cls[:, :4] = dts_per_cls[:, :4] / img_resize_scale
                if len(gts_per_cls) > 0:
                    gts_per_cls[:, :4] = gts_per_cls[:, :4] / img_resize_scale
                for bx in dts_per_cls:
                    if args.dataset == 'coco':
                        fout.write('val2017/{0}.jpg {1} {2}\n'.format(
                            img_id, ' '.join(map(str, bx)), cls))
                    else:
                        fout.write('{0} {1} {2}\n'.format(
                            img_id, ' '.join(map(str, bx)), cls))
                fout.flush()
        logger.info('Test: [%d/%d] Time: %.3f %d/%d' %
                    (iter, len(val_loader), t2 - t0, total_rc, total_gt))
        print_speed(iter + 1, t2 - t0, len(val_loader))
    logger.info('rpn300 recall=%f' % (total_rc / total_gt))
    fout.close()
    """
    eval the cityscapes for getting the map
    """

    # eval coco ap with official python api
    if args.dataset == 'coco':
        eval_coco_ap(args.results_dir, 'bbox', args.val_meta_file)
    else:
        Cal_MAP(args.results_dir, args.val_meta_file,
                int(cfg['shared']['num_classes']))

    return total_rc / total_gt
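
bbox_helper.compute_recall above drives the accumulated RPN recall (total_rc / total_gt) but is defined elsewhere. A minimal sketch of the IoU-based matching it is assumed to perform; the function name and threshold here are illustrative, not the project's API.

import numpy as np

def compute_recall_sketch(rois, gts, iou_thr=0.5):
    """Count ground-truth boxes covered by at least one proposal with IoU >= iou_thr."""
    if len(gts) == 0:
        return 0, 0
    if len(rois) == 0:
        return 0, len(gts)
    # pairwise intersection between proposals (N, 4) and ground truths (M, 4+)
    x1 = np.maximum(rois[:, None, 0], gts[None, :, 0])
    y1 = np.maximum(rois[:, None, 1], gts[None, :, 1])
    x2 = np.minimum(rois[:, None, 2], gts[None, :, 2])
    y2 = np.minimum(rois[:, None, 3], gts[None, :, 3])
    inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
    area_r = (rois[:, 2] - rois[:, 0]) * (rois[:, 3] - rois[:, 1])
    area_g = (gts[:, 2] - gts[:, 0]) * (gts[:, 3] - gts[:, 1])
    iou = inter / (area_r[:, None] + area_g[None, :] - inter + 1e-9)
    recalled = int((iou.max(axis=0) >= iou_thr).sum())
    return recalled, len(gts)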
Code example #13
0
def test_model(model, test_dataloader, log_interval, device):
    """Test and Return the feature vector of all sample in dataset with its index.

    Args:
        cfg: (dict) 
            config file of the test precedure.
        model: (nn.module)
            loaded model
        test_dataloader: (torch.Dataloader)
            It should not be none! A non-triplet dataloader to validate data.
            It's sample protocal is:
                {
                    "img": target image,
                    "cls": target class, 
                    "other": other information,
                        {
                            "index" : index,
                        }
                }
        writer: (tensorboard writer)
        device: cuda or cpu

    Return:
        a list of dict:[
            {
                "cls": class label of the sample,
                "feature": feature vectuer of the result,
                "other": other information,
                {
                    "index": index of the sample in the dataset,
                }
            },
            ...,
            {
                "cls": class label of the sample,
                "feature": feature vectuer of the result,
                "other": other information,
                {
                    "index": index of the sample in the dataset,
                }
            }] 
    """
    logger.info("\n------------------------- Start Forwarding Dataset -------------------------\n")
    
    # epoch average meter
    avg_test = AverageMeter()

    # get test batch count
    current_test_batch = 0
    total_test_batch = len(test_dataloader)

    # to return list
    out_sample_list = []
    for batch_idx, batch_sample in enumerate(test_dataloader):
        # Skip last iteration to avoid the problem of having different number of tensors while calculating
        # averages (sizes of tensors must be the same for pairwise distance calculation)
        if batch_idx + 1 == len(test_dataloader):
            continue
        batch_size = test_dataloader.batch_size

        # switch to evaluation mode.
        for param in model.parameters():
            param.requires_grad = False
        model.eval()

        # start time counting
        batch_start_time_test = time.time()

        # Forward pass - compute embeddings
        imgs = batch_sample["img"]
        cls = batch_sample["cls"]
        indexs = batch_sample["other"]["index"]

        imgs = imgs.to(device)

        out_put = model(imgs)

        # Tensor.to() is not in-place; keep the returned tensor so the features end up on the CPU
        out_put = out_put.to("cpu")

        for i in range(batch_size):
            out_dict = {
                "cls": cls[i],
                "feature": out_put[i],
                "other": {
                    "index" : indexs[i]
                    },
            }
            out_sample_list.append(out_dict)
        
        # batch time & batch count
        current_test_batch += 1
        batch_time = time.time() - batch_start_time_test

        if current_test_batch % log_interval == 0:
            print_speed(current_test_batch, batch_time, total_test_batch, "global")

    logger.info("\n------------------------- End Forwarding Dataset -------------------------\n")

    return out_sample_list
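
A hedged usage sketch of consuming the list returned by test_model: stack the per-sample features and query nearest neighbours with torch.cdist. my_model and my_loader are placeholders, not objects from the original code.

import torch

samples = test_model(my_model, my_loader, log_interval=10, device="cuda")
features = torch.stack([s["feature"] for s in samples])     # (N, D) on CPU
labels = [s["cls"] for s in samples]

dists = torch.cdist(features, features)                     # pairwise L2 distances
nearest = dists.fill_diagonal_(float("inf")).argmin(dim=1)  # closest other sample per row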
Code example #14
0
File: vis_siamrcnn.py  Project: whjzsy/SiamRCNN
def train(train_loader, model, optimizer, lr_scheduler, epoch, cfg):
    global tb_index, best_acc, cur_lr, logger
    cur_lr = lr_scheduler.get_cur_lr()
    logger = logging.getLogger('global')
    avg = AverageMeter()
    model.train()
    # model.module.features.eval()
    # model.module.rpn_model.eval()
    # model.module.features.apply(BNtoFixed)
    # model.module.rpn_model.apply(BNtoFixed)
    #
    # model.module.mask_model.train()
    # model.module.refine_model.train()
    model = model.cuda()
    end = time.time()

    def is_valid_number(x):
        return not (math.isnan(x) or math.isinf(x) or x > 1e4)

    num_per_epoch = len(train_loader.dataset) // args.epochs // args.batch
    start_epoch = epoch
    with torch.no_grad():
        for iter, input in enumerate(train_loader):
            if iter > 100:
                break

            if epoch != iter // num_per_epoch + start_epoch:  # next epoch
                epoch = iter // num_per_epoch + start_epoch

                if epoch == args.epochs:
                    return

                if model.module.features.unfix(epoch / args.epochs):
                    logger.info('unfix part model.')
                    optimizer, lr_scheduler = build_opt_lr(
                        model.module, cfg, args, epoch)

                lr_scheduler.step(epoch)
                cur_lr = lr_scheduler.get_cur_lr()

                logger.info('epoch:{}'.format(epoch))

            tb_index = iter
            if iter % num_per_epoch == 0 and iter != 0:
                for idx, pg in enumerate(optimizer.param_groups):
                    logger.info("epoch {} lr {}".format(epoch, pg['lr']))
                    tb_writer.add_scalar('lr/group%d' % (idx + 1), pg['lr'],
                                         tb_index)

            data_time = time.time() - end
            avg.update(data_time=data_time)
            x_rpn = {
                'cfg': cfg,
                'template': torch.autograd.Variable(input[0]).cuda(),
                'search': torch.autograd.Variable(input[1]).cuda(),
                'label_cls': torch.autograd.Variable(input[2]).cuda(),
                'label_loc': torch.autograd.Variable(input[3]).cuda(),
                'label_loc_weight': torch.autograd.Variable(input[4]).cuda(),
                'label_mask': torch.autograd.Variable(input[6]).cuda()
            }
            x_kp = input[7]
            x_kp = {
                x: torch.autograd.Variable(y).cuda()
                for x, y in x_kp.items()
            }
            x_rpn['anchors'] = train_loader.dataset.anchors.all_anchors[0]

            outputs = model(x_rpn, x_kp)
            roi_box = outputs['predict'][-1]
            pred_kp = outputs['predict'][2]['hm_hp']
            batch_img = x_rpn['search'].expand(x_kp['hm_hp'].size(0), -1, -1,
                                               -1)
            gt_img, pred_img = save_gt_pred_heatmaps(
                batch_img, x_kp['hm_hp'], pred_kp,
                'test_imgs/test_{}.jpg'.format(iter))
            # rpn_pred_cls, rpn_pred_loc = outputs['predict'][:2]
            # rpn_pred_cls = outputs['predict'][-1]
            # anchors = train_loader.dataset.anchors.all_anchors[0]
            #
            # normalized_boxes = proposal_layer([rpn_pred_cls, rpn_pred_loc], anchors, config=cfg)
            # print('rpn_pred_cls: ', rpn_pred_cls.shape)

            rpn_cls_loss, rpn_loc_loss, kp_losses = torch.mean(outputs['losses'][0]),\
                                                        torch.mean(outputs['losses'][1]),\
                                                        outputs['losses'][3]
            kp_loss = torch.mean(kp_losses['loss'])
            kp_hp_loss = torch.mean(kp_losses['hp_loss'])
            kp_hm_hp_loss = torch.mean(kp_losses['hm_hp_loss'])
            kp_hp_offset_loss = torch.mean(kp_losses['hp_offset_loss'])

            # mask_iou_mean, mask_iou_at_5, mask_iou_at_7 = torch.mean(outputs['accuracy'][0]), torch.mean(outputs['accuracy'][1]), torch.mean(outputs['accuracy'][2])

            cls_weight, reg_weight, kp_weight = cfg['loss']['weight']

            loss = rpn_cls_loss * cls_weight + rpn_loc_loss * reg_weight + kp_loss * kp_weight

            optimizer.zero_grad()
            loss.backward()

            if cfg['clip']['split']:
                torch.nn.utils.clip_grad_norm_(
                    model.module.features.parameters(), cfg['clip']['feature'])
                torch.nn.utils.clip_grad_norm_(
                    model.module.rpn_model.parameters(), cfg['clip']['rpn'])
                torch.nn.utils.clip_grad_norm_(
                    model.module.mask_model.parameters(), cfg['clip']['mask'])
            else:
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               args.clip)  # gradient clip

            if is_valid_number(loss.item()):
                optimizer.step()

            siammask_loss = loss.item()

            batch_time = time.time() - end

            avg.update(batch_time=batch_time,
                       rpn_cls_loss=rpn_cls_loss,
                       rpn_loc_loss=rpn_loc_loss,
                       kp_hp_loss=kp_hp_loss,
                       kp_hm_hp_loss=kp_hm_hp_loss,
                       kp_hp_offset_loss=kp_hp_offset_loss,
                       kp_loss=kp_loss,
                       siammask_loss=siammask_loss)
            # mask_iou_mean=mask_iou_mean, mask_iou_at_5=mask_iou_at_5, mask_iou_at_7=mask_iou_at_7)

            tb_writer.add_scalar('loss/cls', rpn_cls_loss, tb_index)
            tb_writer.add_scalar('loss/loc', rpn_loc_loss, tb_index)
            tb_writer.add_scalar('loss/kp_hp_loss', kp_hp_loss, tb_index)
            tb_writer.add_scalar('loss/kp_hm_hp_loss', kp_hm_hp_loss, tb_index)
            tb_writer.add_scalar('loss/kp_hp_offset_loss', kp_hp_offset_loss,
                                 tb_index)
            # tb_writer.add_scalar('loss/kp', kp_loss, tb_index)
            end = time.time()

            if (iter + 1) % args.print_freq == 0:
                logger.info(
                    'Epoch: [{0}][{1}/{2}] lr: {lr:.6f}\t{batch_time:s}\t{data_time:s}'
                    '\t{rpn_cls_loss:s}\t{rpn_loc_loss:s}'
                    '\t{kp_hp_loss:s}\t{kp_hm_hp_loss:s}\t{kp_hp_offset_loss:s}'
                    '\t{kp_loss:s}\t{siammask_loss:s}'.format(
                        epoch + 1,
                        (iter + 1) % num_per_epoch,
                        num_per_epoch,
                        lr=cur_lr,
                        batch_time=avg.batch_time,
                        data_time=avg.data_time,
                        rpn_cls_loss=avg.rpn_cls_loss,
                        rpn_loc_loss=avg.rpn_loc_loss,
                        kp_hp_loss=avg.kp_hp_loss,
                        kp_hm_hp_loss=avg.kp_hm_hp_loss,
                        kp_hp_offset_loss=avg.kp_hp_offset_loss,
                        kp_loss=avg.kp_loss,
                        siammask_loss=avg.siammask_loss,
                    ))
                # mask_iou_mean=avg.mask_iou_mean,
                # mask_iou_at_5=avg.mask_iou_at_5,mask_iou_at_7=avg.mask_iou_at_7))
                print_speed(iter + 1, avg.batch_time.avg,
                            args.epochs * num_per_epoch)
Code example #15
0
File: vis_siampose.py  Project: whjzsy/SiamRCNN
def train(train_loader, model, optimizer, lr_scheduler, epoch, cfg):
    global tb_index, best_acc, cur_lr, logger
    cur_lr = lr_scheduler.get_cur_lr()
    logger = logging.getLogger('global')
    avg = AverageMeter()
    model.train()
    model = model.cuda()
    end = time.time()

    def is_valid_number(x):
        return not (math.isnan(x) or math.isinf(x) or x > 1e4)

    num_per_epoch = len(train_loader.dataset) // args.epochs // args.batch
    start_epoch = epoch
    for iter, input in enumerate(train_loader):

        if epoch != iter // num_per_epoch + start_epoch:  # next epoch
            epoch = iter // num_per_epoch + start_epoch

            if not os.path.exists(args.save_dir):  # makedir/save model
                os.makedirs(args.save_dir)

            save_checkpoint(
                {
                    'epoch': epoch,
                    'arch': args.arch,
                    'state_dict': model.module.state_dict(),
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                    'anchor_cfg': cfg['anchors']
                }, False,
                os.path.join(args.save_dir, 'checkpoint_e%d.pth' % (epoch)),
                os.path.join(args.save_dir, 'best.pth'))

            if epoch == args.epochs:
                return

            if model.module.features.unfix(epoch / args.epochs):
                logger.info('unfix part model.')
                optimizer, lr_scheduler = build_opt_lr(model.module, cfg, args,
                                                       epoch)

            lr_scheduler.step(epoch)
            cur_lr = lr_scheduler.get_cur_lr()

            logger.info('epoch:{}'.format(epoch))

        tb_index = iter
        if iter % num_per_epoch == 0 and iter != 0:
            for idx, pg in enumerate(optimizer.param_groups):
                logger.info("epoch {} lr {}".format(epoch, pg['lr']))
                tb_writer.add_scalar('lr/group%d' % (idx + 1), pg['lr'],
                                     tb_index)

        data_time = time.time() - end
        avg.update(data_time=data_time)
        x = {
            'cfg': cfg,
            'template': torch.autograd.Variable(input[0]).cuda(),
            'search': torch.autograd.Variable(input[1]).cuda(),
            'label_cls': torch.autograd.Variable(input[2]).cuda(),
            'label_loc': torch.autograd.Variable(input[3]).cuda(),
            'label_loc_weight': torch.autograd.Variable(input[4]).cuda(),
            'label_mask': torch.autograd.Variable(input[6]).cuda(),
            'label_kp_weight': torch.autograd.Variable(input[7]).cuda(),
            'label_mask_weight': torch.autograd.Variable(input[8]).cuda(),
        }

        outputs = model(x)
        # print(x['search'].shape)
        pred_mask = outputs['predict'][2]
        pred_mask = select_pred_heatmap(
            pred_mask,
            x['label_mask_weight'])  # rpn_pred_mask, shape (bs, 17, 127, 127)

        true_search = select_gt_img(x['search'], x['label_mask_weight'])
        if true_search.numel() > 0:  # save only when at least one search image was selected
            save_batch_heatmaps(true_search,
                                pred_mask,
                                vis_outpath + '{}.jpg'.format(iter),
                                normalize=True)

        # pred_mask = pred_mask.cpu().detach().numpy()
        # true_search = true_search.cpu().detach().numpy()

        # print("pose_mask", pred_mask.shape)
        # pose_heat = np.transpose(pred_mask[0,:,:,:],(1,2,0))   #shape (127,127,17)

        # plt.figure(num='image', figsize=(128,128))
        #
        # plt.subplot(1, 2, 1)
        # plt.title('origin image')
        # plt.imshow(np.transpose(true_search[0,:,:,:], (1,2,0)))
        #
        # plt.subplot(1, 2, 2)
        # plt.title('heatmap')
        # pose_map = np.zeros((127,127), np.float32)
        # for i in range(pred_mask.shape[1]):
        #     pose_map += pose_heat[:,:,i]
        # plt.imshow(pose_map)
        # plt.axis('off')
        #
        #
        # plt.show()

        # Visualization: project all 17 heatmaps onto a single black image

        rpn_cls_loss, rpn_loc_loss, rpn_mask_loss = torch.mean(outputs['losses'][0]),\
                                                    torch.mean(outputs['losses'][1]),\
                                                    torch.mean(outputs['losses'][2])

        # mask_iou_mean, mask_iou_at_5, mask_iou_at_7 = torch.mean(outputs['accuracy'][0]), torch.mean(outputs['accuracy'][1]), torch.mean(outputs['accuracy'][2])

        cls_weight, reg_weight, mask_weight = cfg['loss']['weight']

        loss = rpn_cls_loss * cls_weight + rpn_loc_loss * reg_weight + rpn_mask_loss * mask_weight

        optimizer.zero_grad()
        loss.backward()

        if cfg['clip']['split']:
            torch.nn.utils.clip_grad_norm_(model.module.features.parameters(),
                                           cfg['clip']['feature'])
            torch.nn.utils.clip_grad_norm_(model.module.rpn_model.parameters(),
                                           cfg['clip']['rpn'])
            torch.nn.utils.clip_grad_norm_(
                model.module.mask_model.parameters(), cfg['clip']['mask'])
        else:
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           args.clip)  # gradient clip

        if is_valid_number(loss.item()):
            optimizer.step()

        siammask_loss = loss.item()

        batch_time = time.time() - end

        avg.update(batch_time=batch_time,
                   rpn_cls_loss=rpn_cls_loss,
                   rpn_loc_loss=rpn_loc_loss,
                   rpn_mask_loss=rpn_mask_loss * mask_weight,
                   siammask_loss=siammask_loss)
        # mask_iou_mean=mask_iou_mean, mask_iou_at_5=mask_iou_at_5, mask_iou_at_7=mask_iou_at_7)

        tb_writer.add_scalar('loss/cls', rpn_cls_loss, tb_index)
        tb_writer.add_scalar('loss/loc', rpn_loc_loss, tb_index)
        tb_writer.add_scalar('loss/mask', rpn_mask_loss * mask_weight,
                             tb_index)
        # tb_writer.add_scalar('mask/mIoU', mask_iou_mean, tb_index)
        # tb_writer.add_scalar('mask/IoU@0.5', mask_iou_at_5, tb_index)
        # tb_writer.add_scalar('mask/IoU@0.7', mask_iou_at_7, tb_index)
        end = time.time()

        if (iter + 1) % args.print_freq == 0:
            logger.info(
                'Epoch: [{0}][{1}/{2}] lr: {lr:.6f}\t{batch_time:s}\t{data_time:s}'
                '\t{rpn_cls_loss:s}\t{rpn_loc_loss:s}\t{rpn_mask_loss:s}\t{siammask_loss:s}'
                .format(
                    epoch + 1,
                    (iter + 1) % num_per_epoch,
                    num_per_epoch,
                    lr=cur_lr,
                    batch_time=avg.batch_time,
                    data_time=avg.data_time,
                    rpn_cls_loss=avg.rpn_cls_loss,
                    rpn_loc_loss=avg.rpn_loc_loss,
                    rpn_mask_loss=avg.rpn_mask_loss,
                    siammask_loss=avg.siammask_loss,
                ))
            # mask_iou_mean=avg.mask_iou_mean,
            # mask_iou_at_5=avg.mask_iou_at_5,mask_iou_at_7=avg.mask_iou_at_7))
            print_speed(iter + 1, avg.batch_time.avg,
                        args.epochs * num_per_epoch)
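
The commented-out matplotlib block and the visualization note above collapse the 17 keypoint heatmaps into one image. A minimal sketch of that projection, assuming pred_mask is a (bs, 17, 127, 127) tensor as the inline comment states:

import matplotlib.pyplot as plt

def save_pose_map_sketch(pred_mask, out_path):
    # take the per-pixel maximum over the 17 keypoint channels of the first sample
    heat = pred_mask[0].detach().cpu().numpy()   # (17, 127, 127)
    pose_map = heat.max(axis=0)                  # (127, 127)
    plt.imsave(out_path, pose_map, cmap="gray")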
Code example #16
0
File: grad_train.py  Project: whjzsy/Siamtracker
def train(dataloader, optimizer, model):
    iter = 0
    begin_time = 0.0
    average_meter = AverageMeter()
    num_per_epoch = len(dataloader.dataset) // (cfg.GRAD.BATCH_SIZE)
    tb_writer = SummaryWriter(cfg.GRAD.LOG_DIR)
    for epoch in range(cfg.GRAD.EPOCHS):
        dataloader.dataset.shuffle()
        begin_time = time.time()
        for data in dataloader:
            examplar_img = data['examplar_img'].cuda()

            train_search_img = data['train_search_img'].cuda()
            train_gt_cls = data['train_gt_cls'].cuda()
            train_gt_delta = data['train_gt_delta'].cuda()
            train_delta_weight = data['train_delta_weight'].cuda()

            test_search_img = data['test_search_img'].cuda()
            test_gt_cls = data['test_gt_cls'].cuda()
            test_gt_delta = data['test_gt_delta'].cuda()
            test_delta_weight = data['test_delta_weight'].cuda()
            data_time = time.time() - begin_time

            losses = model.forward(examplar_img, train_search_img,
                                   train_gt_cls, train_gt_delta,
                                   train_delta_weight, test_search_img,
                                   test_gt_cls, test_gt_delta,
                                   test_delta_weight)
            cls_loss = losses['cls_loss']
            loc_loss = losses['loc_loss']
            loss = losses['total_loss']
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            batch_time = time.time() - begin_time
            batch_info = {}
            batch_info['data_time'] = data_time
            batch_info['batch_time'] = batch_time
            average_meter.update(**batch_info)
            # add summary writer
            for k, v in losses.items():
                if k.startswith('examplar'):
                    tb_writer.add_histogram(k, v, iter)
                else:
                    tb_writer.add_scalar(k, v, iter)
            if iter % cfg.TRAIN.PRINT_EVERY == 0:
                logger.info(
                    'epoch: {}, iter: {}, init_cls_loss: {}, init_loc_loss: {}, init_loss: {}'
                    .format(epoch + 1, iter, losses['init_cls_loss'].item(),
                            losses['init_loc_loss'].item(),
                            losses['init_total_loss'].item()))
                logger.info(
                    'epoch: {}, iter: {}, cls_loss: {}, loc_loss: {}, loss: {}'
                    .format(epoch + 1, iter, cls_loss.item(), loc_loss.item(),
                            loss.item()))
                print_speed(iter + 1, average_meter.batch_time.avg,
                            cfg.GRAD.EPOCHS * num_per_epoch)
            begin_time = time.time()
            iter += 1
        # save train_state
        if not os.path.exists(cfg.GRAD.SNAPSHOT_DIR):
            os.makedirs(cfg.GRAD.SNAPSHOT_DIR)
        # put the update to the rpn state
        state = {
            "model": model.state_dict(),
            "optimizer": optimizer.state_dict(),
            "epoch": epoch,
        }
        save_path = "{}/checkpoint_e{}.pth".format(cfg.GRAD.SNAPSHOT_DIR,
                                                   epoch)
        logger.info("save state to {}".format(save_path))
        torch.save(state, save_path)
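
A hedged sketch of resuming from a checkpoint written by the loop above; the path is illustrative, and model/optimizer are assumed to be built the same way as during training.

import torch

checkpoint = torch.load("snapshot/checkpoint_e10.pth", map_location="cpu")
model.load_state_dict(checkpoint["model"])
optimizer.load_state_dict(checkpoint["optimizer"])
start_epoch = checkpoint["epoch"] + 1   # continue with the next epoch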
Code example #17
0
def main():
    # init logger
    init_log('global', args.save_dir, logging.INFO)
    logger = logging.getLogger('global')
    # print arguments
    for arg in vars(args):
        logger.info("{}: {}".format(arg, getattr(args, arg)))

    # get device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # build dataloader and model
    train_loader, test_loader = build_nyu_dataloader(args.dataset_dir)
    opts = {"L": 5, "k": 12, "bn": True}
    model = D3(opts)

    # check GPU numbers and deploy parallel
    # parallel = False
    # if torch.cuda.device_count() > 1:
    #     parallel = True
    #     logger.info("Let's use {:d} GPUs!".format(torch.cuda.device_count()))
    #     # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    #     model = nn.DataParallel(model)
    model.to(device)

    logger.info("*" * 40)
    logger.info(model)
    logger.info("*" * 40)

    # optimizer settings
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # optionally resume from a checkpoint
    # if args.resume:
    #     if os.path.isfile(args.resume):
    #         model, _, args.start_epoch = restore_from(model, optimizer, args.resume)

    # set the best model
    best_model_wts = copy.deepcopy(model.state_dict())
    best_abs_rel = float('inf')  # lower abs_rel is better, so start from infinity
    logger.info("Start training...")

    # epoches = args.batches // train_loader.__len__()

    for epoch in range(args.epoches):

        for g in optimizer.param_groups:
            g['lr'] = args.lr * (1 - args.lr_decay)**(epoch //
                                                      args.lr_decay_step)
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)

        t0 = time.time()
        train_one_epoch(train_loader, model, optimizer, device, epoch)
        t1 = time.time()

        if epoch % args.test_rate == 0:
            test_abs_rel = test_one_epoch(test_loader, model, device, epoch)
            if test_abs_rel < best_abs_rel:
                best_abs_rel = test_abs_rel
                best_model_wts = copy.deepcopy(model.state_dict())

        torch.cuda.empty_cache()

        if epoch % args.test_rate == 0:
            filename = os.path.join(args.save_dir,
                                    'checkpoint_e%d.pth' % (epoch + 1))
            save_checkpoint(
                {
                    'epoch': epoch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict()
                },
                is_best=False,
                filename=filename)
            logger.info("Saved model : {}".format(filename))

        print_speed(epoch, t1 - t0, args.epoches)

        save_checkpoint(
            {
                'batch_num': epoch,
                'state_dict': best_model_wts,
                'optimizer': optimizer.state_dict()
            },
            is_best=True,
            filename=os.path.join(args.save_dir, 'model_best.pth'))

    writer.close()
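
The learning-rate update inside the epoch loop is a step decay: lr = args.lr * (1 - args.lr_decay) ** (epoch // args.lr_decay_step). A small sketch with made-up values (base lr 1e-3, 10% decay every 5 epochs) shows the resulting schedule:

base_lr, lr_decay, lr_decay_step = 1e-3, 0.1, 5
for epoch in range(0, 20, 5):
    lr = base_lr * (1 - lr_decay) ** (epoch // lr_decay_step)
    print(epoch, lr)   # 0: 1e-3, 5: 9e-4, 10: 8.1e-4, 15: 7.29e-4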
Code example #18
0
def train(train_loader, model, optimizer, lr_scheduler, epoch, cfg):
    global tb_index, best_acc, cur_lr, logger
    cur_lr = lr_scheduler.get_cur_lr()
    logger = logging.getLogger('global')
    avg = AverageMeter()
    model.train()
    model = model.cuda()
    end = time.time()

    def is_valid_number(x):
        return not (math.isnan(x) or math.isinf(x) or x > 1e4)

    num_per_epoch = len(train_loader.dataset) // args.epochs // args.batch
    start_epoch = epoch
    for iter, input in enumerate(train_loader):

        if epoch != iter // num_per_epoch + start_epoch:  # next epoch
            epoch = iter // num_per_epoch + start_epoch

            if not os.path.exists(args.save_dir):  # makedir/save model
                os.makedirs(args.save_dir)

            save_checkpoint(
                {
                    'epoch': epoch,
                    'arch': args.arch,
                    'state_dict': model.module.state_dict(),
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                    'anchor_cfg': cfg['anchors']
                }, False,
                os.path.join(args.save_dir, 'checkpoint_e%d.pth' % (epoch)),
                os.path.join(args.save_dir, 'best.pth'))

            if epoch == args.epochs:
                return

            if model.module.features.unfix(epoch / args.epochs):
                logger.info('unfix part model.')
                optimizer, lr_scheduler = build_opt_lr(model.module, cfg, args,
                                                       epoch)

            lr_scheduler.step(epoch)
            cur_lr = lr_scheduler.get_cur_lr()

            logger.info('epoch:{}'.format(epoch))

        tb_index = iter
        if iter % num_per_epoch == 0 and iter != 0:
            for idx, pg in enumerate(optimizer.param_groups):
                logger.info("epoch {} lr {}".format(epoch, pg['lr']))
                tb_writer.add_scalar('lr/group%d' % (idx + 1), pg['lr'],
                                     tb_index)

        data_time = time.time() - end
        avg.update(data_time=data_time)
        x = {
            'cfg': cfg,
            'template': torch.autograd.Variable(input[0]).cuda(),
            'search': torch.autograd.Variable(input[1]).cuda(),
            'label_cls': torch.autograd.Variable(input[2]).cuda(),
            'label_loc': torch.autograd.Variable(input[3]).cuda(),
            'label_loc_weight': torch.autograd.Variable(input[4]).cuda(),
            'label_mask': torch.autograd.Variable(input[6]).cuda(),
            'label_mask_weight': torch.autograd.Variable(input[7]).cuda(),
        }

        outputs = model(x)

        gt_mask = x['label_mask']
        gt_mask = select_gt_img(gt_mask, x['label_mask_weight'])
        pred_mask = outputs['predict'][2]
        pred_mask = select_pred_heatmap(
            pred_mask, x['label_mask_weight'])  #(bs, channel, 127, 127)

        # print("gt_mask", gt_mask.shape)
        # print("pred_mask", pred_mask.shape)

        rpn_cls_loss, rpn_loc_loss, rpn_mask_loss = torch.mean(
            outputs['losses'][0]), torch.mean(
                outputs['losses'][1]), torch.mean(outputs['losses'][2])
        mask_iou_mean, mask_iou_at_5, mask_iou_at_7 = torch.mean(
            outputs['accuracy'][0]), torch.mean(
                outputs['accuracy'][1]), torch.mean(outputs['accuracy'][2])

        cls_weight, reg_weight, mask_weight = cfg['loss']['weight']

        loss = rpn_cls_loss * cls_weight + rpn_loc_loss * reg_weight + rpn_mask_loss * mask_weight

        optimizer.zero_grad()
        loss.backward()

        if cfg['clip']['split']:
            torch.nn.utils.clip_grad_norm_(model.module.features.parameters(),
                                           cfg['clip']['feature'])
            torch.nn.utils.clip_grad_norm_(model.module.rpn_model.parameters(),
                                           cfg['clip']['rpn'])
            torch.nn.utils.clip_grad_norm_(
                model.module.mask_model.parameters(), cfg['clip']['mask'])
        else:
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           args.clip)  # gradient clip

        if is_valid_number(loss.item()):
            optimizer.step()

        siammask_loss = loss.item()

        batch_time = time.time() - end

        avg.update(batch_time=batch_time,
                   rpn_cls_loss=rpn_cls_loss,
                   rpn_loc_loss=rpn_loc_loss,
                   rpn_mask_loss=rpn_mask_loss,
                   siammask_loss=siammask_loss,
                   mask_iou_mean=mask_iou_mean,
                   mask_iou_at_5=mask_iou_at_5,
                   mask_iou_at_7=mask_iou_at_7)

        tb_writer.add_scalar('loss/cls', rpn_cls_loss, tb_index)
        tb_writer.add_scalar('loss/loc', rpn_loc_loss, tb_index)
        tb_writer.add_scalar('loss/mask', rpn_mask_loss, tb_index)
        tb_writer.add_scalar('mask/mIoU', mask_iou_mean, tb_index)
        tb_writer.add_scalar('mask/IoU@0.5', mask_iou_at_5, tb_index)
        tb_writer.add_scalar('mask/IoU@0.7', mask_iou_at_7, tb_index)
        if tb_index % 200 == 0:
            tb_writer.add_image('gt_img', gt_mask[0, :, :, :], tb_index)
            tb_writer.add_image('pred_img', pred_mask[0, :, :, :], tb_index)

        end = time.time()

        if (iter + 1) % args.print_freq == 0:
            logger.info(
                'Epoch: [{0}][{1}/{2}] lr: {lr:.6f}\t{batch_time:s}\t{data_time:s}'
                '\t{rpn_cls_loss:s}\t{rpn_loc_loss:s}\t{rpn_mask_loss:s}\t{siammask_loss:s}'
                '\t{mask_iou_mean:s}\t{mask_iou_at_5:s}\t{mask_iou_at_7:s}'.
                format(epoch + 1, (iter + 1) % num_per_epoch,
                       num_per_epoch,
                       lr=cur_lr,
                       batch_time=avg.batch_time,
                       data_time=avg.data_time,
                       rpn_cls_loss=avg.rpn_cls_loss,
                       rpn_loc_loss=avg.rpn_loc_loss,
                       rpn_mask_loss=avg.rpn_mask_loss,
                       siammask_loss=avg.siammask_loss,
                       mask_iou_mean=avg.mask_iou_mean,
                       mask_iou_at_5=avg.mask_iou_at_5,
                       mask_iou_at_7=avg.mask_iou_at_7))
            print_speed(iter + 1, avg.batch_time.avg,
                        args.epochs * num_per_epoch)
Code example #19
0
File: train.py  Project: raeony/deepmask-pytorch
def train(train_loader, model, criterion, optimizer, epoch):
    logger = logging.getLogger('global')
    batch_time = AverageMeter()
    data_time = AverageMeter()
    mask_losses = AverageMeter()
    score_losses = AverageMeter()

    # switch to train mode
    model.train()
    if args.freeze_bn:
        model.apply(BNtoFixed)
    train_loader.dataset.shuffle()

    end = time.time()
    for i, (img, target, head_status) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        img = img.to(device)
        target = target.to(device)

        # compute output
        output = model(img)
        loss = criterion(output[head_status[0]], target)

        # measure and record loss
        if head_status[0] == 0:
            mask_losses.update(loss.item(), img.size(0))
            loss.mul_(img.size(0))
        else:
            score_losses.update(loss.item(), img.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()  # gradOutputs:mul(self.inputs:size(1))
        # torch.nn.utils.clip_grad_norm_(model.parameters(), 10)  # REMOVE?
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if args.visualize and head_status[0] == 0:
            visual_batch(img, output[0].sigmoid(), target)

        if i % args.print_freq == 0:
            logger.info(
                'Epoch: [{0}][{1}/{2}]\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\n'
                'LR {lr:.1e} \t Mask Loss {mask_loss.val:.4f} ({mask_loss.avg:.4f})\t'
                'Score Loss {score_loss.val:.3f} ({score_loss.avg:.3f})'.
                format(epoch,
                       i,
                       len(train_loader),
                       batch_time=batch_time,
                       lr=optimizer.param_groups[0]['lr'],
                       data_time=data_time,
                       mask_loss=mask_losses,
                       score_loss=score_losses))
            print_speed(epoch * len(train_loader) + i + 1, batch_time.avg,
                        args.maxepoch * len(train_loader))
        step = epoch * len(train_loader) + i + 1
        if head_status[0] == 0:
            mask_true_iSz = torch.nn.functional.interpolate(target,
                                                            size=(args.iSz,
                                                                  args.iSz))
            mask_true = torch.nn.functional.pad(mask_true_iSz,
                                                (16, 16, 16, 16))
            mask_pred_iSz = torch.nn.functional.interpolate(output[0],
                                                            size=(args.iSz,
                                                                  args.iSz))
            # pad the resized prediction, mirroring the mask_true branch above
            mask_pred = torch.nn.functional.pad(mask_pred_iSz, (16, 16, 16, 16))
            writer.add_images('train/img',
                              img,
                              global_step=step,
                              dataformats='NCHW')
            writer.add_images('train/mask_true',
                              mask_true,
                              global_step=step,
                              dataformats='NCHW')
            writer.add_images('train/mask_pred',
                              mask_pred,
                              global_step=step,
                              dataformats='NCHW')
    writer.add_scalar('train_loss/mask_loss', mask_losses.avg, epoch)
    writer.add_scalar('train_loss/score_losses', score_losses.avg, epoch)
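
The mask logging above resizes masks to args.iSz and pads 16 pixels on every side before writing them to TensorBoard. A quick shape check, assuming iSz is 160 so the logged maps come out 192 x 192:

import torch
import torch.nn.functional as F

mask = torch.rand(2, 1, 56, 56)
mask_iSz = F.interpolate(mask, size=(160, 160))
mask_padded = F.pad(mask_iSz, (16, 16, 16, 16))
print(mask_padded.shape)   # torch.Size([2, 1, 192, 192])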