def validate(val_loader,
             model,
             criterion,
             epoch,
             log_freq=1,
             print_sum=True,
             device=None,
             stereo=True):
    """Run one validation epoch and report the average loss.

    Args:
        val_loader: iterable yielding (batch_images, batch_poses) pairs.
        model: network mapping images to pose predictions.
        criterion: loss comparing predictions to ground-truth poses.
        epoch: current epoch index (used for logging only).
        log_freq: log every `log_freq` batches; 0 disables per-batch logs.
        print_sum: if True, print an end-of-epoch summary line.
        device: torch device to move batches onto.
        stereo: if True, each batch is a list of tensors (one per camera);
            otherwise a single tensor.
    """
    losses = AverageMeter()

    # set model to evaluation
    model.eval()

    with torch.no_grad():
        epoch_time = time.time()
        end = time.time()
        for idx, (batch_images, batch_poses) in enumerate(val_loader):
            data_time = time.time() - end

            if stereo:
                batch_images = [x.to(device) for x in batch_images]
                batch_poses = [x.to(device) for x in batch_poses]
            else:
                batch_images = batch_images.to(device)
                batch_poses = batch_poses.to(device)

            # compute model output
            out = model(batch_images)
            loss = criterion(out, batch_poses)

            # `loss.data[0]` was removed for 0-dim tensors in PyTorch >= 0.4;
            # `loss.item()` is the supported scalar accessor.
            losses.update(
                loss.item(),
                len(batch_images) *
                batch_images[0].size(0) if stereo else batch_images.size(0))

            batch_time = time.time() - end
            end = time.time()

            if log_freq != 0 and idx % log_freq == 0:
                print('Val Epoch: {}\t'
                      'Time: {batch_time:.3f}\t'
                      'Data Time: {data_time:.3f}\t'
                      'Loss: {losses.val:.3f}\t'
                      'Avg Loss: {losses.avg:.3f}'.format(
                          epoch,
                          batch_time=batch_time,
                          data_time=data_time,
                          losses=losses))

    if print_sum:
        print(
            'Epoch: [{}]\tValidation Loss: {:.3f}\tEpoch time: {:.3f}'.format(
                epoch, losses.avg, (time.time() - epoch_time)))
# Beispiel #2
def eval_model(val_loader, model, criterion, eval_metric, epoch, use_cuda):
    """Evaluate `model` on `val_loader` with a binary-classification head.

    Args:
        val_loader: iterable yielding (xi, xv, y) triples.
        model: network called as model(xi, xv) returning logits.
        criterion: loss on (logits, y).
        eval_metric: callable(labels, sigmoid_scores) -> metric value.
        epoch: unused here; kept for interface compatibility with callers.
        use_cuda: move batches to GPU when True.

    Returns:
        (avg_loss, metric) over the whole validation set.
    """
    losses = AverageMeter()
    y_pred = []
    y_label = []
    model.train(False)
    # Use a context manager rather than torch.set_grad_enabled(False) so the
    # global grad mode is restored on exit instead of leaking to the caller.
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            xi, xv, y = data[0], data[1], data[2]
            if use_cuda:
                xi, xv, y = xi.cuda(), xv.cuda(), y.cuda()
            outputs = model(xi, xv)
            loss = criterion(outputs, y)
            # sigmoid scores collected on CPU for the metric computation
            pred = torch.sigmoid(outputs).cpu()
            y_pred.extend(pred.data.numpy())
            y_label.extend(y.data.numpy())
            losses.update(loss.item(), y.shape[0])
    total_metric = eval_metric(y_label, y_pred)
    return losses.avg, total_metric
# Beispiel #3
def train_epoch(train_loader, model, criterion, optimizer, epoch, use_cuda):
    """Run a single training epoch over `train_loader` and log the loss."""
    losses = AverageMeter()
    model.train(True)
    torch.set_grad_enabled(True)
    for batch_idx, batch in enumerate(train_loader):
        xi, xv, y = batch[0], batch[1], batch[2]
        if use_cuda:
            xi = xi.cuda()
            xv = xv.cuda()
            y = y.cuda()

        # forward / backward / update
        optimizer.zero_grad()
        outputs = model(xi, xv)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()

        # running average weighted by batch size
        losses.update(loss.item(), y.shape[0])
        progress_bar(batch_idx, len(train_loader),
                     'batch {}, train loss {:.5f}'.format(batch_idx,
                                                          losses.avg))
    logging.info('Epoch: [{0}]\t Loss {loss.avg:.4f}\t'.format(epoch,
                                                               loss=losses))
# Beispiel #4
def train(args):
    """End-to-end ReID training driver.

    Builds data loaders, model, losses and optimizers from `args`, then runs
    the epoch loop with TensorBoard logging, periodic evaluation and
    checkpointing.

    NOTE(review): depends on project helpers (make_data_loader, build_model,
    make_loss, make_optimizer, WarmupMultiStepLR, ReIDEvaluator, AverageMeter)
    and NVIDIA apex `amp`; their contracts are assumed, not verified here.
    """
    # PK sampling requires batch_size % num_instance == 0; shrink the batch
    # size to the nearest valid multiple if it is not.
    if args.batch_size % args.num_instance != 0:
        new_batch_size = (args.batch_size //
                          args.num_instance) * args.num_instance
        print(
            f"given batch size is {args.batch_size} and num_instances is {args.num_instance}."
            +
            f"Batch size must be divided into {args.num_instance}. Batch size will be replaced into {new_batch_size}"
        )
        args.batch_size = new_batch_size

    # prepare dataset
    train_loader, val_loader, num_query, train_data_len, num_classes = make_data_loader(
        args)

    model = build_model(args, num_classes)
    print("model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1e6))
    loss_fn, center_criterion = make_loss(args, num_classes)
    optimizer, optimizer_center = make_optimizer(args, model, center_criterion)

    if args.cuda:
        model = model.cuda()
        if args.amp:
            # apex mixed precision; with center loss both optimizers must be
            # registered with amp so their master weights stay in sync.
            if args.center_loss:
                model, [optimizer, optimizer_center] = \
                    amp.initialize(model, [optimizer, optimizer_center], opt_level="O1")
            else:
                model, optimizer = amp.initialize(model,
                                                  optimizer,
                                                  opt_level="O1")

        # move any optimizer state tensors to the GPU to match the model
        for state in optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.cuda()
        if args.center_loss:
            center_criterion = center_criterion.cuda()
            for state in optimizer_center.state.values():
                for k, v in state.items():
                    if isinstance(v, torch.Tensor):
                        state[k] = v.cuda()

    # NOTE(review): these dicts are captured once, before training, and are
    # what save_weights() later serializes. model.state_dict() values alias
    # the live parameters, but the optimizer state_dicts are snapshots taken
    # here — confirm checkpoints actually contain up-to-date optimizer state.
    model_state_dict = model.state_dict()
    optim_state_dict = optimizer.state_dict()
    if args.center_loss:
        optim_center_state_dict = optimizer_center.state_dict()
        center_state_dict = center_criterion.state_dict()

    reid_evaluator = ReIDEvaluator(args, model, num_query)

    start_epoch = 0
    global_step = 0
    if args.pretrain != '':  # load pre-trained model
        weights = torch.load(args.pretrain)
        model_state_dict = weights["state_dict"]

        model.load_state_dict(model_state_dict)
        if args.center_loss:
            # center parameters live in a sibling checkpoint file
            center_criterion.load_state_dict(
                torch.load(args.pretrain.replace(
                    'model', 'center_param'))["state_dict"])

        if args.resume:
            # also restore epoch/step counters and optimizer state
            start_epoch = weights["epoch"]
            global_step = weights["global_step"]

            optimizer.load_state_dict(
                torch.load(args.pretrain.replace('model',
                                                 'optimizer'))["state_dict"])
            if args.center_loss:
                optimizer_center.load_state_dict(
                    torch.load(
                        args.pretrain.replace(
                            'model', 'optimizer_center'))["state_dict"])
        print(f'Start epoch: {start_epoch}, Start step: {global_step}')

    # warmup + multi-step decay; last_epoch=-1 starts a fresh schedule,
    # otherwise continue from the resumed epoch
    scheduler = WarmupMultiStepLR(optimizer, args.steps, args.gamma,
                                  args.warmup_factor, args.warmup_step,
                                  "linear",
                                  -1 if start_epoch == 0 else start_epoch)

    current_epoch = start_epoch
    best_epoch = 0
    best_rank1 = 0
    best_mAP = 0
    if args.resume:
        # establish a baseline so only improving models are marked "best"
        rank, mAP = reid_evaluator.evaluate(val_loader)
        best_rank1 = rank[0]
        best_mAP = mAP
        best_epoch = current_epoch + 1

    batch_time = AverageMeter()
    total_losses = AverageMeter()

    model_save_dir = os.path.join(args.save_dir, 'ckpts')
    os.makedirs(model_save_dir, exist_ok=True)

    # purge_step drops stale TensorBoard events beyond the resumed step
    summary_writer = SummaryWriter(log_dir=os.path.join(
        args.save_dir, "tensorboard_log"),
                                   purge_step=global_step)

    def summary_loss(score, feat, labels, top_name='global'):
        # Sum the individual loss terms returned by loss_fn and log each one;
        # entries named "accuracy" or containing "dist" are logged only and
        # not added to the optimized loss.
        loss = 0.0
        losses = loss_fn(score, feat, labels)
        for loss_name, loss_val in losses.items():
            if loss_name.lower() == "accuracy":
                summary_writer.add_scalar(f"Score/{top_name}/triplet",
                                          loss_val, global_step)
                continue
            if "dist" in loss_name.lower():
                summary_writer.add_histogram(f"Distance/{loss_name}", loss_val,
                                             global_step)
                continue
            loss += loss_val
            summary_writer.add_scalar(f"losses/{top_name}/{loss_name}",
                                      loss_val, global_step)

        # log the mean softmax probability assigned to the true class
        ohe_labels = torch.zeros_like(score)
        ohe_labels.scatter_(1, labels.unsqueeze(1), 1.0)

        cls_score = torch.softmax(score, dim=1)
        cls_score = torch.sum(cls_score * ohe_labels, dim=1).mean()
        summary_writer.add_scalar(f"Score/{top_name}/X-entropy", cls_score,
                                  global_step)

        return loss

    def save_weights(file_name, eph, steps):
        # Serializes the state dicts captured above (see NOTE at capture site).
        torch.save(
            {
                "state_dict": model_state_dict,
                "epoch": eph + 1,
                "global_step": steps
            }, file_name)
        torch.save({"state_dict": optim_state_dict},
                   file_name.replace("model", "optimizer"))
        if args.center_loss:
            # NOTE(review): center params are written to the
            # "optimizer_center" file and center-optimizer state to the
            # "center_param" file — the reverse of what the load path above
            # expects; verify checkpoint file naming.
            torch.save({"state_dict": center_state_dict},
                       file_name.replace("model", "optimizer_center"))
            torch.save({"state_dict": optim_center_state_dict},
                       file_name.replace("model", "center_param"))

    # training start
    for epoch in range(start_epoch, args.max_epoch):
        model.train()
        t0 = time.time()
        for i, (inputs, labels, _, _) in enumerate(train_loader):
            if args.cuda:
                inputs = inputs.cuda()
                labels = labels.cuda()

            cls_scores, features = model(inputs, labels)

            # losses
            total_loss = summary_loss(cls_scores[0], features[0], labels,
                                      'global')
            if args.use_local_feat:
                total_loss += summary_loss(cls_scores[1], features[1], labels,
                                           'local')

            optimizer.zero_grad()
            if args.center_loss:
                optimizer_center.zero_grad()

            # backward with global loss
            if args.amp:
                optimizers = [optimizer]
                if args.center_loss:
                    optimizers.append(optimizer_center)
                with amp.scale_loss(total_loss, optimizers) as scaled_loss:
                    scaled_loss.backward()
            else:
                # NOTE(review): detect_anomaly adds significant overhead —
                # presumably left on for debugging; confirm before production.
                with torch.autograd.detect_anomaly():
                    total_loss.backward()

            # optimization
            optimizer.step()
            if args.center_loss:
                # undo the center-loss weighting on the center gradients so
                # centers are updated at full scale
                for name, param in center_criterion.named_parameters():
                    try:
                        param.grad.data *= (1. / args.center_loss_weight)
                    except AttributeError:
                        continue
                optimizer_center.step()

            batch_time.update(time.time() - t0)
            total_losses.update(total_loss.item())

            # learning_rate
            current_lr = optimizer.param_groups[0]['lr']
            summary_writer.add_scalar("lr", current_lr, global_step)

            t0 = time.time()

            if (i + 1) % args.log_period == 0:
                print(
                    f"Epoch: [{epoch}][{i+1}/{train_data_len}]  " +
                    f"Batch Time {batch_time.val:.3f} ({batch_time.mean:.3f})  "
                    +
                    f"Total_loss {total_losses.val:.3f} ({total_losses.mean:.3f})"
                )
            global_step += 1

        print(
            f"Epoch: [{epoch}]\tEpoch Time {batch_time.sum:.3f} s\tLoss {total_losses.mean:.3f}\tLr {current_lr:.2e}"
        )

        # periodic (and final-epoch) evaluation
        if args.eval_period > 0 and (epoch + 1) % args.eval_period == 0 or (
                epoch + 1) == args.max_epoch:
            rank, mAP = reid_evaluator.evaluate(
                val_loader,
                mode="retrieval" if args.dataset_name == "cub200" else "reid")

            # build a "Rank-K: x%" summary string for TensorBoard
            rank_string = ""
            for r in (1, 2, 4, 5, 8, 10, 16, 20):
                rank_string += f"Rank-{r:<3}: {rank[r-1]:.1%}"
                if r != 20:
                    rank_string += "    "
            summary_writer.add_text("Recall@K", rank_string, global_step)
            summary_writer.add_scalar("Rank-1", rank[0], (epoch + 1))

            rank1 = rank[0]
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_mAP = mAP
                best_epoch = epoch + 1

            # periodic checkpoint
            if (epoch + 1) % args.save_period == 0 or (epoch +
                                                       1) == args.max_epoch:
                pth_file_name = os.path.join(
                    model_save_dir,
                    f"{args.backbone}_model_{epoch + 1}.pth.tar")
                save_weights(pth_file_name, eph=epoch, steps=global_step)

            # best-so-far checkpoint
            if is_best:
                pth_file_name = os.path.join(
                    model_save_dir, f"{args.backbone}_model_best.pth.tar")
                save_weights(pth_file_name, eph=epoch, steps=global_step)

        # end epoch
        current_epoch += 1

        batch_time.reset()
        total_losses.reset()
        torch.cuda.empty_cache()

        # update learning rate
        scheduler.step()

    print(f"Best rank-1 {best_rank1:.1%}, achived at epoch {best_epoch}")
    # record this run's hyperparameters alongside its best metrics
    summary_writer.add_hparams(
        {
            "dataset_name": args.dataset_name,
            "triplet_dim": args.triplet_dim,
            "margin": args.margin,
            "base_lr": args.base_lr,
            "use_attn": args.use_attn,
            "use_mask": args.use_mask,
            "use_local_feat": args.use_local_feat
        }, {
            "mAP": best_mAP,
            "Rank1": best_rank1
        })
# Beispiel #5
def test(model,
         loader_test,
         data_length,
         device,
         criterion,
         batch_size,
         print_logger,
         step,
         use_top5=False,
         verbose=False):
    """Evaluate `model` over a resettable (DALI-style) test loader.

    Logs loss and top-1/top-5 precision through `print_logger`, resets the
    loader so it can be iterated again, and returns the average top-1.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    started = time.time()
    with torch.no_grad():
        # evaluation mode: dropout off, BN uses running stats
        model.eval()
        tick = time.time()

        for batch in loader_test:
            # DALI pipelines yield a list whose first element maps
            # names to tensors
            images = batch[0]["data"].to(device)
            targets = batch[0]["label"].squeeze().long().to(device)

            # forward pass and loss
            preds = model(images)
            loss = criterion(preds, targets)

            # record loss and top-1/top-5 precision for this batch
            prec1, prec5 = accuracy(preds, targets, topk=(1, 5))
            losses.update(loss.item(), batch_size)
            top1.update(prec1[0], batch_size)
            top5.update(prec5[0], batch_size)

            # per-batch timing
            batch_time.update(time.time() - tick)
            tick = time.time()
    finished = time.time()

    print_logger.info('Test Step [{0}]: '
                      'Loss {loss.avg:.4f} '
                      'Prec@1(1,5) {top1.avg:.2f}, {top5.avg:.2f} '
                      'Time {time}'.format(step,
                                           loss=losses,
                                           top1=top1,
                                           top5=top5,
                                           time=finished - started))

    loader_test.reset()
    return top1.avg
# Beispiel #6
def finetune(model, loader_train, data_length, device, criterion, optimizer,
             scheduler, print_freq, print_logger, step, batch_size, epochs=1,
             use_top5=False, verbose=True):
    """Fine-tune `model` on a resettable (DALI-style) train loader.

    Runs `epochs` epochs of standard SGD-style updates with gradient
    clipping and returns (model, average top-1 of the last epoch).
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    top1 = AverageMeter()
    top5 = AverageMeter()
    best_acc = 0.

    # training mode: dropout / BN statistics active
    model.train()
    tick = time.time()
    started = time.time()
    num_iterations = int(data_length / batch_size)

    for epoch in range(epochs):
        scheduler.step(epoch)

        for it, batch in enumerate(loader_train):
            images = batch[0]["data"].to(device)
            targets = batch[0]["label"].squeeze().long().to(device)
            # time spent fetching this batch
            data_time.update(time.time() - tick)

            # forward / backward
            optimizer.zero_grad()
            preds = model(images)
            loss = criterion(preds, targets)
            loss.backward()

            # guard against exploding gradients
            nn.utils.clip_grad_norm_(model.parameters(), CLIP_VALUE)

            optimizer.step()

            # project-specific optimizer attribute; presumably resets its
            # accumulated moment buffers each step — kept as-is
            optimizer.moment = []

            # bookkeeping: loss and top-1/top-5 precision
            prec1, prec5 = accuracy(preds.data, targets, topk=(1, 5))
            losses.update(loss.item(), batch_size)
            top1.update(prec1.item(), batch_size)
            top5.update(prec5.item(), batch_size)

            batch_time.update(time.time() - tick)
            tick = time.time()

            if it % print_freq == 0:
                print_logger.info(
                    'Finetune Step [{0}] Epoch [{1}|{2}] ({3}/{4}): '
                    'Loss {loss.avg:.4f} '
                    'Prec@1(1,5) {top1.avg:.2f}, {top5.avg:.2f} '.format(
                        step,
                        epoch,
                        epochs,
                        it,
                        num_iterations,
                        loss=losses,
                        top1=top1,
                        top5=top5))

        # track the best accuracy seen so far (top-5 or top-1 as requested)
        current = top5.avg if use_top5 else top1.avg
        if current > best_acc:
            best_acc = current
        loader_train.reset()
    return model, top1.avg
# Beispiel #7
def finetune_one_batch(model,
                       pre_params,
                       loader_train,
                       data_length,
                       device,
                       criterion,
                       optimizer,
                       scheduler,
                       print_freq,
                       print_logger,
                       step,
                       batch_size,
                       epochs=1,
                       use_top5=False,
                       verbose=True):
    """Fine-tune on `loader_train` and estimate per-parameter importance.

    Returns (model, suminfo, top1.avg), where `suminfo` sums
    moment[i] * (pre_params[i] - params[i])**2 over all parameters — a
    Fisher-information-style score of how much each parameter moved,
    weighted by the optimizer's moment estimate.

    NOTE(review): relies on a project-specific optimizer exposing a
    `moment` attribute (one entry per parameter, in the same order as
    model.named_parameters()) — confirm against the optimizer in use.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    top1 = AverageMeter()
    top5 = AverageMeter()
    best_acc = 0.
    informance = 0.0
    params = []

    model.train()
    end = time.time()
    t1 = time.time()

    for epoch in range(epochs):
        if scheduler is not None:
            scheduler.step(epoch)

        for batch_idx, data in enumerate(loader_train, 0):
            # for i,(inputs,targets) in enumerate(loader_train,0):
            # pdb.set_trace()
            inputs, targets = data
            inputs = inputs.to(device)
            targets = targets.to(device)
            # measure data loading time
            data_time.update(time.time() - end)

            optimizer.zero_grad()
            # compute output
            output = model(inputs)

            loss = criterion(output, targets)
            # compute gradient
            loss.backward()

            # clip gradients to keep the short fine-tune stable
            nn.utils.clip_grad_norm_(model.parameters(), CLIP_VALUE)
            params = []
            optimizer.step()

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, targets, topk=(1, 5))
            losses.update(loss.item(), batch_size)
            top1.update(prec1.item(), batch_size)
            top5.update(prec5.item(), batch_size)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            print_logger.info(
                'Finetune One Batch Step [{0}]: '
                'Loss {loss.avg:.4f} '
                'Prec@1(1,5) {top1.avg:.2f}, {top5.avg:.2f} '.format(
                    step, loss=losses, top1=top1, top5=top5))

        # collect post-update parameters (order matches pre_params)
        for _, p in model.named_parameters():
            params.append(p)

        # weight squared parameter displacement by the optimizer's moment
        # estimate to score each parameter's importance
        moment = optimizer.moment
        informance = [0.0 for i in range(len(moment))]

        suminfo = 0.0
        for i in range(len(moment)):
            informance[i] = moment[i] * torch.pow(
                (pre_params[i] - params[i]), 2)

        # reduce every per-parameter importance tensor to one scalar
        suminfo = 0.0
        for info in informance:
            suminfo += torch.sum(info).item()

        if use_top5:
            if top5.avg > best_acc:
                best_acc = top5.avg
        else:
            if top1.avg > best_acc:
                best_acc = top1.avg
        optimizer.moment = []
    return model, suminfo, top1.avg
def train(train_loader,
          model,
          criterion,
          optimizer,
          epoch,
          max_epoch,
          log_freq=1,
          print_sum=True,
          poses_mean=None,
          poses_std=None,
          device=None,
          stereo=True):
    """Train a pose-regression model for one epoch.

    Args:
        train_loader: iterable of (batch_images, batch_poses); poses are
            7-vectors (xyz translation + quaternion).
        model, criterion, optimizer: the usual training triple. `criterion`
            must expose learnable weights `sx`/`sq` (homoscedastic loss
            weighting), reported in the summary line.
        epoch, max_epoch: position in the schedule (logging only).
        log_freq: per-batch logging interval; 0 disables it.
        print_sum: print an end-of-epoch summary line.
        poses_mean, poses_std: if both given, un-normalize translations
            before computing the reported errors.
        device: torch device to move batches onto.
        stereo: if True, each batch is a list of tensors (one per camera).
    """
    # switch model to training
    model.train()

    losses = AverageMeter()

    epoch_time = time.time()

    # accumulated ground-truth / predicted 7-dof poses for error statistics
    gt_poses = np.empty((0, 7))
    pred_poses = np.empty((0, 7))

    end = time.time()
    for idx, (batch_images, batch_poses) in enumerate(train_loader):
        data_time = (time.time() - end)

        if stereo:
            batch_images = [x.to(device) for x in batch_images]
            batch_poses = [x.to(device) for x in batch_poses]
        else:
            batch_images = batch_images.to(device)
            batch_poses = batch_poses.to(device)

        out = model(batch_images)
        loss = criterion(out, batch_poses)

        # Make an optimization step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # `.data[0]` indexing on a 0-dim loss tensor was removed in
        # PyTorch >= 0.4; `.item()` is the supported scalar accessor.
        losses.update(
            loss.item(),
            len(batch_images) *
            batch_images[0].size(0) if stereo else batch_images.size(0))

        # move data to cpu & numpy for error statistics
        if stereo:
            bp = [x.detach().cpu().numpy() for x in batch_poses]
            outp = [x.detach().cpu().numpy() for x in out]
            gt_poses = np.vstack((gt_poses, *bp))
            pred_poses = np.vstack((pred_poses, *outp))
        else:
            bp = batch_poses.detach().cpu().numpy()
            outp = out.detach().cpu().numpy()
            gt_poses = np.vstack((gt_poses, bp))
            pred_poses = np.vstack((pred_poses, outp))

        batch_time = (time.time() - end)
        end = time.time()

        if log_freq != 0 and idx % log_freq == 0:
            print('Epoch: [{}/{}]\tBatch: [{}/{}]\t'
                  'Time: {batch_time:.3f}\t'
                  'Data Time: {data_time:.3f}\t'
                  'Loss: {losses.val:.3f}\t'
                  'Avg Loss: {losses.avg:.3f}\t'.format(epoch,
                                                        max_epoch - 1,
                                                        idx,
                                                        len(train_loader) - 1,
                                                        batch_time=batch_time,
                                                        data_time=data_time,
                                                        losses=losses))

    # un-normalize translation
    unnorm = (poses_mean is not None) and (poses_std is not None)
    if unnorm:
        gt_poses[:, :3] = gt_poses[:, :3] * poses_std + poses_mean
        pred_poses[:, :3] = pred_poses[:, :3] * poses_std + poses_mean

    # translation error: euclidean distance; rotation error: angular
    # distance between predicted and ground-truth quaternions
    t_loss = np.asarray([
        np.linalg.norm(p - t)
        for p, t in zip(pred_poses[:, :3], gt_poses[:, :3])
    ])
    q_loss = np.asarray([
        quaternion_angular_error(p, t)
        for p, t in zip(pred_poses[:, 3:], gt_poses[:, 3:])
    ])

    if print_sum:
        print(
            'Ep: [{}/{}]\tTrain Loss: {:.3f}\tTe: {:.3f}\tRe: {:.3f}\t Et: {:.2f}s\t\
              {criterion_sx:.5f}:{criterion_sq:.5f}'.format(
                epoch,
                max_epoch - 1,
                losses.avg,
                np.mean(t_loss),
                np.mean(q_loss), (time.time() - epoch_time),
                criterion_sx=criterion.sx.item(),
                criterion_sq=criterion.sq.item()))