# Example 1
def train(train_loader, model, optimizer, epoch, logger):
    """Run one training epoch; log frame-level and soft-voted video-level accuracy.

    Args:
        train_loader: yields (input_var, target_var, index) batches, where
            ``index`` maps each frame to its source video id.
        model: network producing per-frame class scores.
        optimizer: optimizer stepped once per batch.
        epoch: current epoch number (logging only).
        logger: object exposing a ``print`` method.
    """
    losses = util.AverageMeter()
    topframe = util.AverageMeter()
    topVideoSoft = util.AverageMeter()

    # Epoch-wide accumulators for the per-video aggregation performed below.
    output_store_soft = []
    target_store = []
    index_vector = []

    model.train()

    for i, (input_var, target_var, index) in enumerate(train_loader):

        target_var = target_var.to(DEVICE)
        input_var = input_var.to(DEVICE)

        # Forward pass; cross_entropy already reduces to a scalar mean,
        # so the original trailing `.sum()` was a no-op and is dropped.
        pred_score = model(input_var)
        loss = F.cross_entropy(pred_score, target_var)

        # Detach stored tensors: without `.detach()` every batch's autograd
        # graph would be kept alive for the whole epoch (memory leak).
        output_store_soft.append(F.softmax(pred_score, dim=1).detach())
        target_store.append(target_var.detach())
        index_vector.append(index)

        # measure accuracy and record loss
        acc_iter = util.accuracy(pred_score.data, target_var, topk=(1,))
        losses.update(loss.item(), input_var.size(0))
        topframe.update(acc_iter[0], input_var.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 200 == 0:
            logger.print('Epoch: [{:3d}][{:3d}/{:3d}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc_Iter@1 {topframe.val:.3f} ({topframe.avg:.3f})\t'
                .format(
                epoch, i, len(train_loader), loss=losses, topframe=topframe))

    # Build a (num_videos, num_frames) 0/1 membership matrix: row v marks
    # the frames belonging to video v. (Assumes video ids are contiguous
    # from 0 -- same assumption as the original range loop.)
    index_vector = torch.cat(index_vector, dim=0)  # [256] ... [256]  --->  [21570]
    num_videos = int(max(index_vector)) + 1
    index_matrix = torch.stack(
        [index_vector == v for v in range(num_videos)],
        dim=0).to(DEVICE).float()  # [21570]  --->  [380, 21570]

    output_store_soft = torch.cat(output_store_soft, dim=0)
    target_store = torch.cat(target_store, dim=0).float()  # [256] ... [256]  --->  [21570]
    # Soft voting: sum per-frame softmax scores per video; the per-video
    # label is the mean of its (identical) frame labels.
    output_store_soft = index_matrix.mm(output_store_soft)
    target_vector = index_matrix.mm(target_store.unsqueeze(1)).squeeze(1).div(
        index_matrix.sum(1)).long()  # [380,21570] * [21570,1] -> [380,1] / sum([21570,1]) -> [380]
    prec_video_soft = util.accuracy(output_store_soft, target_vector, topk=(1,))
    # Original passed `i + 1` with `i` shadowed by the video-id loop; that
    # value equals num_videos, which we now pass explicitly.
    topVideoSoft.update(prec_video_soft[0].item(), num_videos)
    logger.print(' *Acc@Video_soft {topsoft.avg:.3f}   *Acc@Frame {topframe.avg:.3f} '.format(topsoft=topVideoSoft, topframe=topframe))
# Example 2
def val(train_loader, model, logger):
    """Evaluate the model: per-frame accuracy plus soft-voted per-video accuracy.

    Per-frame softmax scores are summed per video through a 0/1 membership
    matrix, and each video's label is recovered as the mean of its frame
    labels. Returns the average video-level top-1 accuracy.
    """
    topframe = util.AverageMeter()
    topVideoSoft = util.AverageMeter()

    # Epoch-wide accumulators for the per-video aggregation done afterwards.
    softmax_chunks = []
    label_chunks = []
    video_id_chunks = []

    model.eval()
    with torch.no_grad():
        for input_var, target_var, index in train_loader:
            target_var = target_var.to(DEVICE)
            input_var = input_var.to(DEVICE)

            pred_score = model(input_var)

            softmax_chunks.append(F.softmax(pred_score, dim=1))
            label_chunks.append(target_var)
            video_id_chunks.append(index)

            # Frame-level top-1 accuracy for this batch.
            acc_iter = util.accuracy(pred_score.data, target_var, topk=(1, ))
            topframe.update(acc_iter[0], input_var.size(0))

        # One long vector mapping every frame to its source video id.
        video_ids = torch.cat(video_id_chunks, dim=0)
        video_count = int(max(video_ids)) + 1

        # Membership matrix: row v is a 0/1 mask over frames of video v.
        membership = torch.stack(
            [video_ids == v for v in range(video_count)],
            dim=0).to(DEVICE).float()

        all_scores = torch.cat(softmax_chunks, dim=0)
        all_labels = torch.cat(label_chunks, dim=0).float()

        # Soft voting: sum frame scores per video; per-video label is the
        # mean of its frame labels (frames of a video share one label).
        video_scores = membership.mm(all_scores)
        video_labels = membership.mm(all_labels.unsqueeze(1)).squeeze(1).div(
            membership.sum(1)).long()

        prec_video_soft = util.accuracy(video_scores, video_labels,
                                        topk=(1, ))
        topVideoSoft.update(prec_video_soft[0].item(), video_count)
        logger.print(
            ' *Acc@Video {topVideo.avg:.3f} '.format(topVideo=topVideoSoft))

    return topVideoSoft.avg
# Example 3
def val(val_loader, model, at_type):
    """Evaluate with attention-based frame aggregation; return video accuracy.

    Args:
        val_loader: yields (input_var, target, index) batches; ``index`` maps
            each frame to its source video id.
        model: called once per batch with ``phrase='eval'`` to obtain frame
            features ``f`` and attention weights ``alphas``, then once more to
            predict at the video level.
        at_type: 'self-attention' or 'self_relation-attention'.

    Returns:
        Average video-level top-1 accuracy.

    Raises:
        ValueError: if ``at_type`` is not one of the two supported modes.
    """
    topVideo = util.AverageMeter()

    # switch to evaluate mode
    model.eval()
    output_store_fc = []
    output_alpha = []
    target_store = []
    index_vector = []
    with torch.no_grad():
        for input_var, target, index in val_loader:
            # compute output
            target = target.to(DEVICE)
            input_var = input_var.to(DEVICE)
            # Per-frame features and attention weights from the model.
            f, alphas = model(input_var, phrase='eval')

            output_store_fc.append(f)
            output_alpha.append(alphas)
            target_store.append(target)
            index_vector.append(index)

        # 0/1 membership matrix: row v marks the frames of video v.
        index_vector = torch.cat(index_vector, dim=0)  # [256] ... [256]  --->  [21570]
        num_videos = int(max(index_vector)) + 1
        index_matrix = torch.stack(
            [index_vector == v for v in range(num_videos)],
            dim=0).to(DEVICE).float()  # [21570]  --->  [380, 21570]

        output_store_fc = torch.cat(output_store_fc, dim=0)  # [256,7] ... [256,7]  --->  [21570, 7]
        output_alpha = torch.cat(output_alpha, dim=0)        # [256,1] ... [256,1]  --->  [21570, 1]
        target_store = torch.cat(target_store, dim=0).float()  # [256] ... [256]  --->  [21570]

        # Attention-weighted mean of frame features per video.
        weight_sourcefc = output_store_fc.mul(output_alpha)   # [21570,512] * [21570,1] ---> [21570,512]
        sum_alpha = index_matrix.mm(output_alpha)  # [380,21570] * [21570,1] -> [380,1]
        weightmean_sourcefc = index_matrix.mm(weight_sourcefc).div(sum_alpha)
        # Per-video label: mean of its (identical) frame labels.
        target_vector = index_matrix.mm(target_store.unsqueeze(1)).squeeze(1).div(
            index_matrix.sum(1)).long()  # [380,21570] * [21570,1] -> [380,1] / sum([21570,1]) -> [380]

        if at_type == 'self-attention':
            pred_score = model(vm=weightmean_sourcefc, phrase='eval',
                               AT_level='pred')
        elif at_type == 'self_relation-attention':
            pred_score = model(vectors=output_store_fc, vm=weightmean_sourcefc,
                               alphas_from1=output_alpha,
                               index_matrix=index_matrix, phrase='eval',
                               AT_level='second_level')
        else:
            # Original code left pred_score unbound here (UnboundLocalError);
            # fail loudly with a clear message instead.
            raise ValueError('unknown at_type: {!r}'.format(at_type))

        acc_video = util.accuracy(pred_score.cpu(), target_vector.cpu(), topk=(1,))
        # Count equals the number of videos (was the shadowed `i + 1`).
        topVideo.update(acc_video[0], num_videos)
        # NOTE(review): `logger` is read as a global here, unlike the other
        # examples where it is a parameter -- confirm it exists at module level.
        logger.print(' *Acc@Video {topVideo.avg:.3f} '.format(topVideo=topVideo))

        return topVideo.avg