Beispiel #1
0
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

from torchvision.models import densenet161
from torchvision.models import inception_v3

import numpy as np
from VideoClassification.utils.toolkits import accuracy

# Toy batch for exercising accuracy(): a 3x3 array of scores (one row per
# sample) paired with one integer label per row.
out = Variable(torch.from_numpy(np.array([
    [0.1, -0.1, 0],
    [0.1, -0.2, 0.3],
    [1, -0.1, 9],
])))
target = Variable(torch.from_numpy(np.array([1, 2, 0])))

# accuracy() is asked for top-1 and top-2; presumably it returns one entry per
# requested k. Keep only the first element of each entry's underlying data.
acc = [score.data[0] for score in accuracy(out, target, topk=(1, 2))]

import torchvision.transforms as transforms

# Normalisation transform configured with three mean/std values.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
Beispiel #2
0
def VGG_Temporal_Net_Run():
    """Train VGG_Temporal_Net with SGD, logging metrics and saving checkpoints.

    Runs 80 epochs of 2000 steps each. Every step pulls one batch from the
    training queue, performs one optimisation step and logs the training
    loss. Every 20 steps one test batch and one further training batch are
    scored with ``model.inference`` and their top-1/5/10 accuracies are
    logged. Every 2000 steps the model's state dict is saved under
    ``savepath``; the file index is ``epoch % 50``, so the files written
    during epochs 0-29 are overwritten during epochs 50-79.

    After epochs 10, 20, 50 and 60 the learning rate is halved and a fresh
    SGD optimizer is created.

    Relies on names defined outside this function: ``Config``, ``batchsize``,
    ``savepath``, ``logger``, ``accuracy``, ``PictureQueue``,
    ``try_to_load_state_dict`` and the dataset/generator helpers.
    """
    num_epochs = 80
    steps_per_epoch = 2000
    lr = 0.2
    lr_decay = 0.5
    decay_epochs = (10, 20, 50, 60)

    test_acc_tags = ('Temporal/test_acc@1', 'Temporal/test_acc@5', 'Temporal/test_acc@10')
    train_acc_tags = ('Temporal/train_acc@1', 'Temporal/train_acc@5', 'Temporal/train_acc@10')

    net = VGG_Temporal_Net(pretrained=False, dropout1=0.4, dropout2=0.3).cuda()

    if Config.LOAD_SAVED_MODE_PATH is not None:
        from types import MethodType
        # Attach the module-level loader to this instance as a bound method
        # and use it to restore the saved weights.
        net.try_to_load_state_dict = MethodType(try_to_load_state_dict, net)
        net.try_to_load_state_dict(torch.load(Config.LOAD_SAVED_MODE_PATH))
        print('LOAD {} done!'.format(Config.LOAD_SAVED_MODE_PATH))

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=0.1)

    train_queue = PictureQueue(dsl=train_UCF0101_Temporal(), Gen=GenVariables_Temporal, batchsize=batchsize)
    test_queue = PictureQueue(dsl=test_UCF0101_Temporal(), Gen=GenVariables_Temporal, batchsize=batchsize)

    def log_accuracies(tags, scores, at_step):
        # One scalar per tag, taken from the matching position in `scores`.
        for idx, tag in enumerate(tags):
            logger.scalar_summary(tag, scores[idx], at_step)

    step = 0  # global step counter, shared by all logging calls
    for epoch in range(num_epochs):

        for _ in range(steps_per_epoch):
            step += 1

            # --- one optimisation step on a training batch ---
            batch, batch_labels = train_queue.Get()

            net.zero_grad()
            loss = criterion(net(batch), batch_labels)

            logger.scalar_summary('Temporal/train_loss', loss.data[0], step)

            loss.backward()
            optimizer.step()

            print('Temporal epoch: {} cnt: {} loss: {}'.format(epoch, step, loss.data[0]))

            # --- periodic evaluation on one test and one train batch ---
            if step % 20 == 0:
                batch, batch_labels = test_queue.Get()
                scores = net.inference(batch)
                loss = criterion(scores, batch_labels)

                logger.scalar_summary('Temporal/test_loss', loss.data[0], step)
                log_accuracies(test_acc_tags,
                               accuracy(scores, batch_labels, topk=(1, 5, 10)),
                               step)

                batch, batch_labels = train_queue.Get()
                scores = net.inference(batch)
                log_accuracies(train_acc_tags,
                               accuracy(scores, batch_labels, topk=(1, 5, 10)),
                               step)

            # --- periodic checkpoint ---
            if step % 2000 == 0:
                checkpoint = savepath + 'VGG_Temporal_EX1_{:02d}.pt'.format(epoch % 50)
                print('Temporal save model to {}'.format(checkpoint))
                torch.save(net.state_dict(), checkpoint)

        if epoch in decay_epochs:
            lr *= lr_decay
            # NOTE(review): the optimizer is rebuilt with momentum=0.9 while
            # the initial one uses momentum=0.1 - confirm this is intended.
            optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=0.9)
Beispiel #3
0
def _average_over_frames(frame_scores, num_frames):
    # Sum the per-frame rows, divide by the frame count, return a (1, C) row.
    return (frame_scores.sum(0) / num_frames).view(1, -1)


def _tally_hits(counts, acc):
    # Bump counts[k] for every top-k entry of `acc` that registers a hit.
    for k, value in enumerate(acc):
        # accuracy() is called on a single video here, so each entry is
        # presumably either zero or the full score; anything above 0.5 is
        # counted as a hit - TODO confirm against accuracy()'s scale.
        if value > 0.5:
            counts[k] += 1


def VGG_TwoStream_Video_AVG_Merge_Test():
    '''
    Video-level evaluation of the VGG two-stream model with average merging.

    Draws 100 batches from ``gen()``. For every video in a batch, the spatial
    net scores channels 0-2 of each frame and the temporal net scores the
    remaining channels. Per-frame scores are averaged over the frames to get
    one score row per video, and top-1/5/10 hits are tallied for the spatial
    net alone, the temporal net alone, and the mean of the two nets.

    Running tallies are printed before each batch and the final totals are
    printed once all batches are done.

    Relies on names defined outside this function: ``gen``, ``accuracy``,
    ``spa_model_save_file``, ``tem_model_save_file`` and the two model
    classes.

    Returns:
        A tuple ``(correct_d, correct_spa, correct_tmp, total)``: the three
        ``[top1, top5, top10]`` hit counts (merged, spatial-only,
        temporal-only) and the number of videos evaluated.
    '''
    loops = 100
    topk = (1, 5, 10)

    spa_model = VGG_Spatial_Net().cuda()
    tem_model = VGG_Temporal_Net().cuda()

    if spa_model_save_file is not None:
        spa_model.load_state_dict(torch.load(spa_model_save_file))
        print('load spa_model success!')
    if tem_model_save_file is not None:
        tem_model.load_state_dict(torch.load(tem_model_save_file))
        print('load tem_model success!')

    correct_d = [0] * len(topk)
    correct_spa = [0] * len(topk)
    correct_tmp = [0] * len(topk)
    total = 0

    for l in range(loops):

        imgs, labels = gen()

        # Read the shape from the batch actually being evaluated so that a
        # batch of a different size is handled correctly.
        # Assumes imgs is laid out (videos, frames, channels, ...) - TODO confirm.
        n, b = imgs.shape[:2]

        print('l: ', l)
        print('correct_d', correct_d)
        print('correct_spa', correct_spa)
        print('correct_tmp', correct_tmp)

        for i in range(n):

            spatial_input = Variable(torch.from_numpy(imgs[i, :, 0:3, :, :])).cuda().float()
            temporal_input = Variable(torch.from_numpy(imgs[i, :, 3:, :, :])).cuda().float()

            predict_1 = spa_model.inference(spatial_input)
            predict_2 = tem_model.inference(temporal_input)

            # Single-video target of shape (1, 1), taken from column 0 of
            # this video's label row.
            target = np.array([labels[i, 0]])
            target = Variable(torch.from_numpy(target).view(1, 1).cuda().long())

            spa_scores = _average_over_frames(predict_1, b)
            tem_scores = _average_over_frames(predict_2, b)
            merged_scores = _average_over_frames((predict_1 + predict_2) / 2, b)

            _tally_hits(correct_spa, accuracy(spa_scores, target, topk=topk))
            _tally_hits(correct_tmp, accuracy(tem_scores, target, topk=topk))
            _tally_hits(correct_d, accuracy(merged_scores, target, topk=topk))

            total += 1

    # The in-loop prints show the tallies *before* each batch, so the totals
    # after the last batch have to be reported separately.
    print('total videos', total)
    print('final correct_d', correct_d)
    print('final correct_spa', correct_spa)
    print('final correct_tmp', correct_tmp)

    return correct_d, correct_spa, correct_tmp, total