import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.autograd import Variable
from torchvision.models import densenet161
from torchvision.models import inception_v3

from VideoClassification.utils.toolkits import accuracy

# Import-time smoke check of ``accuracy``: a hand-written score matrix for
# three samples over three classes, scored against three integer labels at
# top-1 and top-2. ``acc`` ends up holding the first element of each
# returned entry's ``.data``.
out = Variable(torch.from_numpy(
    np.array([[0.1, -0.1, 0], [0.1, -0.2, 0.3], [1, -0.1, 9]])
))
target = Variable(torch.from_numpy(np.array([1, 2, 0])))
acc = [a.data[0] for a in accuracy(out, target, topk=(1, 2))]

# Per-channel input normalisation (three means / three standard deviations).
# NOTE(review): the values look like the usual ImageNet statistics - confirm
# they are the ones the pretrained weights in use expect.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
def VGG_Temporal_Net_Run():
    '''
    Train ``VGG_Temporal_Net`` on the temporal-stream queue with plain SGD.

    Runs 80 epochs of 2000 optimisation steps. Each step pulls one batch
    from the training ``PictureQueue``, minimises cross-entropy and logs the
    loss through ``logger.scalar_summary`` under the ``Temporal/`` tags.

    * Every 20 steps one test batch and one fresh training batch are scored
      with ``model.inference`` and their top-1/5/10 accuracies are logged.
    * Every 2000 steps (i.e. on the last step of each epoch) the model's
      ``state_dict`` is written below ``savepath``. The file index is
      ``epoch % 50``, so epochs 50 and later reuse the names of epochs 0+.
    * After epochs 10, 20, 50 and 60 the learning rate is multiplied by 0.5
      and a new SGD optimizer is created.

    Relies on module-level names that are not defined in this function
    (``Config``, ``PictureQueue``, ``batchsize``, ``logger``, ``savepath``,
    ``try_to_load_state_dict``, ...). Requires CUDA. Returns ``None``.
    '''
    num_epochs = 80
    steps_per_epoch = 2000
    lr = 0.2
    lr_decay = 0.5
    decay_epochs = (10, 20, 50, 60)

    test_acc_tags = ('Temporal/test_acc@1',
                     'Temporal/test_acc@5',
                     'Temporal/test_acc@10')
    train_acc_tags = ('Temporal/train_acc@1',
                      'Temporal/train_acc@5',
                      'Temporal/train_acc@10')

    model = VGG_Temporal_Net(pretrained=False, dropout1=0.4, dropout2=0.3).cuda()

    if Config.LOAD_SAVED_MODE_PATH is not None:
        import types
        # Attach the loader as a bound method on this instance, then use it
        # to restore the saved weights.
        model.try_to_load_state_dict = types.MethodType(try_to_load_state_dict, model)
        model.try_to_load_state_dict(torch.load(Config.LOAD_SAVED_MODE_PATH))
        print('LOAD {} done!'.format(Config.LOAD_SAVED_MODE_PATH))

    lossfunc = nn.CrossEntropyLoss()
    optim = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.1)

    pq_train = PictureQueue(dsl=train_UCF0101_Temporal(),
                            Gen=GenVariables_Temporal,
                            batchsize=batchsize)
    pq_test = PictureQueue(dsl=test_UCF0101_Temporal(),
                           Gen=GenVariables_Temporal,
                           batchsize=batchsize)

    def log_topk(tags, scores, truth, step):
        # Log top-1/5/10 accuracy of ``scores`` against ``truth``, one tag each.
        acc = accuracy(scores, truth, topk=(1, 5, 10))
        for k, tag in enumerate(tags):
            logger.scalar_summary(tag, acc[k], step)

    step = 0
    for epoch in range(num_epochs):
        for _ in range(steps_per_epoch):
            step += 1

            # One optimisation step on a training batch.
            batch, truth = pq_train.Get()
            model.zero_grad()
            loss = lossfunc(model(batch), truth)
            logger.scalar_summary('Temporal/train_loss', loss.data[0], step)
            loss.backward()
            optim.step()
            print('Temporal epoch: {} cnt: {} loss: {}'.format(epoch, step, loss.data[0]))

            if step % 20 == 0:
                # Periodic evaluation: a test batch first, then a fresh
                # training batch, both scored through model.inference.
                batch, truth = pq_test.Get()
                scores = model.inference(batch)
                eval_loss = lossfunc(scores, truth)
                logger.scalar_summary('Temporal/test_loss', eval_loss.data[0], step)
                log_topk(test_acc_tags, scores, truth, step)

                batch, truth = pq_train.Get()
                log_topk(train_acc_tags, model.inference(batch), truth, step)

            if step % 2000 == 0:
                savefile = savepath + 'VGG_Temporal_EX1_{:02d}.pt'.format(epoch % 50)
                print('Temporal save model to {}'.format(savefile))
                torch.save(model.state_dict(), savefile)

        if epoch in decay_epochs:
            lr *= lr_decay
            # NOTE(review): the replacement optimizer uses momentum 0.9 while
            # the initial one uses 0.1, and rebuilding drops any accumulated
            # momentum state - confirm both are intended.
            optim = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
def _video_level_scores(frame_scores, num_frames):
    # Average per-frame class scores over dim 0 into a single (1, C) row.
    # ``view(1, -1)`` keeps this independent of the number of classes.
    return frame_scores.sum(0).view(1, -1) / num_frames


def _tally_hits(counts, acc):
    # Bump counts[k] for every top-k slot of ``acc`` that exceeds 0.5.
    for k in range(len(counts)):
        if acc[k] > 0.5:
            counts[k] += 1


def _report_tallies(batches_done, correct_d, correct_spa, correct_tmp):
    # Print the running hit counts in the format used throughout the run.
    print('l: ', batches_done)
    print('correct_d', correct_d)
    print('correct_spa', correct_spa)
    print('correct_tmp', correct_tmp)


def VGG_TwoStream_Video_AVG_Merge_Test():
    '''
    Video-level VGG two-stream evaluation with average (AVG) score fusion.

    Builds ``VGG_Spatial_Net`` and ``VGG_Temporal_Net`` on the GPU, loads
    their weights when ``spa_model_save_file`` / ``tem_model_save_file`` are
    not ``None``, then evaluates 100 batches drawn from ``gen()``.

    For every video in a batch the per-frame outputs of each network are
    averaged over the frame axis, giving three video-level score rows:
    spatial only, temporal only, and the mean of both streams. Each row is
    scored with ``accuracy`` at top-1/5/10 and the hits are accumulated in
    ``correct_spa``, ``correct_tmp`` and ``correct_d`` respectively.

    Running totals are printed before each batch and once more after the
    last batch, so the final figures are visible. Returns ``None``.

    Assumptions to confirm against ``gen`` (not visible here):
      * ``imgs`` is a numpy array shaped (videos, frames, channels, ...),
        with channels 0:3 feeding the spatial net and 3: the temporal net;
      * ``labels`` is a 2-D numpy array whose column 0 is the class index;
      * ``accuracy`` yields a value above 0.5 exactly when the single video
        is a top-k hit.
    '''
    loops = 100
    topk = (1, 5, 10)

    spa_model = VGG_Spatial_Net().cuda()
    tem_model = VGG_Temporal_Net().cuda()

    if spa_model_save_file is not None:
        spa_model.load_state_dict(torch.load(spa_model_save_file))
        print('load spa_model success!')
    if tem_model_save_file is not None:
        tem_model.load_state_dict(torch.load(tem_model_save_file))
        print('load tem_model success!')

    correct_d = [0, 0, 0]
    correct_spa = [0, 0, 0]
    correct_tmp = [0, 0, 0]

    for l in range(loops):
        imgs, labels = gen()
        # Read the video and frame counts from the batch actually being
        # evaluated, so batches of differing size are handled correctly.
        n, b = imgs.shape[0], imgs.shape[1]

        _report_tallies(l, correct_d, correct_spa, correct_tmp)

        for i in range(n):
            spatial_input = Variable(torch.from_numpy(imgs[i, :, 0:3, :, :])).cuda().float()
            temporal_input = Variable(torch.from_numpy(imgs[i, :, 3:, :, :])).cuda().float()

            predict_1 = spa_model.inference(spatial_input)
            predict_2 = tem_model.inference(temporal_input)

            # Fuse the two streams frame by frame, then average over frames.
            predict_all = _video_level_scores((predict_1 + predict_2) / 2, b)

            target = np.array([labels[i, 0]])
            target = Variable(torch.from_numpy(target).view(1, 1).cuda().long())

            # Spatial stream alone.
            acc = accuracy(_video_level_scores(predict_1, b), target, topk=topk)
            _tally_hits(correct_spa, acc)

            # Temporal stream alone.
            acc = accuracy(_video_level_scores(predict_2, b), target, topk=topk)
            _tally_hits(correct_tmp, acc)

            # Average of both streams.
            acc = accuracy(predict_all, target, topk=topk)
            _tally_hits(correct_d, acc)

    # The in-loop report runs before each batch is scored, so emit one last
    # report to cover the final batch as well.
    _report_tallies(loops, correct_d, correct_spa, correct_tmp)