def Only_Merge_Spatial_Net_Test(): spa_model = resnet152_SpatialNet().cuda().eval() if spa_model_save_file is not None: spa_model.load_state_dict(torch.load(spa_model_save_file)) print('load spa_model success!') dsl = test_UCF101_ChooseRandomFromSameVideo(dsl=UCF101_Spatial) # dsl = test_UCF0101_Spatial() # def gen(): # return GenVariables_Spatial(dsl,batchsize=16) loops = 500 correct_1 = 0 correct_5 = 0 correct_10 = 0 for l in range(loops): imgpaths, Lables = random.choice(dsl) imgs = [] for path in imgpaths: imgs.append(cv2.imread(path)) imgs = np.array(imgs) imgs = ImgAugPipes(imgs, NoAug=True, isNormal=True) imgs = Variable(torch.from_numpy(imgs)).cuda().float() lable = Variable(torch.from_numpy(np.array(Lables))).cuda().long() pred = spa_model(imgs[:3]) lable = Variable(torch.from_numpy(np.array([Lables[0]]))).cuda().long() pred = pred.sum(0) / 3 # acc = accuracy(pred.cpu(),lable.cpu(),(1,5,10)) acc = accuracy(pred.cpu(), lable.cpu(), (1, 5, 10)) correct_1 += acc[0] correct_5 += acc[1] correct_10 += acc[2] print(l, ':', acc) print('acc@1:', correct_1 / loops) print('acc@5:', correct_5 / loops) print('acc@10:', correct_10 / loops) '''
def Only_Merge_Temporal_Net():
    # Video-level evaluation of the temporal (optical-flow) stream alone.
    # NOTE(review): this function sits inside the module-level ''' string that
    # opens at the end of Only_Merge_Spatial_Net_Test — it is commented out
    # and never executed.
    dsl = test_UCF101_ChooseRandomFromSameVideo(dsl=UCF101_TwoStream)
    tem_model = resnet152_TemporalNet().cuda()
    if tem_model_save_file is not None:
        tem_model.load_state_dict(torch.load(tem_model_save_file))
        print('load tem_model success!')
    tem_model.eval()
    loops = 100
    correct_1 = 0
    correct_5 = 0
    correct_10 = 0
    for l in range(loops):
        imgpathss, labels = random.choice(dsl)
        # drop the first (RGB) path of every sample, keep only the flow frames
        imgpathss = [imgpaths[1:] for imgpaths in imgpathss]
        imgs = GenTensors(imgpathss, isTemporal=True, NoAug=True)
        imgs = Variable(imgs).cuda()
        labels = Variable(torch.from_numpy(np.array(labels))).cuda().long()
        pred = tem_model(imgs)
        # average the 8 per-clip predictions into one video-level score
        # (8 presumably matches the number of samples per video — confirm)
        pred = pred.sum(0) / 8
        lable = labels[0].cuda().long()
        acc = accuracy(pred, lable, topk=(1, 5, 10))
        print(l, ':', acc)
        correct_1 += acc[0]
        correct_5 += acc[1]
        correct_10 += acc[2]
    print('acc@1:', correct_1 / loops)
    print('acc@5:', correct_5 / loops)
    print('acc@10:', correct_10 / loops)
def TestOnlyTemporalNet():
    # Batch-level (not video-level) evaluation of the temporal stream on
    # random 16-sample batches.
    # NOTE(review): still inside the module-level ''' string — dead code; the
    # string closes at the end of this function.
    tem_model = resnet152_TemporalNet().cuda()
    if tem_model_save_file is not None:
        tem_model.load_state_dict(torch.load(tem_model_save_file))
        print('load tem_model success!')
    tem_model.eval()
    dsl = test_UCF0101_Temporal()

    def gen():
        # draw one random batch of 16 temporal samples
        return GenVariables_Temporal(dsl, batchsize=16)

    loops = 10
    correct_1 = 0
    correct_5 = 0
    correct_10 = 0
    for l in range(loops):
        images, labels = gen()
        pred = tem_model(images)
        labels = labels.cpu()
        pred = pred.cpu()
        acc = accuracy(pred, labels, topk=(1, 5, 10))
        correct_1 += acc[0]
        correct_5 += acc[1]
        correct_10 += acc[2]
        print(l, ':', acc)
    print('acc@1: ', correct_1 / loops)
    print('acc@5: ', correct_5 / loops)
    print('acc@10: ', correct_10 / loops)
'''
def C3D_Net_Run():
    """Train the C3D network on UCF101 with periodic accuracy logging and
    checkpointing.

    Runs `epochs * loops` minibatches, logging train loss every step, test and
    train accuracy every 25 steps, saving a checkpoint every 2000 steps, and
    applying step lr decay at epochs 20/40/60.
    """
    epochs = 81
    loops = 2001            # minibatches per epoch
    learningrate = 0.0001
    attenuation = 0.1       # lr decay factor

    model = C3D(drop=0.9).cuda()

    # Optionally resume: patch a tolerant state-dict loader onto this instance
    # so a partially-matching checkpoint can still be loaded.
    if Config.LOAD_SAVED_MODE_PATH is not None:
        import types
        model.try_to_load_state_dict = types.MethodType(
            try_to_load_state_dict, model)
        model.try_to_load_state_dict(torch.load(Config.LOAD_SAVED_MODE_PATH))
        print('LOAD {} done!'.format(Config.LOAD_SAVED_MODE_PATH))

    lossfunc = nn.CrossEntropyLoss()
    optim = torch.optim.Adam(model.parameters(), lr=learningrate)

    # background workers that prefetch ready-made batches
    pq_train = PictureQueue(dsl=train_UCF101_C3D(), Gen=GenVariables_C3D,
                            batchsize=batchsize, worker=5)
    pq_test = PictureQueue(dsl=test_UCF101_C3D(), Gen=GenVariables_C3D,
                           batchsize=batchsize, worker=2)

    cnt = 0
    for epoch in range(epochs):
        for l in range(loops):
            cnt += 1
            imgs, labels = pq_train.Get()
            model.zero_grad()
            pred = model(imgs)
            loss = lossfunc(pred, labels)
            # loss.data[0] is the pre-0.4 PyTorch scalar-access idiom
            logger.scalar_summary('C3D/train_loss', loss.data[0], cnt)
            loss.backward()
            optim.step()
            print('C3D epoch: {} cnt: {} loss: {}'.format(
                epoch, cnt, loss.data[0]))

            # every 25 steps: log test loss + test/train accuracy
            if cnt % 25 == 0:
                imgs, labels = pq_test.Get()
                pred = model.inference(imgs)
                loss = lossfunc(pred, labels)
                logger.scalar_summary('C3D/test_loss', loss.data[0], cnt)
                acc = accuracy(pred, labels, topk=(1, 5, 10))
                logger.scalar_summary('C3D/test_acc@1', acc[0], cnt)
                logger.scalar_summary('C3D/test_acc@5', acc[1], cnt)
                logger.scalar_summary('C3D/test_acc@10', acc[2], cnt)

                imgs, labels = pq_train.Get()
                pred = model.inference(imgs)
                acc = accuracy(pred, labels, topk=(1, 5, 10))
                logger.scalar_summary('C3D/train_acc@1', acc[0], cnt)
                logger.scalar_summary('C3D/train_acc@5', acc[1], cnt)
                logger.scalar_summary('C3D/train_acc@10', acc[2], cnt)

            # checkpoint every 2000 steps; filenames cycle modulo 20 epochs,
            # so old checkpoints are overwritten in a rolling window
            if cnt % 2000 == 0:
                savefile = savepath + 'C3D_EX1_{:02d}.pt'.format(epoch % 20)
                print('C3D save model to {}'.format(savefile))
                torch.save(model.state_dict(), savefile)

        # step decay: rebuild the optimizer with the reduced learning rate
        if epoch in [20, 40, 60]:
            learningrate = learningrate * attenuation
            optim = torch.optim.Adam(model.parameters(), lr=learningrate)
def Merge_Test_2():
    """Video-level two-stream evaluation over the whole test split.

    For every test video: run the temporal stream on the stacked-flow inputs
    and the spatial stream on the RGB frames, average each stream's per-sample
    predictions to a video score, fuse the two scores by simple averaging, and
    track top-1/5/10 accuracy. Per-video raw scores are appended to
    /tmp/test_log.txt as CSV lines.

    FIX vs original: the log file was opened with a bare `open()` and never
    closed — a `with` block now guarantees it is flushed and closed even when
    evaluation raises. The hard-coded divisor 8 is named.
    """
    # per-video sample count the predictions are averaged over
    SAMPLES_PER_VIDEO = 8

    spa_model = resnet152_SpatialNet().cuda()
    tem_model = resnet152_TemporalNet().cuda()
    if spa_model_save_file is not None:
        spa_model.load_state_dict(torch.load(spa_model_save_file))
        print('load spa_model success!')
    if tem_model_save_file is not None:
        tem_model.load_state_dict(torch.load(tem_model_save_file))
        print('load tem_model success!')
    spa_model.eval()
    tem_model.eval()

    dsl = test_UCF101_ChooseRandomFromSameVideo(dsl=UCF101_TwoStream)
    loops = len(dsl)
    correct_1 = 0
    correct_5 = 0
    correct_10 = 0

    with open('/tmp/test_log.txt', 'w') as f:
        for l in range(loops):
            imgpathss, labels = dsl[l]
            imgpaths_s = [imgpaths[0] for imgpaths in imgpathss]   # RGB frame
            imgpaths_t = [imgpaths[1:] for imgpaths in imgpathss]  # flow stack

            # --- temporal stream ---
            imgs_t = GenTensors(imgpaths_t, isTemporal=True, NoAug=True)
            imgs_t = Variable(imgs_t).cuda()
            labels = Variable(torch.from_numpy(np.array(labels))).cuda().long()

            line = '{},{},'.format(l, labels[0].cpu().data.numpy()[0])

            pred_t = tem_model(imgs_t)
            pred_t = pred_t.sum(0) / SAMPLES_PER_VIDEO
            lable = labels[0].cuda().long()
            acc = accuracy(pred_t, lable, topk=(1, 5, 10))
            line += str(pred_t.cpu().data.numpy().tolist()[0])[1:-1] + ","
            print('l:', l)
            print('temp :', acc)

            # --- spatial stream ---
            imgs_s = np.array([cv2.imread(path) for path in imgpaths_s])
            imgs_s = ImgAugPipes(imgs_s, NoAug=True, isNormal=True)
            imgs_s = Variable(torch.from_numpy(imgs_s)).cuda().float()

            pred_s = spa_model(imgs_s)
            pred_s = pred_s.sum(0) / SAMPLES_PER_VIDEO
            line += str(pred_s.cpu().data.numpy().tolist()[0])[1:-1]
            acc = accuracy(pred_s, lable, topk=(1, 5, 10))
            print('spa:', acc)

            # --- fused score: plain average of the two streams ---
            pred_a = (pred_s + pred_t) / 2
            acc = accuracy(pred_a, lable, topk=(1, 5, 10))
            print('all:', acc)

            correct_1 += acc[0]
            correct_5 += acc[1]
            correct_10 += acc[2]
            if l % 50 == 0:
                # running accuracy so long runs can be monitored
                print('acc@1:', correct_1 / (l + 1))
                print('acc@5:', correct_5 / (l + 1))
                print('acc@10:', correct_10 / (l + 1))
            f.write(line + '\n')
            f.flush()

    print('acc@1:', correct_1 / loops)
    print('acc@5:', correct_5 / loops)
    print('acc@10:', correct_10 / loops)
def VGG_Spatial_Net_Run():
    """Train the VGG spatial stream on UCF101 RGB frames with SGD, momentum,
    and step lr decay at epochs 10/20/50/60.

    Logs train loss every step, test and train accuracy every 20 steps, and
    checkpoints every 2000 steps.
    """
    epochs = 80
    loops = 2000            # minibatches per epoch
    learningrate = 0.001
    attenuation = 0.1       # lr decay factor

    model = VGG_Spatial_Net(pretrained=False, dropout1=0.8, dropout2=0.7).cuda()

    # Optionally resume from a checkpoint via a tolerant state-dict loader
    # patched onto this instance.
    if Config.LOAD_SAVED_MODE_PATH is not None:
        import types
        model.try_to_load_state_dict = types.MethodType(try_to_load_state_dict,
                                                        model)
        model.try_to_load_state_dict(torch.load(Config.LOAD_SAVED_MODE_PATH))
        print('LOAD {} done!'.format(Config.LOAD_SAVED_MODE_PATH))

    lossfunc = nn.CrossEntropyLoss()
    optim = torch.optim.SGD(model.parameters(), lr=learningrate, momentum=0.9)
    cnt = 0

    # background batch prefetchers for the train and test splits
    pq_train = PictureQueue(dsl=train_UCF0101_Spatial(),
                            Gen=GenVariables_Spatial, batchsize=batchsize)
    pq_test = PictureQueue(dsl=test_UCF0101_Spatial(),
                           Gen=GenVariables_Spatial, batchsize=batchsize)

    for epoch in range(epochs):
        for l in range(loops):
            cnt += 1
            imgs, labels = pq_train.Get()
            model.zero_grad()
            pred = model(imgs)
            loss = lossfunc(pred, labels)
            # loss.data[0] is the pre-0.4 PyTorch scalar-access idiom
            logger.scalar_summary('Spatial/train_loss', loss.data[0], cnt)
            loss.backward()
            optim.step()
            print('Spatial epoch: {} cnt: {} loss: {}'.format(epoch, cnt,
                                                              loss.data[0]))

            # every 20 steps: log test loss + test/train accuracy
            if cnt % 20 == 0:
                imgs, labels = pq_test.Get()
                pred = model.inference(imgs)
                loss = lossfunc(pred, labels)
                logger.scalar_summary('Spatial/test_loss', loss.data[0], cnt)
                acc = accuracy(pred, labels, topk=(1, 5, 10))
                logger.scalar_summary('Spatial/test_acc@1', acc[0], cnt)
                logger.scalar_summary('Spatial/test_acc@5', acc[1], cnt)
                logger.scalar_summary('Spatial/test_acc@10', acc[2], cnt)

                imgs, labels = pq_train.Get()
                pred = model.inference(imgs)
                acc = accuracy(pred, labels, topk=(1, 5, 10))
                logger.scalar_summary('Spatial/train_acc@1', acc[0], cnt)
                logger.scalar_summary('Spatial/train_acc@5', acc[1], cnt)
                logger.scalar_summary('Spatial/train_acc@10', acc[2], cnt)

            # rolling checkpoint every 2000 steps (filename cycles mod 50)
            if cnt % 2000 == 0:
                savefile = savepath + 'VGG_Spatial_EX1_{:02d}.pt'.format(
                    epoch % 50)
                print('Spatial save model to {}'.format(savefile))
                torch.save(model.state_dict(), savefile)

        # step decay: rebuild the optimizer with the reduced learning rate
        if epoch in [10, 20, 50, 60]:
            learningrate = learningrate * attenuation
            optim = torch.optim.SGD(model.parameters(), lr=learningrate,
                                    momentum=0.9, )
def VGG_TwoStream_Video_AVG_Merge_Test():
    '''
    Video Level VGG TwoStream AVG merge evaluation.

    Averages spatial and temporal softmax outputs over all frames of each
    video and counts per-video top-1/5/10 hits for the spatial-only,
    temporal-only, and fused predictions.

    NOTE(review): `gen()` is not defined inside this function — it is
    presumably a module-level helper elsewhere in the file; confirm it yields
    (imgs, labels) with imgs shaped (n, b, c, w, h).
    '''
    loops = 100
    spa_model = VGG_Spatial_Net().cuda()
    tem_model = VGG_Temporal_Net().cuda()
    if spa_model_save_file is not None:
        spa_model.load_state_dict(torch.load(spa_model_save_file))
        print('load spa_model success!')
    if tem_model_save_file is not None:
        tem_model.load_state_dict(torch.load(tem_model_save_file))
        print('load tem_model success!')

    # one throwaway draw just to read the batch geometry (n videos of b
    # frames each; assumed constant across later draws — TODO confirm)
    imgs, labels = gen()
    labels = Variable(torch.from_numpy(labels)).cuda().long()
    n, b, c, w, h = imgs.shape

    # per-topk video hit counters: [top1, top5, top10]
    correct_d = [0, 0, 0]    # fused two-stream
    correct_spa = [0, 0, 0]  # spatial only
    correct_tmp = [0, 0, 0]  # temporal only

    for l in range(loops):
        imgs, labels = gen()
        print('l: ', l)
        print('correct_d', correct_d)
        print('correct_spa', correct_spa)
        print('correct_tmp', correct_tmp)
        for i in range(n):
            # channels 0:3 are the RGB frame, the rest the stacked flow
            spatial_input = Variable(
                torch.from_numpy(imgs[i, :, 0:3, :, :])).cuda().float()
            temporal_input = Variable(
                torch.from_numpy(imgs[i, :, 3:, :, :])).cuda().float()
            predict_1 = spa_model.inference(spatial_input)
            predict_2 = tem_model.inference(temporal_input)

            # fuse streams, then average over the b frames;
            # cumsum(...)[-1] is just the sum over the frame axis
            predict_all = (predict_1 + predict_2) / 2
            predict_all = torch.cumsum(predict_all, 0)
            predict_all = predict_all[-1, :]
            predict_all = predict_all / b
            predict_all = predict_all.view(1, 101)  # 101 UCF101 classes

            # single ground-truth label for this video
            target = np.array([labels[i, 0]])
            target = torch.from_numpy(target).view(1, 1).cuda().long()
            target = Variable(target)

            # spatial-only video-level score
            predict_1 = torch.cumsum(predict_1, 0)
            predict_1 = predict_1[-1].view(1, 101)
            predict_1 = predict_1 / b
            acc = accuracy(predict_1, target, topk=(1, 5, 10))
            for ii in range(3):
                # acc[ii] > 0.5 means the single video was a hit at this topk
                if acc[ii] > 0.5:
                    correct_spa[ii] += 1

            # temporal-only video-level score
            predict_2 = torch.cumsum(predict_2, 0)
            predict_2 = predict_2[-1].view(1, 101)
            predict_2 = predict_2 / b
            acc = accuracy(predict_2, target, topk=(1, 5, 10))
            for ii in range(3):
                if acc[ii] > 0.5:
                    correct_tmp[ii] += 1

            # fused video-level score
            acc = accuracy(predict_all, target, topk=(1, 5, 10))
            for ii in range(3):
                if acc[ii] > 0.5:
                    correct_d[ii] += 1
# --- module-level scratch / sanity checks (these run at import time) ---

import numpy as np
import torch
from torch.autograd import Variable
from VideoClassification.utils.Others.toolkits import accuracy

# quick hand-made check of accuracy(): 3 samples x 3 classes
out = np.array([[0.1, -0.1, 0], [0.1, -0.2, 0.3], [1, -0.1, 9]])
target = np.array([1, 2, 0])
out = Variable(torch.from_numpy(out))
target = Variable(torch.from_numpy(target))
acc = accuracy(out, target, topk=(1, 2))
acc = [a.data[0] for a in acc]  # unwrap pre-0.4 PyTorch scalar tensors

import torchvision.transforms as transforms

# standard ImageNet channel normalization constants
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])