def get_dataloader(opt):
    """Build the test DataLoader over the surgical frame dataset.

    Every frame goes through ``Scale(112)`` -> ``CornerCrop(112, 'c')`` ->
    ``ToTensor(255)`` -> ``Normalize`` with fixed per-channel means and a
    std of 1. Clips use the ``LoopPadding(16)`` temporal transform and the
    ``ClassLabel`` target transform.

    Args:
        opt: Options object; ``frames_path``, ``video_phase_annotation_path``
            and ``class_names`` are read from it.

    Returns:
        A ``torch.utils.data.DataLoader`` over the dataset with batch size 1,
        no shuffling, 4 workers and pinned memory.
    """
    clip_length = 16
    frame_size = 112
    pixel_range = 255

    channel_mean = [
        110.63666788 / pixel_range,
        103.16065604 / pixel_range,
        96.29023126 / pixel_range,
    ]
    normalize = Normalize(channel_mean, [1, 1, 1])
    frame_pipeline = Compose([
        Scale(frame_size),
        CornerCrop(frame_size, 'c'),
        ToTensor(pixel_range),
        normalize,
    ])
    clip_padding = LoopPadding(clip_length)
    label_transform = ClassLabel()

    dataset = SurgicalDataset(
        os.path.abspath(opt.frames_path),
        os.path.abspath(opt.video_phase_annotation_path),
        opt.class_names,
        spatial_transform=frame_pipeline,
        temporal_transform=clip_padding,
        target_transform=label_transform,
        sample_duration=clip_length,
    )
    return torch.utils.data.DataLoader(
        dataset,
        batch_size=1,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
    )
def __init__(self, model_file, sample_duration, model_type, cuda_id=0):
    """Configure options, build the model and load its checkpoint.

    Starts from ``parse_opts()``, overrides a fixed set of options, builds
    the network with ``generate_model``, restores the weights stored under
    ``./C3D_ResNet/data/<model_file>`` and leaves the model in eval mode.

    Args:
        model_file: Checkpoint path relative to ``./C3D_ResNet/data``.
        sample_duration: Value stored in ``opt.sample_duration``.
        model_type: Value stored in ``opt.model``.
        cuda_id: Value stored in ``opt.cuda_id``.
    """
    data_root = './C3D_ResNet/data'
    depth = 18

    opt = parse_opts()
    self.opt = opt

    # Fixed configuration applied on top of the parsed defaults.
    overrides = {
        'model': model_type,
        'root_path': data_root,
        'resume_path': os.path.join(data_root, model_file),
        'pretrain_path': os.path.join(data_root,
                                      'models/resnet-18-kinetics.pth'),
        'cuda_id': cuda_id,
        'dataset': 'ucf101',
        'n_classes': 400,
        'n_finetune_classes': 3,
        'ft_begin_index': 4,
        'model_depth': depth,
        'resnet_shortcut': 'A',
        'sample_duration': sample_duration,
        'batch_size': 1,
        'n_threads': 1,
        'checkpoint': 5,
        'arch': '{}-{}'.format(model_type, depth),
    }
    for option_name, option_value in overrides.items():
        setattr(opt, option_name, option_value)

    opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
    opt.std = get_std(opt.norm_value)

    print('Loading C3D action-recognition model..')
    self.model, _ = generate_model(opt)

    # Same three cases as the usual no_mean_norm / std_norm switch.
    if opt.std_norm:
        normalize = Normalize(opt.mean, opt.std)
    elif opt.no_mean_norm:
        normalize = Normalize([0, 0, 0], [1, 1, 1])
    else:
        normalize = Normalize(opt.mean, [1, 1, 1])

    if opt.resume_path:
        print(' loading checkpoint {}'.format(opt.resume_path))
        saved = torch.load(opt.resume_path)
        # The checkpoint's 'arch' entry is not compared against opt.arch.
        opt.begin_epoch = saved['epoch']
        self.model.load_state_dict(saved['state_dict'])

    scaled_size = int(opt.sample_size / opt.scale_in_test)
    self.spatial_transform = Compose([
        ScaleQC(scaled_size),
        CornerCrop(opt.sample_size, opt.crop_position_in_test),
        ToTensor(opt.norm_value),
        normalize,
    ])
    self.target_transform = ClassLabel()
    self.model.eval()
def model_process(count, model):
    """Parse options, write them to ``opts.json`` and run the test pass.

    Paths in the parsed options are made relative to ``opt.root_path`` when
    it is non-empty. When ``opt.test`` is set, a test DataLoader is built
    and handed to ``tester.test`` together with ``count`` and ``model``.

    Args:
        count: Passed through unchanged to ``tester.test``.
        model: Network passed through to ``tester.test``.
    """
    opt = parse_opts()

    root = opt.root_path
    if root != '':
        for path_attr in ('video_path', 'annotation_path', 'result_path'):
            setattr(opt, path_attr, os.path.join(root, getattr(opt, path_attr)))
        if opt.resume_path:
            opt.resume_path = os.path.join(root, opt.resume_path)
        if opt.pretrain_path:
            opt.pretrain_path = os.path.join(root, opt.pretrain_path)

    # Geometric series of scales: each one is the previous times scale_step.
    scales = [opt.initial_scale]
    for _ in range(opt.n_scales - 1):
        scales.append(scales[-1] * opt.scale_step)
    opt.scales = scales

    opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
    opt.std = get_std(opt.norm_value)

    opts_file = os.path.join(opt.result_path, 'opts.json')
    with open(opts_file, 'w') as handle:
        json.dump(vars(opt), handle)

    torch.manual_seed(opt.manual_seed)

    # The criterion is created (and moved to the GPU) but not used below.
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()

    if opt.std_norm:
        normalize = Normalize(opt.mean, opt.std)
    elif opt.no_mean_norm:
        normalize = Normalize([0, 0, 0], [1, 1, 1])
    else:
        normalize = Normalize(opt.mean, [1, 1, 1])

    print('testing is run')
    if not opt.test:
        return

    frame_pipeline = Compose([
        Scale(int(opt.sample_size / opt.scale_in_test)),
        CornerCrop(opt.sample_size, opt.crop_position_in_test),
        ToTensor(opt.norm_value),
        normalize,
    ])
    clip_padding = LoopPadding(opt.sample_duration)
    id_transform = VideoID()

    test_data = get_test_set(opt, frame_pipeline, clip_padding, id_transform)
    test_loader = torch.utils.data.DataLoader(
        test_data,
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
    )
    tester.test(count, test_loader, model, opt, test_data.class_names)
def extract_feats(file_path, net, filenames, frame_num, batch_size, save_path):
    """Extract clip-level 3D features for each video and save them as .npy.

    For every selected video the frames are decoded with imageio/ffmpeg and
    transformed one by one. ``frame_num`` clip centres are spread evenly over
    the video with ``np.linspace``; a 16-frame window around each centre is
    pushed through ``net`` in batches of ``batch_size``. The concatenated
    outputs are written to ``<save_path>/<video name without extension>.npy``.
    Videos whose feature file already exists are skipped.

    Args:
        file_path: Directory containing the video files.
        net: Network called on batches laid out as
            (batch, channel, time, height, width) after the transpose below.
        filenames: Video file names; the last four characters of each name
            are treated as the extension.
        frame_num: Number of clips sampled per video.
        batch_size: Number of clips per forward pass.
        save_path: Directory receiving the ``.npy`` files.
    """
    clip_len = 16
    half_clip = clip_len // 2

    net.eval()
    mean = get_mean(255, dataset='kinetics')
    std = get_std(255)
    transform = Compose([
        trn.ToPILImage(),
        Scale(112),
        CornerCrop(112, 'c'),
        ToTensor(),
        Normalize(mean, std),
    ])
    print("Network loaded")

    # NOTE(review): start_idx / end_idx are not defined in this function;
    # presumably they are module-level globals - confirm against the caller.
    for video_name in filenames[start_idx:end_idx]:
        stem = video_name[:-4]
        feat_file = os.path.join(save_path, stem + '.npy')
        if os.path.exists(feat_file):
            continue

        vid = imageio.get_reader(os.path.join(file_path, video_name), 'ffmpeg')
        try:
            frames = []
            for frame in vid:
                if len(frame.shape) < 3:
                    # Grey-scale frame: replicate it along a new trailing
                    # channel axis. np.repeat without `axis` would flatten
                    # the image into a 1-D array.
                    frame = np.repeat(frame[..., np.newaxis], 3, axis=-1)
                frames.append(transform(frame).unsqueeze(0))
        finally:
            # Release the ffmpeg reader even if decoding fails.
            vid.close()

        if not frames:
            print("No frames decoded from {}; skipping".format(video_name))
            continue

        curr_frames = torch.cat(frames, dim=0)
        n_frames = len(curr_frames)
        print("Shape of frames: {0}".format(curr_frames.shape))
        idx = np.linspace(0, n_frames - 1, frame_num).astype(int)
        print("Captured {} clips: {}".format(len(idx), curr_frames.shape))

        # Latest start that still leaves a full window; 0 for short videos,
        # in which case every clip is the whole video.
        last_start = max(n_frames - clip_len, 0)

        curr_feats = []
        with torch.no_grad():
            for i in range(0, len(idx), batch_size):
                clips = []
                for centre in idx[i:i + batch_size]:
                    # Clamp the window so boundary centres yield full-length
                    # clips; a negative slice start would wrap around and
                    # give an empty or truncated clip.
                    start = min(max(int(centre) - half_clip, 0), last_start)
                    clips.append(
                        curr_frames[start:start + clip_len].unsqueeze(0))
                curr_batch = torch.cat(clips, dim=0).cuda()
                # (batch, time, channel, H, W) -> (batch, channel, time, H, W)
                out = net(curr_batch.transpose(1, 2))
                curr_feats.append(out.detach().cpu())
                print("Appended {} features {}".format(i + 1, out.shape))

        curr_feats = torch.cat(curr_feats, 0)
        np.save(feat_file, curr_feats.numpy())
        print("Saved file {}\nExiting".format(stem + '.npy'))
# Epoch loop: train and/or validate according to the opt flags, stepping the
# LR scheduler on the validation loss only when both phases ran.
print('run')
for i in range(opt.begin_epoch, opt.n_epochs + 1):
    if not opt.no_train:
        train_epoch(
            i, train_loader, model, criterion, optimizer, opt,
            train_logger, train_batch_logger,
        )
    if not opt.no_val:
        validation_loss = val_epoch(
            i, val_loader, model, criterion, opt, val_logger,
        )
    if not (opt.no_train or opt.no_val):
        scheduler.step(validation_loss)

# Optional test pass after the last epoch.
if opt.test:
    spatial_transform = Compose([
        Scale(int(opt.sample_size / opt.scale_in_test)),
        CornerCrop(opt.sample_size, opt.crop_position_in_test),
        ToTensor(opt.norm_value),
        norm_method,
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    target_transform = VideoID()

    test_data = get_test_set(
        opt, spatial_transform, temporal_transform, target_transform,
    )
    test_loader = torch.utils.data.DataLoader(
        test_data,
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
    )
    test.test(test_loader, model, opt, test_data.class_names)
# Epoch loop: train and/or validate according to the args flags, stepping the
# LR scheduler on the validation loss only when both phases ran.
for i in range(args.begin_epoch, args.n_epochs + 1):
    if not args.no_train:
        train_epoch(
            i, train_loader, model, criterion, optimizer, args,
            train_logger, train_batch_logger,
        )
    if not args.no_val:
        validation_loss = val_epoch(
            i, val_loader, model, criterion, args, val_logger,
        )
    if not (args.no_train or args.no_val):
        scheduler.step(validation_loss)

# Optional test pass after the last epoch.
if args.test:
    # Test-time frame pipeline: scale, crop, tensor conversion, normalise.
    spatial_transform = Compose([
        Scale(int(args.sample_size / args.scale_in_test)),
        CornerCrop(args.sample_size, args.crop_position_in_test),
        ToTensor(args.norm_value),
        norm_method,
    ])
    temporal_transform = LoopPadding(args.sample_duration)
    target_transform = VideoID()

    test_data = get_test_set(
        args, spatial_transform, temporal_transform, target_transform,
    )
    test_loader = torch.utils.data.DataLoader(
        test_data,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.n_threads,
        pin_memory=True,
    )
    test.test(test_loader, model, args, test_data.class_names)
def main(args):
    """Run the full inference pipeline on a directory of unlabeled videos.

    Steps:
      1. Convert the videos to JPEG frames with ``utils/video_jpg.py`` and
         ``utils/n_frames.py``.
      2. Extract ``avgpool`` features with a 3D ResNeXt-101 and a 3D
         ResNet-50 loaded from ``*-kinetics.pth`` weights, saving them under
         ``./extracted_features``.
      3. Score the features with the joblib classifier selected by
         ``args.model``.
      4. For every video, plot the class-1 probability over time and write
         all time/score pairs to a JSON file under ``./final_test_results``.

    Args:
        args: Namespace providing ``video_directory_path``,
            ``pretrain_directory_path`` and ``model`` (one of ``'hw4'``,
            ``'hw5'``, ``'hw6'``, ``'hw8'``, ``'final'``).

    Raises:
        ValueError: If ``args.model`` is not one of the known keys.
    """
    import collections
    import json
    import os
    import subprocess

    import numpy as np
    import torch
    from torch import nn
    from joblib import load

    from mean import get_mean
    from spatial_transforms import (
        Compose, Normalize, Scale, CornerCrop, ToTensor)
    from temporal_transforms import LoopPadding
    from target_transforms import VideoID

    def _run_script(*argv):
        """Run a helper script without a shell; warn on a non-zero exit."""
        # An argument list keeps paths with spaces or shell metacharacters
        # intact, unlike a concatenated os.system command string.
        completed = subprocess.run(['python', *argv])
        if completed.returncode != 0:
            print('Warning: {} exited with status {}'.format(
                argv[0], completed.returncode))

    # ---- Paths -----------------------------------------------------------
    local_path = os.getcwd()
    if args.video_directory_path in ["", " ", './video', './video/']:
        video_path = local_path + '/video/'
    else:
        video_path = args.video_directory_path

    video_path_jpg = local_path + '/video_jpg/'
    extracted_feature_path = local_path + '/extracted_features'
    final_results_path = local_path + '/final_test_results'
    for directory in (video_path_jpg, extracted_feature_path,
                      final_results_path):
        os.makedirs(directory, exist_ok=True)

    _run_script('utils/video_jpg.py', video_path, video_path_jpg)
    _run_script('utils/n_frames.py', video_path_jpg)

    if args.pretrain_directory_path in ["", " ", './pretrain',
                                        './pretrain/']:
        pretrain_directory_path = local_path + '/pretrain'
    else:
        pretrain_directory_path = args.pretrain_directory_path

    # ---- Options and data ------------------------------------------------
    import easydict
    opt = easydict.EasyDict({
        "n_classes": 2,
        "sample_size": 112,
        "sample_duration": 16,
        "batch_size": 16,
        "n_threads": 4,
        "norm_value": 1,
        "resnet_shortcut": 'B',
        "resnext_cardinality": 32,
    })
    opt.root_path = local_path
    opt.video_path = video_path_jpg

    # Only GPU 0 is exposed; list more ids here to use additional devices.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    from datasets.no_label_binary import NoLabelBinary

    mean = get_mean(opt.norm_value, dataset='kinetics')
    norm_method = Normalize(mean, [1, 1, 1])
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CornerCrop(opt.sample_size, 'c'),
        ToTensor(opt.norm_value),
        norm_method,
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    target_transform = VideoID()

    test_data = NoLabelBinary(
        opt.video_path,
        None,
        'testing',
        0,
        spatial_transform=spatial_transform,
        temporal_transform=temporal_transform,
        target_transform=target_transform,
        sample_duration=opt.sample_duration)
    test_loader = torch.utils.data.DataLoader(
        test_data,
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=False)

    # ---- Feature extraction helpers ----------------------------------------
    def _load_pretrained(model, weights_name):
        """Wrap ``model`` in DataParallel and load weights except the fc layer."""
        # The checkpoint keys carry a 'module.' prefix, so the model is
        # wrapped in DataParallel to make the key names match.
        model = nn.DataParallel(model.cuda(), device_ids=None)
        pretrain = torch.load(
            os.path.join(pretrain_directory_path, weights_name))
        pretrain_dict = pretrain['state_dict']
        # The final layer is not loaded: the model here has n_classes outputs.
        pretrain_dict.pop('module.fc.weight')
        pretrain_dict.pop('module.fc.bias')
        model_dict = model.state_dict()
        model_dict.update(pretrain_dict)
        model.load_state_dict(model_dict)
        return model

    def _extract_avgpool(model):
        """Forward all test clips; return (avgpool features, per-batch targets)."""
        captured = {}

        def _hook(module, inputs, output):
            captured['avgpool'] = output.detach()

        handle = model.module.avgpool.register_forward_hook(_hook)
        model.eval()
        features, batch_targets = [], []
        try:
            with torch.no_grad():
                print("Extract test set features:")
                for i, (inputs, target) in enumerate(test_loader):
                    if i % 30 == 0:
                        print(i)
                    model(inputs)
                    features.append(
                        captured['avgpool'].view(len(target), -1).cpu())
                    batch_targets.append(target)
        finally:
            handle.remove()
        stacked = np.concatenate([f.numpy() for f in features], axis=0)
        return stacked, batch_targets

    # ---- 3D ResNeXt-101 ----------------------------------------------------
    from models import resnext
    model_rxt101 = _load_pretrained(
        resnext.resnet101(
            num_classes=opt.n_classes,
            shortcut_type=opt.resnet_shortcut,
            cardinality=opt.resnext_cardinality,
            sample_size=opt.sample_size,
            sample_duration=opt.sample_duration),
        'resnext-101-kinetics.pth')
    x_test_1, targets_test = _extract_avgpool(model_rxt101)
    np.save(opt.root_path + '/extracted_features/resnext101_avgpool_test.npy',
            x_test_1)
    # Concatenate the per-batch target sequences directly; wrapping them in
    # np.array first fails when the last batch is shorter than the others.
    y_test = np.concatenate(targets_test, axis=0)
    np.save(opt.root_path + '/extracted_features/class_names_test.npy',
            y_test)

    # ---- 3D ResNet-50 ------------------------------------------------------
    from models import resnet
    model_rt50 = _load_pretrained(
        resnet.resnet50(
            num_classes=opt.n_classes,
            shortcut_type=opt.resnet_shortcut,
            sample_size=opt.sample_size,
            sample_duration=opt.sample_duration),
        'resnet-50-kinetics.pth')
    x_test_2, _ = _extract_avgpool(model_rt50)
    np.save(opt.root_path + '/extracted_features/resnet50_avgpool_test.npy',
            x_test_2)

    # ---- Fuse features and pick the classifier -----------------------------
    x_test = np.concatenate([x_test_1, x_test_2], axis=1)

    # model key -> (classifier file, feature matrix it was fitted on)
    classifier_heads = {
        'hw4': ('./hw6_results/logistic2_ucf.joblib', x_test_2),
        'hw5': ('./hw6_results/logistic_ucf.joblib', x_test),
        'hw6': ('./hw6_results/logistic1_ucf.joblib', x_test_1),
        'hw8': ('./hw8_results/logistic_ucf.joblib', x_test),
        'final': ('./hw8_results/logistic1_ucf.joblib', x_test_1),
    }
    if args.model not in classifier_heads:
        raise ValueError('Unknown model {!r}; expected one of {}'.format(
            args.model, sorted(classifier_heads)))
    clf_file, clf_features = classifier_heads[args.model]
    clf = load(clf_file)
    y_pred_test_prob_raw = clf.predict_proba(clf_features)

    # ---- Group clip predictions by video -----------------------------------
    # Clips of one video are consecutive in y_test; each run of equal names
    # becomes one entry keyed by the video name.
    run_starts = [idx for idx, y_name in enumerate(y_test)
                  if idx == 0 or y_name != y_test[idx - 1]]
    run_bounds = run_starts + [len(y_test)]
    y_pred_test_prob = {}
    for start, stop in zip(run_bounds, run_bounds[1:]):
        y_pred_test_prob[y_test[start]] = y_pred_test_prob_raw[start:stop]

    clips_per_video = collections.Counter(list(y_test))

    # ---- Video durations (seconds) -----------------------------------------
    from moviepy.editor import VideoFileClip
    clip_duration_dict = {}
    for i, tvn in enumerate(y_pred_test_prob, start=1):
        if i % 100 == 0:
            print(i)
        clip = VideoFileClip(os.path.join(video_path, tvn + ".mp4"))
        try:
            clip_duration_dict[tvn] = clip.duration
        finally:
            # Release the underlying reader resources for every video.
            clip.close()

    # ---- Figures and JSON --------------------------------------------------
    import matplotlib.pyplot as plt
    timeTrueLabel = {}
    for tvn, duration in clip_duration_dict.items():
        # One time stamp per clip, placed at the end of its interval.
        interval = duration / clips_per_video[tvn]
        y = y_pred_test_prob[tvn][:, 1]
        x = (np.arange(0, duration, interval) + interval)[:len(y)]

        plt.plot(x, y)
        plt.ylim([-0.1, 1.1])
        plt.xlabel('time/sec')
        plt.ylabel('pred score for ground truth label')
        plt.title("Ground Truth Label: " + tvn
                  + "\n Model Avg. Predict Score: " + str(np.mean(y)))
        plt.savefig(opt.root_path + "/final_test_results/" + tvn + '_'
                    + args.model + "_UIN-625007598", bbox_inches='tight')
        plt.close()

        timeTrueLabel[tvn] = [[str(time), str(score)]
                              for time, score in zip(x, y)]

    with open(opt.root_path + '/final_test_results/timeLabel_' + args.model
              + '_UIN-625007598.json', 'w') as fp:
        json.dump(timeTrueLabel, fp)
def get_ucf_data(opt):
    """Build the UCF101 training, validation and test DataLoaders.

    All three splits share one spatial pipeline (``Scale`` ->
    ``CornerCrop(..., 'c')`` -> ``ToTensor`` -> ``Normalize`` with the
    'kinetics' mean and unit std) and the ``LoopPadding`` temporal transform.
    Training and validation targets use ``ClassLabel``; test targets use
    ``VideoID``. No loader shuffles.

    Args:
        opt: Options object; ``norm_value``, ``sample_size``,
            ``sample_duration``, ``video_path``, ``annotation_path``,
            ``batch_size`` and ``n_threads`` are read.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader, test_data)``.
    """
    kinetics_mean = get_mean(opt.norm_value, dataset='kinetics')
    # Computed as in the original code, but the normalisation uses unit std.
    get_std(opt.norm_value)

    frame_pipeline = Compose([
        Scale(opt.sample_size),
        CornerCrop(opt.sample_size, 'c'),
        ToTensor(opt.norm_value),
        Normalize(kinetics_mean, [1, 1, 1]),
    ])
    clip_padding = LoopPadding(opt.sample_duration)

    def _build_split(subset, label_transform):
        """Return (dataset, loader) for one UCF101 subset."""
        # NOTE(review): sample_duration is fixed at 16 here while the
        # temporal transform uses opt.sample_duration - confirm intended.
        dataset = UCF101(
            opt.video_path,
            opt.annotation_path,
            subset,
            0,
            spatial_transform=frame_pipeline,
            temporal_transform=clip_padding,
            target_transform=label_transform,
            sample_duration=16,
        )
        loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=opt.batch_size,
            shuffle=False,
            num_workers=opt.n_threads,
            pin_memory=False,
        )
        return dataset, loader

    class_label = ClassLabel()
    _, train_loader = _build_split('training', class_label)
    _, val_loader = _build_split('validation', class_label)
    test_data, test_loader = _build_split('testing', VideoID())

    return train_loader, val_loader, test_loader, test_data
def create_dataloader(args):
    """Resolve paths on ``args`` and build the train/validation DataLoaders.

    ``args`` is modified in place: data paths are joined onto
    ``args.root_path`` when it is non-empty, ``pretrain_path`` is made
    absolute, and ``scales``, ``mean`` and ``std`` are filled in.

    Args:
        args: Parsed options object.

    Returns:
        Tuple ``(train_loader, val_loader)``. The training loader shuffles
        with ``args.batch_size``; the validation loader uses batch size 1
        without shuffling.
    """
    root = args.root_path
    if root != '':
        for path_attr in ('video_path', 'annotation_path', 'result_path'):
            setattr(args, path_attr,
                    os.path.join(root, getattr(args, path_attr)))
        if args.resume_path:
            args.resume_path = os.path.join(root, args.resume_path)
        if args.pretrain_path:
            # Unlike the other paths, this one is not joined onto root_path.
            args.pretrain_path = os.path.abspath(args.pretrain_path)

    # Geometric series of scales: each one is the previous times scale_step.
    scales = [args.initial_scale]
    for _ in range(args.n_scales - 1):
        scales.append(scales[-1] * args.scale_step)
    args.scales = scales

    args.mean = get_mean(args.norm_value, dataset=args.mean_dataset)
    args.std = get_std(args.norm_value)

    if args.std_norm:
        normalize = Normalize(args.mean, args.std)
    elif args.no_mean_norm:
        normalize = Normalize([0, 0, 0], [1, 1, 1])
    else:
        normalize = Normalize(args.mean, [1, 1, 1])

    # ---- Training loader ---------------------------------------------------
    crop_mode = args.train_crop
    assert crop_mode in ['random', 'corner', 'center']
    if crop_mode == 'random':
        train_crop = MultiScaleRandomCrop(args.scales, args.sample_size)
    elif crop_mode == 'corner':
        train_crop = MultiScaleCornerCrop(args.scales, args.sample_size)
    elif crop_mode == 'center':
        train_crop = MultiScaleCornerCrop(
            args.scales, args.sample_size, crop_positions=['c'])

    train_frames = Compose([
        train_crop,
        RandomHorizontalFlip(),
        ToTensor(args.norm_value),
        normalize,
    ])
    train_clips = TemporalRandomCrop(args.sample_duration)
    train_labels = ClassLabel()
    training_data = get_training_set(
        args, train_frames, train_clips, train_labels)
    train_loader = torch.utils.data.DataLoader(
        training_data,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.n_threads,
        pin_memory=True,
    )

    # ---- Validation loader -------------------------------------------------
    val_frames = Compose([
        Scale(int(args.sample_size / args.scale_in_test)),
        CornerCrop(args.sample_size, args.crop_position_in_test),
        ToTensor(args.norm_value),
        normalize,
    ])
    val_clips = TemporalCenterCrop(args.sample_duration)
    val_labels = ClassLabel()
    validation_data = get_validation_set(
        args, val_frames, val_clips, val_labels)
    val_loader = torch.utils.data.DataLoader(
        validation_data,
        batch_size=1,
        shuffle=False,
        num_workers=args.n_threads,
        pin_memory=True,
    )

    return train_loader, val_loader
def main():
    """Train the ``ReNet34`` model and report retrieval mAP@100 periodically.

    Reads the module-level ``opt`` and ``encode_length``. The backbone from
    ``generate_model`` has its ``fc`` layer replaced by ``Identity`` and is
    wrapped in ``ReNet34``. Each training step splits the batch in two
    halves and minimises the MSE between the pairwise cosine similarities of
    the two model outputs ``b`` and ``x``. Every 10th epoch the test set is
    evaluated as queries against the validation set via ``compress`` and
    ``calculate_top_map``.
    """
    resnet_in = generate_model(opt)
    resnet_in.module.fc = Identity()
    model = ReNet34(resnet_in, encode_length=encode_length)

    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)

    def _eval_loader(get_dataset):
        """Build an unshuffled loader with the test-time transforms."""
        spatial_transform = Compose([
            Scale(int(opt.sample_size / opt.scale_in_test)),
            CornerCrop(opt.sample_size, opt.crop_position_in_test),
            ToTensor(opt.norm_value),
            norm_method,
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = ClassLabel()
        dataset = get_dataset(opt, spatial_transform, temporal_transform,
                              target_transform)
        return torch.utils.data.DataLoader(dataset,
                                           batch_size=opt.batch_size,
                                           shuffle=False,
                                           num_workers=opt.n_threads,
                                           pin_memory=True)

    ## train loader
    if not opt.no_train:
        assert opt.train_crop in ['random', 'corner', 'center']
        if opt.train_crop == 'random':
            crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'corner':
            crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'center':
            crop_method = MultiScaleCornerCrop(opt.scales,
                                               opt.sample_size,
                                               crop_positions=['c'])
        spatial_transform = Compose([
            crop_method,
            RandomHorizontalFlip(),
            ToTensor(opt.norm_value),
            norm_method,
        ])
        temporal_transform = TemporalRandomCrop(opt.sample_duration)
        target_transform = ClassLabel()
        training_data = get_training_set(opt, spatial_transform,
                                         temporal_transform,
                                         target_transform)
        train_loader = torch.utils.data.DataLoader(training_data,
                                                   batch_size=opt.batch_size,
                                                   shuffle=True,
                                                   num_workers=opt.n_threads,
                                                   pin_memory=True)

    ## test loader (queries) and database loader (retrieval set)
    test_loader = _eval_loader(get_test_set)
    database_loader = _eval_loader(get_validation_set)

    # SGD rejects Nesterov momentum combined with non-zero dampening.
    dampening = 0 if opt.nesterov else opt.dampening
    optimizer = optim.SGD(model.parameters(),
                          lr=opt.learning_rate,
                          momentum=opt.momentum,
                          dampening=dampening,
                          weight_decay=opt.weight_decay,
                          nesterov=opt.nesterov)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                               'min',
                                               patience=opt.lr_patience)

    if opt.resume_path:
        print('loading checkpoint {}'.format(opt.resume_path))
        checkpoint = torch.load(opt.resume_path)
        assert opt.arch == checkpoint['arch']
        opt.begin_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        if not opt.no_train:
            optimizer.load_state_dict(checkpoint['optimizer'])
            # Restored optimizer state must live on the GPU with the model.
            for state in optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()

    print('run')
    model.cuda()
    for epoch in range(opt.begin_epoch, opt.n_epochs + 1):
        # train_loader only exists when training is enabled.
        if not opt.no_train:
            model.train()
            loss_sum, n_steps = 0.0, 0
            for images, labels in train_loader:
                # Use equally sized halves so an odd-sized (typically last)
                # batch cannot produce mismatched shapes.
                half = labels.size(0) // 2
                if half == 0:
                    continue
                images = Variable(images.cuda())

                # Forward + Backward + Optimize
                optimizer.zero_grad()
                x, _, b = model(images)
                target_b = F.cosine_similarity(b[:half], b[half:2 * half])
                target_x = F.cosine_similarity(x[:half], x[half:2 * half])
                loss = F.mse_loss(target_b, target_x)
                loss.backward()
                optimizer.step()

                loss_sum += loss.item()
                n_steps += 1

            # ReduceLROnPlateau needs the monitored metric; feed it the mean
            # training loss of the epoch.
            if n_steps:
                scheduler.step(loss_sum / n_steps)

        # Test the Model
        if (epoch + 1) % 10 == 0:
            model.eval()
            retrievalB, retrievalL, queryB, queryL = compress(
                database_loader, test_loader, model)
            result_map = calculate_top_map(qB=queryB,
                                           rB=retrievalB,
                                           queryL=queryL,
                                           retrievalL=retrievalL,
                                           topk=100)
            print('--------mAP@100: {}--------'.format(result_map))
def objective(trial):
    """Train and validate one configuration and return its validation loss.

    Options come from ``parse_opts()``. When ``trial`` is truthy,
    ``weight_decay`` and ``learning_rate`` are replaced by values suggested
    by the trial. Training and validation scalars are logged through a
    ``SummaryWriter``, and the options are dumped to ``opts.json`` in the
    result directory.

    Args:
        trial: Object exposing ``suggest_uniform(name, low, high)``, or a
            falsy value to run with the parsed options unchanged.

    Returns:
        The average validation loss of the last validated epoch, or ``None``
        when no validation epoch ran.
    """
    opt = parse_opts()
    if trial:
        opt.weight_decay = trial.suggest_uniform('weight_decay', 0.01, 0.1)
        # The bounds used to be written `1 - 5, 1 - 4`, which evaluates to
        # the negative range [-4, -3]; scientific notation was intended.
        opt.learning_rate = trial.suggest_uniform('learning_rate', 1e-5, 1e-4)

    if opt.root_path != '':
        opt.video_path = os.path.join(opt.root_path, opt.video_path)
        opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path)
        opt.result_path = os.path.join(opt.root_path, opt.result_path)
        if opt.resume_path:
            opt.resume_path = os.path.join(opt.root_path, opt.resume_path)
        if opt.pretrain_path:
            opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path)

    opt.scales = [opt.initial_scale]
    for _ in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    opt.arch = '{}-{}'.format(opt.model, opt.model_depth)
    opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
    opt.std = get_std(opt.norm_value)
    print(opt)
    with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)

    torch.manual_seed(opt.manual_seed)

    model, parameters = generate_model(opt)
    print(model)
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()

    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)

    if not opt.no_train:
        assert opt.train_crop in ['random', 'corner', 'center']
        if opt.train_crop == 'random':
            crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'corner':
            crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'center':
            crop_method = MultiScaleCornerCrop(opt.scales,
                                               opt.sample_size,
                                               crop_positions=['c'])
        spatial_transform = Compose([
            crop_method,
            RandomHorizontalFlip(),
            ToTensor(opt.norm_value),
            norm_method,
        ])
        temporal_transform = TemporalRandomCrop(opt.sample_duration)
        target_transform = ClassLabel()
        training_data = get_training_set(opt, spatial_transform,
                                         temporal_transform,
                                         target_transform)
        train_loader = torch.utils.data.DataLoader(
            training_data,
            batch_size=opt.batch_size,
            # sampler option is mutually exclusive with shuffle
            shuffle=False,
            sampler=ImbalancedDatasetSampler(training_data),
            num_workers=opt.n_threads,
            pin_memory=True)
        train_logger = Logger(os.path.join(opt.result_path, 'train.log'),
                              ['epoch', 'loss', 'acc', 'lr'])
        train_batch_logger = Logger(
            os.path.join(opt.result_path, 'train_batch.log'),
            ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])

        optimizer = optim.Adam(parameters,
                               lr=opt.learning_rate,
                               weight_decay=opt.weight_decay)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, verbose=True, factor=0.1**0.5)

    if not opt.no_val:
        spatial_transform = Compose([
            Scale(opt.sample_size),
            CenterCrop(opt.sample_size),
            ToTensor(opt.norm_value),
            norm_method,
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = ClassLabel()
        validation_data = get_validation_set(opt, spatial_transform,
                                             temporal_transform,
                                             target_transform)
        # NOTE(review): the validation set is drawn through the same
        # ImbalancedDatasetSampler as training - confirm this is intended.
        val_loader = torch.utils.data.DataLoader(
            validation_data,
            batch_size=opt.batch_size,
            shuffle=False,
            sampler=ImbalancedDatasetSampler(validation_data),
            num_workers=opt.n_threads,
            pin_memory=True)
        val_logger = Logger(os.path.join(opt.result_path, 'val.log'),
                            ['epoch', 'loss', 'acc'])

    if opt.resume_path:
        print('loading checkpoint {}'.format(opt.resume_path))
        checkpoint = torch.load(opt.resume_path)
        assert opt.arch == checkpoint['arch']

        opt.begin_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        if not opt.no_train:
            optimizer.load_state_dict(checkpoint['optimizer'])

    print('run')
    # Stays None when validation is disabled or the epoch range is empty, so
    # the final return cannot hit an unbound local.
    val_losses_avg = None
    writer = SummaryWriter(
        comment=
        f"_wd{opt.weight_decay}_lr{opt.learning_rate}_ft_begin{opt.ft_begin_index}_pretrain{not opt.pretrain_path == ''}"
    )
    try:
        for i in range(opt.begin_epoch, opt.n_epochs + 1):
            if not opt.no_train:
                epoch, losses_avg, accuracies_avg = train_epoch(
                    i, train_loader, model, criterion, optimizer, opt,
                    train_logger, train_batch_logger)
                writer.add_scalar('loss/train', losses_avg, epoch)
                writer.add_scalar('acc/train', accuracies_avg, epoch)

            if not opt.no_val:
                epoch, val_losses_avg, val_accuracies_avg = val_epoch(
                    i, val_loader, model, criterion, opt, val_logger)
                writer.add_scalar('loss/val', val_losses_avg, epoch)
                writer.add_scalar('acc/val', val_accuracies_avg, epoch)

            if not opt.no_train and not opt.no_val:
                scheduler.step(val_losses_avg)
            print('=' * 100)

        if opt.test:
            spatial_transform = Compose([
                Scale(int(opt.sample_size / opt.scale_in_test)),
                CornerCrop(opt.sample_size, opt.crop_position_in_test),
                ToTensor(opt.norm_value),
                norm_method,
            ])
            temporal_transform = LoopPadding(opt.sample_duration)
            target_transform = VideoID()

            test_data = get_test_set(opt, spatial_transform,
                                     temporal_transform, target_transform)
            test_loader = torch.utils.data.DataLoader(
                test_data,
                batch_size=opt.batch_size,
                shuffle=False,
                num_workers=opt.n_threads,
                pin_memory=True)
            test.test(test_loader, model, opt, test_data.class_names)
    finally:
        # Flush and close the event file even if an epoch raises.
        writer.close()

    return val_losses_avg