def extract_feature(opt, video_dir, C3D_model):
    assert opt.mode in ['score', 'feature']
    spatial_transform = Compose([Scale(opt.sample_size),
                                 CenterCrop(opt.sample_size),
                                 ToTensor(),
                                 Normalize(opt.mean, [1, 1, 1])])
    temporal_transform = LoopPadding(opt.sample_duration)
    load_image_fn = None
    data = Video(opt, video_dir, load_image_fn,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    c3d_features = []
    for i, clip in enumerate(data_loader):
        print(clip.mean())  # sanity check on input statistics
        # C3D features
        clip = clip.to(opt.device)
        with torch.no_grad():
            c3d_outputs = C3D_model(clip)
        # aggregate per-batch outputs, e.g. torch.Size([8, 512, 14, 14])
        c3d_features.append(c3d_outputs.cpu().data)
    c3d_features = torch.cat(c3d_features, 0)  # C3D features of one video
    return c3d_features.cpu().numpy()
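# Hedged, self-contained sketch of the pattern above: run a 3D model over
# batched clips under torch.no_grad() and concatenate per-batch outputs into
# one feature tensor. The toy Conv3d and the shapes are assumptions for
# illustration, not this repo's C3D weights.
import torch
import torch.nn as nn

toy_c3d = nn.Conv3d(3, 8, kernel_size=3, padding=1)  # stand-in for C3D_model
toy_c3d.eval()
fake_loader = [torch.randn(2, 3, 16, 112, 112) for _ in range(3)]  # 3 fake batches
features = []
with torch.no_grad():
    for clip in fake_loader:
        features.append(toy_c3d(clip).cpu())
features = torch.cat(features, 0)  # all clips of one "video"
print(features.shape)  # torch.Size([6, 8, 16, 112, 112])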
def get_loaders(opt):
    """ Make dataloaders for the train and validation sets """
    # train loader
    norm_method = Normalize(mean=[0.485, 0.456, 0.406],
                            std=[0.229, 0.224, 0.225])
    # note: Resize(256) + CenterCrop(224) make the preceding Scale redundant
    spatial_transform = Compose([
        Scale((opt.sample_size, opt.sample_size)),
        Resize(256),
        CenterCrop(224),
        ToTensor(),
        norm_method
    ])
    temporal_transform = TemporalRandomCrop(25)
    target_transform = ClassLabel()
    training_data = get_training_set(opt, spatial_transform,
                                     temporal_transform, target_transform)
    train_loader = torch.utils.data.DataLoader(training_data,
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.num_workers,
                                               pin_memory=True)
    # validation loader
    target_transform = ClassLabel()
    temporal_transform = LoopPadding(25)
    validation_data = get_validation_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
    val_loader = torch.utils.data.DataLoader(validation_data,
                                             batch_size=opt.batch_size,
                                             shuffle=False,
                                             num_workers=opt.num_workers,
                                             pin_memory=True)
    return train_loader, val_loader
def get_dataloader(opt):
    mean = [110.63666788 / 255, 103.16065604 / 255, 96.29023126 / 255]
    std = [1, 1, 1]
    norm_method = Normalize(mean, std)
    spatial_transform = Compose([Scale(112),
                                 CornerCrop(112, 'c'),
                                 ToTensor(255),
                                 norm_method])
    temporal_transform = LoopPadding(16)
    target_transform = ClassLabel()
    test_data = SurgicalDataset(os.path.abspath(opt.frames_path),
                                os.path.abspath(opt.video_phase_annotation_path),
                                opt.class_names,
                                spatial_transform=spatial_transform,
                                temporal_transform=temporal_transform,
                                target_transform=target_transform,
                                sample_duration=16)
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=4,
                                              pin_memory=True)
    return test_loader
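# Hedged sketch of what LoopPadding does, consistent with the common
# 3D-ResNets-PyTorch implementation (an assumption, not verified against this
# repo): clips shorter than `size` are padded by looping their frame indices
# from the beginning.
class LoopPaddingSketch:
    def __init__(self, size):
        self.size = size

    def __call__(self, frame_indices):
        out = list(frame_indices)
        for index in out:  # appends while iterating, so indices repeat
            if len(out) >= self.size:
                break
            out.append(index)
        return out

# e.g. LoopPaddingSketch(8)([1, 2, 3]) -> [1, 2, 3, 1, 2, 3, 1, 2]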
def classify_video(video_dir, video_name, class_names, model, opt):
    # print("video_dir: {}, video_name: {}".format(video_dir, video_name))
    assert opt.mode in ['score', 'feature']
    spatial_transform = Compose([Scale(opt.sample_size),
                                 CenterCrop(opt.sample_size),
                                 ToTensor(),
                                 Normalize(opt.mean, [1, 1, 1])])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    video_outputs = []
    # video_segments = []
    with torch.no_grad():  # replaces the deprecated Variable(..., volatile=True)
        for i, (inputs, segments) in enumerate(data_loader):
            outputs = model(inputs)
            video_outputs.append(outputs.cpu().data)
            # video_segments.append(segments)
    if len(video_outputs) != 0:
        video_outputs = torch.cat(video_outputs)
        return video_outputs.numpy()
    else:
        return None
def classify_video(video_dir, video_name, class_names, model, opt):
    assert opt.mode in ['score', 'feature']
    print('video_name:', video_name)
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    video_outputs = []
    video_segments = []
    print('Running on video', video_dir)
    # print('Data loader size', len(data_loader))
    with torch.no_grad():  # replaces the deprecated Variable(..., volatile=True)
        for i, (inputs, segments) in enumerate(data_loader):
            print(i, inputs.size(), segments.shape)
            outputs = model(inputs)
            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)
    results = {'video': video_name, 'clips': []}
    if len(video_outputs) > 0:
        print('First batch of outputs:', video_outputs[0].shape)
        video_outputs = torch.cat(video_outputs)
        video_segments = torch.cat(video_segments)
        _, max_indices = video_outputs.max(dim=1)
        print('Video outputs', video_outputs.size())
        for i in range(video_outputs.size(0)):
            clip_results = {'segment': video_segments[i].tolist()}
            if opt.mode == 'score':
                clip_results['label'] = class_names[max_indices[i]]
                clip_results['scores'] = video_outputs[i].tolist()
            elif opt.mode == 'feature':
                clip_results['features'] = video_outputs[i].tolist()
            results['clips'].append(clip_results)
    return results
def model_process(count, model):
    opt = parse_opts()
    if opt.root_path != '':
        opt.video_path = os.path.join(opt.root_path, opt.video_path)
        opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path)
        opt.result_path = os.path.join(opt.root_path, opt.result_path)
        if opt.resume_path:
            opt.resume_path = os.path.join(opt.root_path, opt.resume_path)
        if opt.pretrain_path:
            opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path)
    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    # opt.arch = '{}-{}'.format(opt.model, opt.model_depth)
    opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
    opt.std = get_std(opt.norm_value)
    with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)
    torch.manual_seed(opt.manual_seed)
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()
    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)
    print('testing is run')
    if opt.test:
        spatial_transform = Compose([
            Scale(int(opt.sample_size / opt.scale_in_test)),
            CornerCrop(opt.sample_size, opt.crop_position_in_test),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = VideoID()
        test_data = get_test_set(opt, spatial_transform, temporal_transform,
                                 target_transform)
        test_loader = torch.utils.data.DataLoader(test_data,
                                                  batch_size=opt.batch_size,
                                                  shuffle=False,
                                                  num_workers=opt.n_threads,
                                                  pin_memory=True)
        tester.test(count, test_loader, model, opt, test_data.class_names)
def classify_video(video_dir, video_name, class_names, model, opt,
                   annotation_digit=5):
    assert opt.mode in ['score', 'feature']
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    print('reading file from:', video_dir, 'file name:', video_name)
    video_outputs = []
    video_segments = []
    with torch.no_grad():  # replaces the deprecated Variable(..., volatile=True)
        for i, (inputs, segments) in enumerate(data_loader):
            outputs = model(inputs)
            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)
    video_outputs = torch.cat(video_outputs)
    video_segments = torch.cat(video_segments)
    results = {'video': video_name, 'clips': []}
    _, max_indices = video_outputs.max(dim=1)
    for i in range(video_outputs.size(0)):
        clip_results = {'segment': video_segments[i].tolist()}
        if opt.mode == 'score':
            clip_results['label'] = class_names[max_indices[i]]
            clip_results['scores'] = video_outputs[i].tolist()
        elif opt.mode == 'feature':
            clip_results['features'] = video_outputs[i].tolist()
        # key was misspelled 'ground_truth_annotaion' in the original
        clip_results['ground_truth_annotation'] = annotation_digit
        results['clips'].append(clip_results)
    return results
def classify_video(video_dir, video_name, class_names, model, opt):
    assert opt.mode in ['score', 'feature']
    spatial_transform = Compose([Scale(opt.sample_size),
                                 CenterCrop(opt.sample_size),
                                 ToTensor(),
                                 Normalize(opt.mean, [1, 1, 1])])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration,
                 stride=opt.stride)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    video_outputs = []
    video_segments = []
    with torch.no_grad():  # replaces the deprecated Variable(..., volatile=True)
        for i, (inputs, segments) in enumerate(data_loader):
            outputs = model(inputs)
            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)
    if len(video_outputs) == 0:
        # record unreadable or empty videos so they can be re-run later
        with open("error.list", 'a') as fout:
            fout.write("{}\n".format(video_name))
        return {}
    video_outputs = torch.cat(video_outputs)
    video_segments = torch.cat(video_segments)
    results = {'video': video_name, 'clips': []}
    _, max_indices = video_outputs.max(dim=1)
    for i in range(video_outputs.size(0)):
        clip_results = {'segment': video_segments[i].tolist()}
        if opt.mode == 'score':
            clip_results['label'] = class_names[max_indices[i]]
            clip_results['scores'] = video_outputs[i].tolist()
        elif opt.mode == 'feature':
            clip_results['features'] = video_outputs[i].tolist()
        results['clips'].append(clip_results)
    return results
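# Hedged usage sketch: the per-video dicts returned above serialize directly
# to JSON. The file name and the dummy result below are assumptions for
# illustration only; the key layout ('video', 'clips', 'segment', 'scores')
# matches what classify_video builds.
import json

example_results = {'video': 'example.mp4',
                   'clips': [{'segment': [1, 16], 'scores': [0.1, 0.9]}]}
with open('example_result.json', 'w') as f:
    json.dump(example_results, f)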
def extract_feature(opt, video_dir, C3D_model, load_image_fn, C2D_model,
                    c2d_shape, duration):
    assert opt.mode in ['score', 'feature']
    C, H, W = c2d_shape
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    opt.num_segments = max(int(duration / opt.clip_len), 1)
    data = Video(opt, video_dir, load_image_fn,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=0,
                                              pin_memory=True)
    c3d_features = []
    c2d_features = []
    for i, (clip, frames_npy_data) in enumerate(data_loader):
        # C3D features
        clip = clip.to(opt.device)
        with torch.no_grad():
            c3d_outputs = C3D_model(clip)
        # C2D features
        frames = frames_npy_data.to(opt.device)
        with torch.no_grad():
            c2d_outputs = C2D_model(frames).squeeze()
        if len(c2d_outputs.shape) == 1:
            # squeeze() collapsed a batch of one; restore the batch dim
            c2d_outputs = c2d_outputs.unsqueeze(0)
        # aggregate per-batch outputs
        c3d_features.append(c3d_outputs.cpu().data)
        c2d_features.append(c2d_outputs.cpu().data)
    try:
        c3d_features = torch.cat(c3d_features)  # C3D features of one video
        c2d_features = torch.cat(c2d_features)  # C2D features of one video
    except (RuntimeError, ValueError):  # empty video: nothing to concatenate
        return None, None
    return c3d_features.cpu().numpy(), c2d_features.cpu().numpy()
def classify_video(video_dir, video_name, class_names, model, opt):
    assert opt.mode in ['score', 'feature']
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    video_outputs = []
    video_segments = []
    with torch.no_grad():
        for i, (inputs, segments) in enumerate(data_loader):
            outputs = model(inputs)
            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)
    video_outputs = torch.cat(video_outputs)
    video_segments = torch.cat(video_segments)
    results = {'video': video_name, 'clips': []}
    mypath = os.path.join('features', video_name.split('.')[0])
    os.makedirs(mypath, exist_ok=True)  # os.mkdir fails if the dir exists
    # write each clip's output vector to its own text file
    for i in range(video_outputs.size(0)):
        with open(os.path.join(mypath, str(i) + '.txt'), 'w+') as f:
            f.write(' '.join(map(str, video_outputs[i].tolist())))
    return results
def classify_video(video_dir, video_name, model, opt):
    assert opt.mode in ['score', 'feature']
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    video_outputs = []
    video_segments = []
    with torch.no_grad():  # replaces the deprecated Variable(..., volatile=True)
        for i, (inputs, segments) in enumerate(data_loader):
            outputs = model(inputs)
            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)
    video_outputs = torch.cat(video_outputs)
    video_segments = torch.cat(video_segments)
    clips = []
    for i in range(video_outputs.size(0)):
        clip_results = {'segment': video_segments[i].tolist(),
                        'features': video_outputs[i].tolist()}
        clips.append(clip_results)
    return video_name, clips
def get_loaders(opt):
    """ Make dataloaders for the train and validation sets """
    # train loader
    opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)
    spatial_transform = Compose([
        # crop_method,
        Scale((opt.sample_size, opt.sample_size)),
        # RandomHorizontalFlip(),
        ToTensor(opt.norm_value), norm_method
    ])
    temporal_transform = TemporalRandomCrop(16)
    target_transform = ClassLabel()
    training_data = get_training_set(opt, spatial_transform,
                                     temporal_transform, target_transform)
    train_loader = torch.utils.data.DataLoader(
        training_data,
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.num_workers,
        pin_memory=True)
    # validation loader
    spatial_transform = Compose([
        Scale((opt.sample_size, opt.sample_size)),
        # CenterCrop(opt.sample_size),
        ToTensor(opt.norm_value), norm_method
    ])
    target_transform = ClassLabel()
    temporal_transform = LoopPadding(16)
    validation_data = get_validation_set(
        opt, spatial_transform, temporal_transform, target_transform)
    val_loader = torch.utils.data.DataLoader(
        validation_data,
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=opt.num_workers,
        pin_memory=True)
    return train_loader, val_loader
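# Hedged sketch of the train-time temporal transform used above, consistent
# with the common 3D-ResNets-PyTorch implementation (an assumption): pick a
# random fixed-size window of frame indices, then loop-pad if the video is
# shorter than the window.
import random

class TemporalRandomCropSketch:
    def __init__(self, size):
        self.size = size

    def __call__(self, frame_indices):
        rand_end = max(0, len(frame_indices) - self.size - 1)
        begin = random.randint(0, rand_end)
        out = list(frame_indices[begin:begin + self.size])
        for index in out:  # loop-pad short clips
            if len(out) >= self.size:
                break
            out.append(index)
        return out

# e.g. TemporalRandomCropSketch(16)(list(range(1, 11))) -> 16 indices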
def classify_video(video_dir, video_name, model, opt):
    assert opt.mode in ['score', 'feature']
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=False)
    video_outputs = []
    video_segments = []
    with torch.no_grad():  # Variable() wrapping is unnecessary here
        for i, (inputs, segments) in enumerate(data_loader):
            outputs = model(inputs)
            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)
    if video_outputs:
        video_outputs = torch.cat(video_outputs)
        video_segments = torch.cat(video_segments)
    results = dict()
    results['video'] = video_name
    results['features'] = video_outputs
    results['clips'] = video_segments
    return results
def classify_video(video_dir, video_name, class_names, model, opt):
    assert opt.mode == 'feature'
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    video_outputs = []
    video_segments = []
    with torch.no_grad():
        for i, (inputs, segments) in enumerate(data_loader):
            outputs = model(inputs)
            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)
    video_outputs = torch.cat(video_outputs)
    # video_segments = torch.cat(video_segments)
    # equivalent to the original per-clip expand_dims + concatenate loop
    results = video_outputs.numpy()
    return results
def classify_video(video_dir, video_name, class_names, model, opt):
    assert opt.mode in ['score', 'feature']
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    video_outputs = []
    video_segments = []
    with torch.no_grad():  # replaces the deprecated Variable(..., volatile=True)
        for i, (inputs, segments) in enumerate(data_loader):
            outputs = model(inputs)
            outputs = F.softmax(outputs, dim=1)
            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)
    video_outputs = torch.cat(video_outputs)
    video_segments = torch.cat(video_segments)
    results = {'video': video_name, 'clips': []}
    for i in range(video_outputs.size(0)):
        clip_results = {'segment': video_segments[i].tolist()}
        # top-5 labels for this clip
        label = get_video_results(video_outputs[i], class_names, 5)
        clip_results['label'] = label
        results['clips'].append(clip_results)
    # (a commented-out alternative that averaged clip scores into a single
    # video-level prediction was removed here)
    return results
def objective(trial):
    opt = parse_opts()
    if trial:
        opt.weight_decay = trial.suggest_uniform('weight_decay', 0.01, 0.1)
        # '1 - 5, 1 - 4' evaluated to -4 and -3; the intended range is 1e-5..1e-4
        opt.learning_rate = trial.suggest_uniform('learning_rate', 1e-5, 1e-4)
    if opt.root_path != '':
        opt.video_path = os.path.join(opt.root_path, opt.video_path)
        opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path)
        opt.result_path = os.path.join(opt.root_path, opt.result_path)
        if opt.resume_path:
            opt.resume_path = os.path.join(opt.root_path, opt.resume_path)
        if opt.pretrain_path:
            opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path)
    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    opt.arch = '{}-{}'.format(opt.model, opt.model_depth)
    opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
    opt.std = get_std(opt.norm_value)
    print(opt)
    with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)
    torch.manual_seed(opt.manual_seed)
    model, parameters = generate_model(opt)
    print(model)
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()
    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)
    # norm_method = Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    if not opt.no_train:
        assert opt.train_crop in ['random', 'corner', 'center']
        if opt.train_crop == 'random':
            crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'corner':
            crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'center':
            crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size,
                                               crop_positions=['c'])
        spatial_transform = Compose([
            crop_method,
            RandomHorizontalFlip(),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = TemporalRandomCrop(opt.sample_duration)
        target_transform = ClassLabel()
        training_data = get_training_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
        train_loader = torch.utils.data.DataLoader(
            training_data,
            batch_size=opt.batch_size,
            # the sampler option is mutually exclusive with shuffle
            shuffle=False,
            sampler=ImbalancedDatasetSampler(training_data),
            num_workers=opt.n_threads,
            pin_memory=True)
        train_logger = Logger(os.path.join(opt.result_path, 'train.log'),
                              ['epoch', 'loss', 'acc', 'lr'])
        train_batch_logger = Logger(
            os.path.join(opt.result_path, 'train_batch.log'),
            ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])
        optimizer = optim.Adam(parameters,
                               lr=opt.learning_rate,
                               weight_decay=opt.weight_decay)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, verbose=True, factor=0.1**0.5)
    if not opt.no_val:
        spatial_transform = Compose([
            Scale(opt.sample_size),
            CenterCrop(opt.sample_size),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = ClassLabel()
        validation_data = get_validation_set(opt, spatial_transform,
                                             temporal_transform,
                                             target_transform)
        val_loader = torch.utils.data.DataLoader(
            validation_data,
            batch_size=opt.batch_size,
            shuffle=False,
            sampler=ImbalancedDatasetSampler(validation_data),
            num_workers=opt.n_threads,
            pin_memory=True)
        val_logger = Logger(os.path.join(opt.result_path, 'val.log'),
                            ['epoch', 'loss', 'acc'])
    if opt.resume_path:
        print('loading checkpoint {}'.format(opt.resume_path))
        checkpoint = torch.load(opt.resume_path)
        assert opt.arch == checkpoint['arch']
        opt.begin_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        if not opt.no_train:
            optimizer.load_state_dict(checkpoint['optimizer'])
    print('run')
    writer = SummaryWriter(
        comment=f"_wd{opt.weight_decay}_lr{opt.learning_rate}"
                f"_ft_begin{opt.ft_begin_index}"
                f"_pretrain{not opt.pretrain_path == ''}")
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            epoch, losses_avg, accuracies_avg = train_epoch(
                i, train_loader, model, criterion, optimizer, opt,
                train_logger, train_batch_logger)
            writer.add_scalar('loss/train', losses_avg, epoch)
            writer.add_scalar('acc/train', accuracies_avg, epoch)
        if not opt.no_val:
            epoch, val_losses_avg, val_accuracies_avg = val_epoch(
                i, val_loader, model, criterion, opt, val_logger)
            writer.add_scalar('loss/val', val_losses_avg, epoch)
            writer.add_scalar('acc/val', val_accuracies_avg, epoch)
        if not opt.no_train and not opt.no_val:
            scheduler.step(val_losses_avg)
        print('=' * 100)
    if opt.test:
        spatial_transform = Compose([
            Scale(int(opt.sample_size / opt.scale_in_test)),
            CornerCrop(opt.sample_size, opt.crop_position_in_test),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = VideoID()
        test_data = get_test_set(opt, spatial_transform, temporal_transform,
                                 target_transform)
        test_loader = torch.utils.data.DataLoader(test_data,
                                                  batch_size=opt.batch_size,
                                                  shuffle=False,
                                                  num_workers=opt.n_threads,
                                                  pin_memory=True)
        test.test(test_loader, model, opt, test_data.class_names)
    writer.close()
    return val_losses_avg
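# Hedged driver sketch for the Optuna objective above. `objective` is the
# function defined in this file; the direction and trial count are assumptions.
import optuna

if __name__ == '__main__':
    study = optuna.create_study(direction='minimize')  # minimize val loss
    study.optimize(objective, n_trials=20)
    print('best params:', study.best_params)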
def main(args):
    import os
    import numpy as np
    import sys
    import json
    import torch
    from torch import nn
    from torch import optim
    from torch.optim import lr_scheduler
    from opts import parse_opts
    from mean import get_mean, get_std
    from spatial_transforms import (
        Compose, Normalize, Scale, CenterCrop, CornerCrop,
        MultiScaleCornerCrop, MultiScaleRandomCrop, RandomHorizontalFlip,
        ToTensor)
    from temporal_transforms import LoopPadding, TemporalRandomCrop
    from target_transforms import ClassLabel, VideoID
    from target_transforms import Compose as TargetCompose
    from dataset import get_training_set, get_validation_set, get_test_set
    from utils import Logger
    from train import train_epoch
    from validation import val_epoch
    import test
    import collections
    from sklearn.svm import LinearSVC
    from sklearn.svm import SVC
    from joblib import dump, load
    from sklearn import preprocessing
    from scipy import stats
    from sklearn.metrics import accuracy_score

    local_path = os.getcwd()
    if args.video_directory_path in ["", " ", './video', './video/']:
        video_path = local_path + '/video/'
    else:
        video_path = args.video_directory_path
    video_path_jpg = local_path + '/video_jpg/'
    if not os.path.exists(video_path_jpg):
        os.makedirs(video_path_jpg)
    extracted_feature_path = local_path + '/extracted_features'
    if not os.path.exists(extracted_feature_path):
        os.makedirs(extracted_feature_path)
    final_results_path = local_path + '/final_test_results'
    if not os.path.exists(final_results_path):
        os.makedirs(final_results_path)
    os.system('python utils/video_jpg.py' + ' ' + video_path + ' ' + video_path_jpg)
    os.system('python utils/n_frames.py' + ' ' + video_path_jpg)
    if args.pretrain_directory_path in ["", " ", './pretrain', './pretrain/']:
        pretrain_directory_path = local_path + '/pretrain'
    else:
        pretrain_directory_path = args.pretrain_directory_path

    import easydict
    opt = easydict.EasyDict({
        "n_classes": 2,
        "sample_size": 112,
        "sample_duration": 16,
        "batch_size": 16,
        "n_threads": 4,
        "norm_value": 1,
        "resnet_shortcut": 'B',
        "resnext_cardinality": 32,
    })
    opt.root_path = local_path
    opt.video_path = video_path_jpg
    # pin the run to one GPU; adjust to however many devices are available
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    from datasets.no_label_binary import NoLabelBinary
    mean = get_mean(opt.norm_value, dataset='kinetics')
    std = get_std(opt.norm_value)
    norm_method = Normalize(mean, [1, 1, 1])
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CornerCrop(opt.sample_size, 'c'),
        ToTensor(opt.norm_value), norm_method
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    target_transform = VideoID()  # ClassLabel()
    # get test data
    test_data = NoLabelBinary(
        opt.video_path, None, 'testing', 0,
        spatial_transform=spatial_transform,
        temporal_transform=temporal_transform,
        target_transform=target_transform,
        sample_duration=opt.sample_duration)
    # wrap test data
    test_loader = torch.utils.data.DataLoader(
        test_data,
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=False)

    # ### Extract features
    # ##### 3D ResNeXt-101
    from models import resnext
    # construct the model architecture
    model_rxt101 = resnext.resnet101(
        num_classes=opt.n_classes,
        shortcut_type=opt.resnet_shortcut,
        cardinality=opt.resnext_cardinality,
        sample_size=opt.sample_size,
        sample_duration=opt.sample_duration)
    model_rxt101 = model_rxt101.cuda()
    # wrap in nn.DataParallel so the checkpoint's 'module.' keys match
    # (alternatively, strip the .module prefix from the checkpoint keys)
    model_rxt101 = nn.DataParallel(model_rxt101, device_ids=None)
    # ### Load pretrained weights (customize the path as needed)
    pretrain = torch.load(pretrain_directory_path + '/resnext-101-kinetics.pth')
    pretrain_dict = pretrain['state_dict']
    # do not load the last layer, since we want to fine-tune it
    pretrain_dict.pop('module.fc.weight')
    pretrain_dict.pop('module.fc.bias')
    model_dict = model_rxt101.state_dict()
    model_dict.update(pretrain_dict)
    model_rxt101.load_state_dict(model_dict)
    # register a forward hook to capture features from the avgpool layer
    activation = {}

    def get_activation(name):
        def hook(model, input, output):
            activation[name] = output.detach()
        return hook

    model_rxt101.module.avgpool.register_forward_hook(get_activation('avgpool'))
    model_rxt101.eval()
    # forward all the videos to extract features
    avgpool_test = []
    targets_test = []
    with torch.no_grad():
        print("Extract test set features:")
        for i, (inputs, target) in enumerate(test_loader):
            if i % 30 == 0:
                print(i)
            output = model_rxt101(inputs)
            avgpool_test.append(activation['avgpool'].view(len(target), -1).cpu())
            targets_test.append(target)
    avgpool_test_np = np.concatenate([i.numpy() for i in avgpool_test], axis=0)
    np.save(opt.root_path + '/extracted_features/resnext101_avgpool_test.npy',
            avgpool_test_np)
    targets_test_np = np.concatenate(np.array(targets_test), axis=0)
    np.save(opt.root_path + '/extracted_features/class_names_test.npy',
            targets_test_np)

    # ##### 3D ResNet-50
    from models import resnet
    # construct the model architecture
    model_rt50 = resnet.resnet50(
        num_classes=opt.n_classes,
        shortcut_type=opt.resnet_shortcut,
        sample_size=opt.sample_size,
        sample_duration=opt.sample_duration)
    model_rt50 = model_rt50.cuda()
    model_rt50 = nn.DataParallel(model_rt50, device_ids=None)
    # ### Load pretrained weights (customize the path as needed)
    pretrain = torch.load(pretrain_directory_path + '/resnet-50-kinetics.pth')
    pretrain_dict = pretrain['state_dict']
    # do not load the last layer, since we want to fine-tune it
    pretrain_dict.pop('module.fc.weight')
    pretrain_dict.pop('module.fc.bias')
    model_dict = model_rt50.state_dict()
    model_dict.update(pretrain_dict)
    model_rt50.load_state_dict(model_dict)
    # register the same avgpool hook on this backbone
    activation = {}
    model_rt50.module.avgpool.register_forward_hook(get_activation('avgpool'))
    model_rt50.eval()
    # forward all the videos to extract features
    avgpool_test = []
    with torch.no_grad():
        print("Extract test set features:")
        for i, (inputs, target) in enumerate(test_loader):
            if i % 30 == 0:
                print(i)
            output = model_rt50(inputs)
            avgpool_test.append(activation['avgpool'].view(len(target), -1).cpu())
    # save the features
    avgpool_test_np = np.concatenate([i.numpy() for i in avgpool_test], axis=0)
    np.save(opt.root_path + '/extracted_features/resnet50_avgpool_test.npy',
            avgpool_test_np)

    # ### Load & fuse the features
    x_test_1 = np.load(opt.root_path + '/extracted_features/resnext101_avgpool_test.npy')
    x_test_2 = np.load(opt.root_path + '/extracted_features/resnet50_avgpool_test.npy')
    x_test = np.concatenate([x_test_1, x_test_2], axis=1)
    y_test = np.load(opt.root_path + '/extracted_features/class_names_test.npy')

    # ### Load the classification head and predict
    if args.model == 'hw4':  # hw4 best model
        clf = load('./hw6_results/logistic2_ucf.joblib')
        y_pred_test_raw = clf.predict(x_test_2)
        y_pred_test_prob_raw = clf.predict_proba(x_test_2)
    elif args.model == 'hw5':  # hw5 best model
        clf = load('./hw6_results/logistic_ucf.joblib')
        y_pred_test_raw = clf.predict(x_test)
        y_pred_test_prob_raw = clf.predict_proba(x_test)
    elif args.model == 'hw6':  # hw6 best model
        clf = load('./hw6_results/logistic1_ucf.joblib')
        y_pred_test_raw = clf.predict(x_test_1)
        y_pred_test_prob_raw = clf.predict_proba(x_test_1)
    elif args.model == 'hw8':  # hw8 best model
        clf = load('./hw8_results/logistic_ucf.joblib')
        y_pred_test_raw = clf.predict(x_test)
        y_pred_test_prob_raw = clf.predict_proba(x_test)
    elif args.model == 'final':  # final best model
        clf = load('./hw8_results/logistic1_ucf.joblib')
        y_pred_test_raw = clf.predict(x_test_1)
        y_pred_test_prob_raw = clf.predict_proba(x_test_1)

    # group consecutive clips that belong to the same video
    split_idx = []
    for idx, y_name in enumerate(y_test):
        if idx == 0 or y_name != y_test[idx - 1]:
            split_idx.append(idx)
    split_idx.append(len(y_test))
    y_pred_test, y_pred_test_prob, y_pred_test_final = {}, {}, {}
    for i, split in enumerate(split_idx):
        if i < len(split_idx) - 1:
            y_pred_test[y_test[split]] = y_pred_test_raw[split:split_idx[i + 1]]
            y_pred_test_prob[y_test[split]] = y_pred_test_prob_raw[split:split_idx[i + 1]]
            y_pred_test_final[y_test[split]] = np.argmax(
                np.mean(y_pred_test_prob_raw[split:split_idx[i + 1]], axis=0))

    # ### Get the length (in seconds) of each video clip
    tvns = list(y_pred_test_final.keys())
    mp4_path = video_path
    clip_duration_dict = {}
    from moviepy.editor import VideoFileClip
    i = 0
    for tvn in tvns:
        i += 1
        if i % 100 == 0:
            print(i)
        clip = VideoFileClip(os.path.join(mp4_path, tvn + ".mp4"))
        clip_duration_dict[tvn] = [clip.duration]

    # ### Generate figures
    import matplotlib.pyplot as plt
    for tvn in clip_duration_dict:
        interval = clip_duration_dict[tvn][0] / list(y_test).count(tvn)
        x = np.arange(0, clip_duration_dict[tvn][0], interval) + interval
        y_idx = np.argmax(y_pred_test_prob[tvn], 1)
        y = y_pred_test_prob[tvn][:, 1]
        x = x[:len(y)]
        plt.plot(x, y)
        plt.ylim([-0.1, 1.1])
        plt.xlabel('time/sec')
        plt.ylabel('pred score for ground truth label')
        plt.title("Ground Truth Label: " + tvn +
                  "\n Model Avg. Predict Score: " + str(np.mean(y)))
        plt.savefig(opt.root_path + "/final_test_results/" + tvn + '_' +
                    args.model + "_UIN-625007598", bbox_inches='tight')
        plt.close()

    # ### Generate JSON
    timeTrueLabel = {}
    for tvn in clip_duration_dict:
        if tvn in y_pred_test_prob:
            interval = clip_duration_dict[tvn][0] / list(y_test).count(tvn)
            x = np.arange(0, clip_duration_dict[tvn][0], interval) + interval
            y_idx = np.argmax(y_pred_test_prob[tvn], 1)
            y = y_pred_test_prob[tvn][:, 1]
            x = x[:len(y)]
            timeTrueLabel[tvn] = [[str(time), str(y[idx])]
                                  for idx, time in enumerate(x)]
    with open(opt.root_path + '/final_test_results/timeLabel_' + args.model +
              '_UIN-625007598.json', 'w') as fp:
        json.dump(timeTrueLabel, fp)
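# Hedged, self-contained sketch of the forward-hook pattern used above to
# grab intermediate activations; the toy model and layer choice are
# assumptions for illustration only.
import torch
import torch.nn as nn

net = nn.Sequential(nn.Conv2d(3, 4, 3), nn.AdaptiveAvgPool2d(1))
activation = {}

def get_activation(name):
    def hook(module, inputs, output):
        activation[name] = output.detach()
    return hook

net[1].register_forward_hook(get_activation('avgpool'))
net.eval()
with torch.no_grad():
    _ = net(torch.randn(2, 3, 8, 8))
print(activation['avgpool'].shape)  # torch.Size([2, 4, 1, 1])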
def main():
    resnet_in = generate_model(opt)
    resnet_in.module.fc = Identity()
    model = ReNet34(resnet_in, encode_length=encode_length)
    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)
    if not opt.no_train:
        assert opt.train_crop in ['random', 'corner', 'center']
        if opt.train_crop == 'random':
            crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'corner':
            crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'center':
            crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size,
                                               crop_positions=['c'])
    # train loader
    spatial_transform = Compose([
        crop_method,
        RandomHorizontalFlip(),
        ToTensor(opt.norm_value), norm_method
    ])
    temporal_transform = TemporalRandomCrop(opt.sample_duration)
    target_transform = ClassLabel()
    training_data = get_training_set(opt, spatial_transform,
                                     temporal_transform, target_transform)
    train_loader = torch.utils.data.DataLoader(training_data,
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.n_threads,
                                               pin_memory=True)
    # test loader
    spatial_transform = Compose([
        Scale(int(opt.sample_size / opt.scale_in_test)),
        CornerCrop(opt.sample_size, opt.crop_position_in_test),
        ToTensor(opt.norm_value), norm_method
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    target_transform = ClassLabel()
    test_data = get_test_set(opt, spatial_transform, temporal_transform,
                             target_transform)
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    # database loader
    spatial_transform = Compose([
        Scale(int(opt.sample_size / opt.scale_in_test)),
        CornerCrop(opt.sample_size, opt.crop_position_in_test),
        ToTensor(opt.norm_value), norm_method
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    target_transform = ClassLabel()
    validation_data = get_validation_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
    database_loader = torch.utils.data.DataLoader(validation_data,
                                                  batch_size=opt.batch_size,
                                                  shuffle=False,
                                                  num_workers=opt.n_threads,
                                                  pin_memory=True)
    if opt.nesterov:
        dampening = 0
    else:
        dampening = opt.dampening
    optimizer = optim.SGD(model.parameters(),
                          lr=opt.learning_rate,
                          momentum=opt.momentum,
                          dampening=dampening,
                          weight_decay=opt.weight_decay,
                          nesterov=opt.nesterov)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min',
                                               patience=opt.lr_patience)
    if opt.resume_path:
        print('loading checkpoint {}'.format(opt.resume_path))
        checkpoint = torch.load(opt.resume_path)
        assert opt.arch == checkpoint['arch']
        opt.begin_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        if not opt.no_train:
            optimizer.load_state_dict(checkpoint['optimizer'])
            for state in optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()
    print('run')
    for epoch in range(opt.begin_epoch, opt.n_epochs + 1):
        model.cuda().train()
        for i, (images, labels) in enumerate(train_loader):
            images = images.cuda()
            labels = labels.cuda().long()
            # forward + backward + optimize
            optimizer.zero_grad()
            x, _, b = model(images)
            half = labels.size(0) // 2
            target_b = F.cosine_similarity(b[:half], b[half:])
            target_x = F.cosine_similarity(x[:half], x[half:])
            loss = F.mse_loss(target_b, target_x)
            loss.backward()
            optimizer.step()
        # ReduceLROnPlateau needs a metric; the original called scheduler.step()
        # per batch with no argument, so step once per epoch on the loss instead
        scheduler.step(loss.item())
        # test the model every 10 epochs
        if (epoch + 1) % 10 == 0:
            model.eval()
            retrievalB, retrievalL, queryB, queryL = compress(
                database_loader, test_loader, model)
            result_map = calculate_top_map(qB=queryB,
                                           rB=retrievalB,
                                           queryL=queryL,
                                           retrievalL=retrievalL,
                                           topk=100)
            print('--------mAP@100: {}--------'.format(result_map))
def extract_features(video_dir, video_name, class_names, model, opt,
                     annotation_digit=5):
    assert opt.mode in ['score', 'feature']
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    # print('reading file from: ', video_dir, 'file name: ', video_name)
    video_outputs = []
    video_segments = []
    model.eval()
    with torch.no_grad():
        for i, (inputs, segments) in enumerate(data_loader):
            inputs = inputs.cuda()
            outputs = model(inputs)
            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)
    video_outputs = torch.cat(video_outputs)
    video_segments = torch.cat(video_segments)
    results = {'video': video_name, 'clips': []}
    _, max_indices = video_outputs.max(dim=1)
    for i in range(video_outputs.size(0)):
        clip_results = {'segment': video_segments[i].tolist()}
        if opt.mode == 'score':
            clip_results['label'] = class_names[max_indices[i]]
            clip_results['scores'] = video_outputs[i].tolist()
        elif opt.mode == 'feature':
            clip_results['features'] = video_outputs[i].tolist()
            # key was misspelled 'ground_truth_annotaion' in the original
            clip_results['ground_truth_annotation'] = annotation_digit
        results['clips'].append(clip_results)
    # stack every clip's feature vector into an (N, 2048) array
    np_data = np.array([], dtype=np.float64).reshape(0, 2048)
    for clip in results["clips"]:
        np_data = np.vstack([np_data, np.asarray(clip["features"])])
    return np_data
        RandomHorizontalFlip(),
        ColorJitter(brightness=0.1),
        ToTensor(1),
        Normalize(args.mean, args.std)
    ]),
    'val': Compose([
        Scale(args.img_size),
        CenterCrop(args.img_size),
        ToTensor(1),
        Normalize(args.mean, args.std)
    ])
}
temporal_transform = {
    'train': Compose([LoopPadding(args.clip_len)]),
    'val': LoopPadding(args.clip_len)
}
dataset = {
    'train': HandHygiene(os.path.join(VIDEO_DIR, 'train'),
                         temporal_transform=temporal_transform['train'],
                         openpose_transform=openpose_transform['train'],
                         spatial_transform=spatial_transform['train'],
                         arguments=args),
    'val': HandHygiene(os.path.join(VIDEO_DIR, 'val'),
                       temporal_transform=temporal_transform['val'],
                       openpose_transform=openpose_transform['val'],
                       spatial_transform=spatial_transform['val'],
                      weight_decay=opt.weight_decay,
                      nesterov=True)
scheduler = lr_scheduler.MultiStepLR(optimizer,
                                     [15, 25, 40, 45, 50, 55, 60],
                                     gamma=0.1)
if not opt.no_val:
    # ----------------------------------------------------------------------
    if opt.model == 'I3D':
        spatial_transform = Compose([
            Scale((256, 256)),
            CenterCrop(224),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(0)
        target_transform = ClassLabel()
        validation_data = get_validation_set(opt, spatial_transform,
                                             temporal_transform,
                                             target_transform)
        val_loader = torch.utils.data.DataLoader(validation_data,
                                                 batch_size=1,
                                                 shuffle=False,
                                                 num_workers=opt.n_threads,
                                                 pin_memory=True)
    elif opt.model == 'resnet_50':
        spatial_transform = Compose([
            Scale(256),
            CenterCrop(256),
            ToTensor(opt.norm_value), norm_method
        ])
def main():
    opt = parse_opts()
    ecd_name, cls_name = opt.model_name.split('-')
    ecd_model = get_encoder_net(ecd_name)
    cls_model = get_end_net(cls_name)
    cfg.encoder_model = ecd_name
    cfg.classification_model = cls_name
    if opt.debug:
        cfg.debug = opt.debug
    else:
        if opt.tensorboard == 'TEST':
            cfg.tensorboard = opt.model_name
        else:
            cfg.tensorboard = opt.tensorboard
        cfg.flag = opt.flag
    model = cls_model(cfg,
                      encoder=CNNencoder(
                          cfg, ecd_model(pretrained=True,
                                         path=opt.encoder_model)))
    cfg.video_path = os.path.join(cfg.root_path, cfg.video_path)
    cfg.annotation_path = os.path.join(cfg.root_path, cfg.annotation_path)
    cfg.list_all_member()
    torch.manual_seed(cfg.manual_seed)
    print('##########################################')
    print('####### model (single-GPU only)')
    print('##########################################')
    model = model.cuda()
    print(model)
    criterion = nn.CrossEntropyLoss()
    if cfg.cuda:
        criterion = criterion.cuda()
    norm_method = Normalize([0, 0, 0], [1, 1, 1])
    print('##########################################')
    print('####### train')
    print('##########################################')
    assert cfg.train_crop in ['random', 'corner', 'center']
    if cfg.train_crop == 'random':
        # the original assigned a bare tuple here; MultiScaleRandomCrop is the intent
        crop_method = MultiScaleRandomCrop(cfg.scales, cfg.sample_size)
    elif cfg.train_crop == 'corner':
        crop_method = MultiScaleCornerCrop(cfg.scales, cfg.sample_size)
    elif cfg.train_crop == 'center':
        crop_method = MultiScaleCornerCrop(cfg.scales, cfg.sample_size,
                                           crop_positions=['c'])
    spatial_transform = Compose([
        crop_method,
        RandomHorizontalFlip(),
        ToTensor(cfg.norm_value), norm_method
    ])
    temporal_transform = TemporalRandomCrop(cfg.sample_duration)
    target_transform = ClassLabel()
    training_data = get_training_set(cfg, spatial_transform,
                                     temporal_transform, target_transform)
    train_loader = torch.utils.data.DataLoader(training_data,
                                               batch_size=cfg.batch_size,
                                               shuffle=True,
                                               num_workers=cfg.n_threads,
                                               drop_last=False,
                                               pin_memory=True)
    optimizer = model.get_optimizer(lr1=cfg.lr, lr2=cfg.lr2)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min',
                                               patience=cfg.lr_patience)
    print('##########################################')
    print('####### val')
    print('##########################################')
    spatial_transform = Compose([
        Scale(cfg.sample_size),
        CenterCrop(cfg.sample_size),
        ToTensor(cfg.norm_value), norm_method
    ])
    temporal_transform = LoopPadding(cfg.sample_duration)
    target_transform = ClassLabel()
    validation_data = get_validation_set(cfg, spatial_transform,
                                         temporal_transform, target_transform)
    val_loader = torch.utils.data.DataLoader(validation_data,
                                             batch_size=cfg.batch_size,
                                             shuffle=False,
                                             num_workers=cfg.n_threads,
                                             drop_last=False,
                                             pin_memory=True)
    print('##########################################')
    print('####### run')
    print('##########################################')
    if cfg.debug:
        logger = None
    else:
        path = get_log_dir(cfg.logdir, name=cfg.tensorboard, flag=cfg.flag)
        logger = Logger(logdir=path)
        cfg.save_config(path)
    for i in range(cfg.begin_epoch, cfg.n_epochs + 1):
        train_epoch(i, train_loader, model, criterion, optimizer, cfg, logger)
        validation_loss = val_epoch(i, val_loader, model, criterion, cfg,
                                    logger)
        scheduler.step(validation_loss)
def train_main_multi_batch(model, input_root_dir, opt):
    # NOTE: both loggers originally used logging.getLogger('info'), which
    # returns the same logger object twice; give them distinct names so the
    # epoch and batch handlers do not stack on one logger.
    epoch_logger = logging.getLogger('epoch_info')
    batch_logger = logging.getLogger('batch_info')
    elogHandler = logging.StreamHandler()
    eformatter = jsonlogger.JsonFormatter()
    elogHandler.setFormatter(eformatter)
    epoch_logger.addHandler(elogHandler)
    blogHandler = logging.StreamHandler()
    bformatter = jsonlogger.JsonFormatter()
    blogHandler.setFormatter(bformatter)
    batch_logger.addHandler(blogHandler)
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    # criterion = nn.CrossEntropyLoss()
    criterion = nn.MSELoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    epoch = 1
    model.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    accuracies = AverageMeter()
    end_time = time.time()
    ii = 0
    previous_label = "FAKE"
    pre_previous_label = "FAKE"  # note: never reassigned below
    for files_dir in os.listdir(input_root_dir):
        sub_path = os.path.join(input_root_dir, files_dir)
        print("Files dir: " + files_dir)
        print("Sub path: " + sub_path)
        data_file_path = os.path.join(sub_path, 'metadata.json')
        with open(data_file_path, 'r') as data_file:
            labels = json.load(data_file)
        opt.batch_size = 36
        total_batch_size = len(os.listdir(sub_path))
        i = 0
        input_files = os.listdir(sub_path)
        # walk the directory in pairs of videos
        for inp_num in range(1, len(input_files), 2):
            print("Pair index: " + str(inp_num))
            input_file1 = input_files[inp_num]
            input_file2 = input_files[inp_num - 1]
            if input_file1.endswith(".mp4") and input_file2.endswith(".mp4"):
                video_path1 = os.path.join(sub_path, input_file1)
                video_path2 = os.path.join(sub_path, input_file2)
                label1 = labels[input_file1]
                label2 = labels[input_file2]
                if (label1['label'] != previous_label
                        or label1['label'] != pre_previous_label):
                    previous_label = label1['label']
                    # dump both videos' frames into a shared tmp directory
                    subprocess.call('mkdir tmp', shell=True)
                    subprocess.call(
                        'ffmpeg -hide_banner -loglevel panic -i {} -vframes 288 tmp/image_%05d.jpg'
                        .format(video_path1),
                        shell=True)
                    subprocess.call(
                        'ffmpeg -hide_banner -loglevel panic -i {} -vframes 288 -start_number 289 tmp/image_%05d.jpg'
                        .format(video_path2),
                        shell=True)
                    video_dir = '{}tmp/'.format(
                        '/data/codebases/video_classification/')
                    data = Video(video_dir,
                                 spatial_transform=spatial_transform,
                                 temporal_transform=temporal_transform,
                                 sample_duration=opt.sample_duration)
                    data_loader = torch.utils.data.DataLoader(
                        data,
                        batch_size=opt.batch_size,
                        shuffle=False,
                        num_workers=opt.n_threads,
                        pin_memory=True)
                    for k, (inputs, targets) in enumerate(data_loader):
                        data_time.update(time.time() - end_time)
                        print("Label: " + label1['label'] + ", " + label2['label'])
                        # build MSE targets: the first half of the batch
                        # follows label1, the second half follows label2
                        # (a commented-out cross-entropy variant was removed)
                        targets = torch.zeros([opt.batch_size, opt.n_classes],
                                              dtype=torch.float)
                        for j in range(0, opt.batch_size // 2):
                            if label1['label'] == 'FAKE':
                                targets[j][0] = 0.0
                                targets[j][1] = 1.0
                            else:
                                targets[j][0] = 1.0
                                targets[j][1] = 0.0
                        for j in range(opt.batch_size // 2, opt.batch_size):
                            if label2['label'] == 'FAKE':
                                targets[j][0] = 0.0
                                targets[j][1] = 1.0
                            else:
                                targets[j][0] = 1.0
                                targets[j][1] = 0.0
                        if not opt.no_cuda:
                            targets = targets.cuda(non_blocking=True)
                        outputs = model(inputs)
                        print(outputs.t())
                        print(targets.t())
                        loss = criterion(outputs, targets)  # MSE loss
                        print(loss)
                        acc = calculate_accuracy_mse(outputs, targets)
                        print(acc)
                        # loss.item() replaces the loss.data[0] try/except
                        losses.update(loss.item(), inputs.size(0))
                        accuracies.update(acc, inputs.size(0))
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                        batch_time.update(time.time() - end_time)
                        end_time = time.time()
                        batch_logger.log(
                            1, {
                                'epoch': epoch,
                                'batch': i + 1,
                                'iter': (epoch - 1) * opt.batch_size + (i + 1),
                                'loss': losses.val,
                                'acc': accuracies.val,
                                'lr': optimizer.param_groups[0]['lr']
                            })
                        print('Epoch: [{0}][{1}/{2}]\t'
                              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                              'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                              'Acc {acc.val:.3f} ({acc.avg:.3f})'.format(
                                  epoch, i + 1, opt.batch_size,
                                  batch_time=batch_time,
                                  data_time=data_time,
                                  loss=losses,
                                  acc=accuracies))
                        ii += 1
                    subprocess.call('rm -rf tmp', shell=True)
                    i += 1
                    if ii % 100 == 0:
                        save_loc = '/data/codebases/video_classification/model{}.pth'.format(ii)
                        torch.save(model.state_dict(), save_loc)
    epoch_logger.log(
        1, {
            'epoch': epoch,
            'loss': losses.avg,
            'acc': accuracies.avg,
            'lr': optimizer.param_groups[0]['lr']
        })
    print('XXX Epoch: [{0}]\t'
          'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
          'Acc {acc.val:.3f} ({acc.avg:.3f})'.format(epoch,
                                                     loss=losses,
                                                     acc=accuracies))
    exit(1)
    os.path.join(cfg.custom_logdir, 'train_batch.log'),
    ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])
optimizer = model.get_optimizer(lr1=cfg.lr, lr2=cfg.lr2)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min',
                                           patience=cfg.lr_patience)
print('##########################################')
print('####### val')
print('##########################################')
spatial_transform = Compose([
    Scale(cfg.sample_size),
    CenterCrop(cfg.sample_size),
    ToTensor(cfg.norm_value), norm_method
])
temporal_transform = LoopPadding(cfg.sample_duration)
target_transform = ClassLabel()
validation_data = get_validation_set(cfg, spatial_transform,
                                     temporal_transform, target_transform)
val_loader = torch.utils.data.DataLoader(validation_data,
                                         batch_size=cfg.batch_size,
                                         shuffle=False,
                                         num_workers=cfg.n_threads,
                                         drop_last=False,
                                         pin_memory=True)
val_logger = Logger(os.path.join(cfg.custom_logdir, 'val.log'),
                    ['epoch', 'loss', 'acc'])
print('##########################################')
print('####### run')
print('##########################################')
def get_ucf_data(opt):
    mean = get_mean(opt.norm_value, dataset='kinetics')
    std = get_std(opt.norm_value)
    norm_method = Normalize(mean, [1, 1, 1])
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CornerCrop(opt.sample_size, 'c'),
        ToTensor(opt.norm_value), norm_method
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    target_transform = ClassLabel()  # VideoID()
    # get training data
    training_data = UCF101(opt.video_path, opt.annotation_path, 'training', 0,
                           spatial_transform=spatial_transform,
                           temporal_transform=temporal_transform,
                           target_transform=target_transform,
                           sample_duration=16)
    # wrap training data
    train_loader = torch.utils.data.DataLoader(training_data,
                                               batch_size=opt.batch_size,
                                               shuffle=False,
                                               num_workers=opt.n_threads,
                                               pin_memory=False)  # True
    # get validation data
    val_data = UCF101(opt.video_path, opt.annotation_path, 'validation', 0,
                      spatial_transform=spatial_transform,
                      temporal_transform=temporal_transform,
                      target_transform=target_transform,
                      sample_duration=16)
    # wrap validation data
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=opt.batch_size,
                                             shuffle=False,
                                             num_workers=opt.n_threads,
                                             pin_memory=False)
    target_transform = VideoID()
    # get test data
    test_data = UCF101(opt.video_path, opt.annotation_path, 'testing', 0,
                       spatial_transform=spatial_transform,
                       temporal_transform=temporal_transform,
                       target_transform=target_transform,
                       sample_duration=16)
    # wrap test data
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=False)
    return train_loader, val_loader, test_loader, test_data
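# Hedged usage sketch: evaluate a model on one of the loaders above with
# top-1 accuracy. Generic PyTorch; the loader/model pairing is an assumption.
import torch

def top1_accuracy(model, loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, targets in loader:
            preds = model(inputs).argmax(dim=1)
            correct += (preds == targets).sum().item()
            total += targets.numel()
    return correct / max(total, 1)

# e.g. after: train_loader, val_loader, test_loader, test_data = get_ucf_data(opt)
#      acc = top1_accuracy(model, val_loader)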
optimizer = optim.SGD(parameters,
                      lr=opt.learning_rate,
                      momentum=opt.momentum,
                      dampening=dampening,
                      weight_decay=opt.weight_decay,
                      nesterov=opt.nesterov)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min',
                                           patience=opt.lr_patience)
if not opt.no_val:
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(opt.norm_value), norm_method
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    target_transform = ClassLabel()
    validation_data = get_validation_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
    val_loader = torch.utils.data.DataLoader(validation_data,
                                             batch_size=opt.batch_size,
                                             shuffle=False,
                                             num_workers=opt.n_threads,
                                             pin_memory=True)
    val_logger = Logger(os.path.join(opt.result_path, 'val.log'),
                        ['epoch', 'loss', 'acc'])
if opt.resume_path:
    print('loading checkpoint {}'.format(opt.resume_path))
    checkpoint = torch.load(opt.resume_path)
def main():
    opt = parse_opts()
    # Path configurations
    opt.annotation_path = os.path.join(opt.annotation_directory,
                                       opt.annotation_path)
    save_result_dir_name = os.path.join(
        opt.result_path,
        get_prefix() + '_{}{}_{}_epochs'.format(opt.model, opt.model_depth,
                                                opt.n_epochs))
    if not os.path.exists(save_result_dir_name):
        os.mkdir(save_result_dir_name)
    # the original re-joined result_path with a name that already contained it
    opt.result_path = save_result_dir_name
    # For the data generator
    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):  # loop variable was misleadingly named 'epoch'
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    opt.arch = '{}-{}'.format(opt.model, opt.model_depth)
    # Model
    model, parameters = generate_model(opt)
    # print(model)
    # Loss function
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()
    # Normalization
    if not opt.no_mean_norm:
        opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
        opt.std = get_std(opt.norm_value, dataset=opt.std_dataset)
        norm_method = Normalize(opt.mean, opt.std)
    else:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    print(opt)
    with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)

    # **************************** TRAINING CONFIGURATIONS ************************************
    assert opt.train_crop in ['corner', 'center']
    if opt.train_crop == 'corner':
        crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
    elif opt.train_crop == 'center':
        crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size,
                                           crop_positions=['c'])
    # Spatial transform
    spatial_transform = Compose([
        crop_method,
        # RandomHorizontalFlip(),
        ToTensor(opt.norm_value), norm_method
    ])
    # Temporal transform
    temporal_transform = TemporalRandomCrop(opt.sample_duration)
    # Target transform
    target_transform = ClassLabel()
    train_loader_list = []
    if not opt.no_cross_validation:
        annotation_list = os.listdir(opt.annotation_directory)
        for annotation in annotation_list:
            opt.annotation_path = os.path.join(opt.annotation_directory,
                                               annotation)
            training_data = get_training_set(opt, spatial_transform,
                                             temporal_transform,
                                             target_transform)
            train_loader = torch.utils.data.DataLoader(
                training_data,
                batch_size=opt.batch_size,
                shuffle=True,
                num_workers=opt.n_threads,
                pin_memory=True)
            train_loader_list.append(train_loader)
    else:
        training_data = get_training_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
        train_loader = torch.utils.data.DataLoader(training_data,
                                                   batch_size=opt.batch_size,
                                                   shuffle=True,
                                                   num_workers=opt.n_threads,
                                                   pin_memory=True)
        train_loader_list.append(train_loader)
    train_logger = Logger(os.path.join(opt.result_path, 'train.log'),
                          ['epoch', 'loss', 'acc', 'lr'])
    train_batch_logger = Logger(
        os.path.join(opt.result_path, 'train_batch.log'),
        ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])
    optimizer = optim.SGD(parameters,
                          lr=opt.learning_rate,
                          momentum=opt.momentum,
                          dampening=opt.dampening,
                          weight_decay=opt.weight_decay)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min',
                                               patience=opt.lr_patience)

    # ***************************** VALIDATION CONFIGURATIONS *********************************
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(opt.norm_value), norm_method
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    target_transform = ClassLabel()
    val_loader_list = []
    if not opt.no_cross_validation:
        annotation_list = os.listdir(opt.annotation_directory)
        for annotation in annotation_list:
            opt.annotation_path = os.path.join(opt.annotation_directory,
                                               annotation)
            validation_data = get_validation_set(opt, spatial_transform,
                                                 temporal_transform,
                                                 target_transform)
            val_loader = torch.utils.data.DataLoader(validation_data,
                                                     batch_size=opt.batch_size,
                                                     shuffle=False,
                                                     num_workers=opt.n_threads,
                                                     pin_memory=True)
            val_loader_list.append(val_loader)
    else:
        validation_data = get_validation_set(opt, spatial_transform,
                                             temporal_transform,
                                             target_transform)
        val_loader = torch.utils.data.DataLoader(validation_data,
                                                 batch_size=opt.batch_size,
                                                 shuffle=False,
                                                 num_workers=opt.n_threads,
                                                 pin_memory=True)
        val_loader_list.append(val_loader)
    val_logger = Logger(os.path.join(opt.result_path, 'val.log'),
                        ['epoch', 'loss', 'acc'])

    # **************************************** TRAINING ****************************************
    epoch_avg_time = AverageMeter()
    train_loss_list = []
    train_acc_list = []
    valid_acc_list = []
    best_accuracy = 0
    current_train_data = 0
    current_valid_data = 0
    opt.frequence_cross_validation = round(
        opt.n_epochs / opt.n_cross_validation_sets + 0.5)
    for epoch in range(opt.begin_epoch, opt.n_epochs + 1):
        epoch_start_time = time.time()
        print('Epoch #' + str(epoch))
        # optimizer = regulate_learning_rate(optimizer, epoch, opt.frequence_regulate_lr)
        train_loader = train_loader_list[current_train_data]
        if not opt.no_cross_validation and epoch % opt.frequence_cross_validation == 0:
            print('\t##### Cross-validation: switch training data #####')
            current_train_data = (current_train_data + 1) % len(train_loader_list)
            train_loader = train_loader_list[current_train_data]
        train_loss, train_acc = train_epoch(epoch, train_loader, model,
                                            criterion, optimizer, opt,
                                            train_logger, train_batch_logger)
        val_loader = val_loader_list[current_valid_data]
        if not opt.no_cross_validation and epoch % opt.frequence_cross_validation == 0:
            print('\t##### Cross-validation: switch validation data #####')
            current_valid_data = (current_valid_data + 1) % len(val_loader_list)
            val_loader = val_loader_list[current_valid_data]
        validation_acc = val_epoch(epoch, val_loader, model, criterion, opt,
                                   val_logger)
        train_loss_list.append(train_loss)
        train_acc_list.append(train_acc)
        valid_acc_list.append(validation_acc)
        # Save the model with the best accuracy
        if validation_acc > best_accuracy:
            best_accuracy = validation_acc
            save_file_path = os.path.join(opt.result_path, 'best_model.pth')
            states = {
                'epoch': epoch + 1,
                'arch': opt.arch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(states, save_file_path)
        epoch_end_time = time.time() - epoch_start_time
        epoch_avg_time.update(epoch_end_time)
        print('\tTime left: ' +
              str(round(epoch_avg_time.avg * (opt.n_epochs - epoch) / 60, 1)) +
              ' minutes')

    # ******************************* SAVING RESULTS OF TRAINING ******************************
    save_pictures(np.linspace(1, opt.n_epochs, opt.n_epochs), train_loss_list,
                  'red', 'Loss',
                  os.path.join(opt.result_path, 'train_loss.png'))
    save_pictures(np.linspace(1, opt.n_epochs, opt.n_epochs), train_acc_list,
                  'blue', 'Accuracy',
                  os.path.join(opt.result_path, 'train_accuracy.png'))
    save_pictures(np.linspace(1, opt.n_epochs, opt.n_epochs), valid_acc_list,
                  'blue', 'Accuracy',
                  os.path.join(opt.result_path, 'validation_accuracy.png'))