def load_dataloader(args, train_paths, val_paths):
    """Build the train/val DataLoaders over VideoDataset clips.

    Both loaders use batch_size=1 with 4 pinned-memory workers; only the
    training loader shuffles.

    Returns:
        (train_dataloader, val_dataloader)
    """
    def _make_loader(paths, shuffle):
        # One clip per batch; pinned memory speeds host->GPU transfers.
        dataset = VideoDataset(paths, args.cnn_feat)
        return data.DataLoader(dataset,
                               batch_size=1,
                               shuffle=shuffle,
                               num_workers=4,
                               pin_memory=True)

    return _make_loader(train_paths, True), _make_loader(val_paths, False)
def get_activations(files, data_type, model, batch_size, size, length, dims, device):
    """Calculates the activations of the pool_3 layer for all images.

    Params:
    -- files      : List of image files paths
    -- data_type  : 'video' or 'frame'; selects the dataset wrapper
    -- model      : Instance of inception model
    -- batch_size : Batch size of images for the model to process at once.
                    Make sure that the number of samples is a multiple of
                    the batch size, otherwise some samples are ignored. This
                    behavior is retained to match the original FID score
                    implementation.
    -- size       : Side length clips are resized to (size x size)
    -- length     : Number of frames per clip
    -- dims       : Dimensionality of features returned by Inception
    -- device     : Device to run calculations
    Returns:
    -- A numpy array of dimension (num images, dims) that contains the
       activations of the given tensor when feeding inception with the
       query tensor.
    """
    model.eval()
    # A batch larger than the dataset would yield an empty loader; clamp it.
    if batch_size > len(files):
        print(('Warning: batch size is bigger than the data size. Setting batch size to data size'))
        batch_size = len(files)
    # Resize -> tensor -> mean subtraction (std of 1 keeps the raw scale).
    transform = torchvision.transforms.Compose([
        transforms_vid.ClipResize((size, size)),
        transforms_vid.ClipToTensor(),
        transforms_vid.ClipNormalize(mean=[114.7748, 107.7354, 99.4750],
                                     std=[1, 1, 1])]
    )
    if data_type == 'video':
        ds = VideoDataset(files, length, transform)
    elif data_type == 'frame':
        ds = FrameDataset(files, length, transform)
    else:
        raise NotImplementedError
    dl = torch.utils.data.DataLoader(ds,
                                     batch_size=batch_size,
                                     drop_last=False,
                                     num_workers=cpu_count())
    # Feature buffer on the compute device, filled batch by batch.
    pred_arr = torch.zeros(len(files), dims).to(device)
    start_idx = 0
    for batch in tqdm(dl):
        batch = batch.to(device)
        with torch.no_grad():
            pred = model(batch)
        # Collapse any remaining spatio-temporal extent to 1x1x1 so the
        # features flatten to (batch, dims).
        if pred.size(2) != 1 or pred.size(3) != 1 or pred.size(4) != 1:
            pred = adaptive_avg_pool3d(pred, output_size=(1, 1, 1))
        pred = pred.squeeze(4).squeeze(3).squeeze(2)
        pred_arr[start_idx:start_idx + pred.shape[0]] = pred
        start_idx = start_idx + pred.shape[0]
    pred_arr = pred_arr.cpu().numpy()
    return pred_arr
def get_dataloaders(args, num_folds=4):
    """Build per-fold train/test DataLoaders for cross-validation.

    Args:
        args: parsed CLI namespace; must provide ``cls``,
            ``train_batch_size``, ``test_batch_size`` and ``num_workers``.
        num_folds: number of cross-validation folds. Defaults to 4, which
            matches the previously hard-coded value, so existing callers
            are unaffected.

    Returns:
        A list of ``num_folds`` dicts, each holding 'train' and 'test'
        DataLoaders over ``VideoDataset(fold, split, args.cls)``.
    """
    dataloaders = []
    for fold in range(num_folds):
        loader = {
            'train': torch.utils.data.DataLoader(
                VideoDataset(fold, 'train', args.cls),
                batch_size=args.train_batch_size,
                num_workers=args.num_workers,
                shuffle=True,  # reshuffle training clips every epoch
                pin_memory=True,
                worker_init_fn=worker_init_fn),
            'test': torch.utils.data.DataLoader(
                VideoDataset(fold, 'test', args.cls),
                batch_size=args.test_batch_size,
                num_workers=args.num_workers,
                shuffle=False,  # deterministic evaluation order
                pin_memory=True,
                worker_init_fn=worker_init_fn),
        }
        dataloaders.append(loader)
    return dataloaders
def get_dataloaders(args):
    """Return a {'train', 'test'} dict of DataLoaders over VideoDataset.

    The training loader shuffles; the test loader keeps dataset order.
    Both share worker count, pinned memory, and the worker init hook.
    """
    shared_kwargs = dict(num_workers=args.num_workers,
                         pin_memory=True,
                         worker_init_fn=worker_init_fn)
    train_loader = torch.utils.data.DataLoader(
        VideoDataset('train', args),
        batch_size=args.train_batch_size,
        shuffle=True,
        **shared_kwargs)
    test_loader = torch.utils.data.DataLoader(
        VideoDataset('test', args),
        batch_size=args.test_batch_size,
        shuffle=False,
        **shared_kwargs)
    return {'train': train_loader, 'test': test_loader}
def prepare_dataset(args):
    """Create the training DataLoader and report its dataset size.

    Returns a ``({'train': loader}, {'train': size})`` pair so callers can
    iterate over splits generically, even though only 'train' exists here.
    """
    print('Training model on {} dataset...'.format(args.dataset))
    loader = DataLoader(
        VideoDataset(args=args),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
        drop_last=True)  # drop the ragged final batch
    train_val_loaders = {'train': loader}
    train_val_sizes = {split: len(ldr.dataset)
                       for split, ldr in train_val_loaders.items()}
    return train_val_loaders, train_val_sizes
def prepare_dataset(configs):
    """Build a VideoDataset from a config mapping.

    Supported dataset names are 'mug', 'isogd' and 'surreal'; the matching
    module-level ``preprocess_<name>_dataset`` function is passed to the
    dataset constructor.

    Raises:
        NotImplementedError: for any other dataset name.
    """
    name = configs["dataset"]["name"]
    if name not in ["mug", "isogd", "surreal"]:
        raise NotImplementedError
    # Resolve the preprocess function by name instead of eval(): identical
    # result for a module-level function, without executing string code.
    preprocess_func = globals()[f"preprocess_{name}_dataset"]
    return VideoDataset(
        name,
        Path(configs["dataset"]["path"]),
        preprocess_func,
        configs['video_length'],
        configs['image_size'],
        configs["dataset"]['number_limit'],
    )
def new_mockdataset(video_length, image_size, geometric_info="depth"):
    """Return a VideoDataset wired to the 'mock' data under data/raw/mock.

    Only video_length, image_size and geometric_info vary per call; the
    name, path, extension and (absent) preprocess function are fixed.
    """
    return VideoDataset(
        name="mock",
        dataset_path="data/raw/mock",
        preprocess_func=None,
        video_length=video_length,
        image_size=image_size,
        geometric_info=geometric_info,
        extension="png",
    )
def prepare_dataset(args):
    """Build train/val/test DataLoaders for ``args.dataset``.

    Returns:
        (train_val_loaders, train_val_sizes, test_data_loader, test_size)
        where ``train_val_loaders``/``train_val_sizes`` are dicts keyed by
        'train' and 'val'.
    """
    print('Training model on {} dataset...'.format(args.dataset))

    def _split_loader(split, shuffle=False):
        # drop_last keeps batch shapes uniform across all splits.
        return DataLoader(VideoDataset(args=args, split=split),
                          batch_size=args.batch_size,
                          shuffle=shuffle,
                          num_workers=args.num_workers,
                          drop_last=True)

    train_val_loaders = {'train': _split_loader('train', shuffle=True),
                         'val': _split_loader('val')}
    train_val_sizes = {split: len(loader.dataset)
                       for split, loader in train_val_loaders.items()}
    test_data_loader = _split_loader('test')
    test_size = len(test_data_loader.dataset)
    return train_val_loaders, train_val_sizes, test_data_loader, test_size
def main():
    """Train/evaluate a TemporalModel video classifier.

    Parses CLI args, maps the dataset name to its class count, builds the
    model for the chosen arch/modality (optionally restoring weights from
    ``--resume_rgb``), wires up data loaders, the SGD optimizer and an LR
    scheduler, then runs the train/validate loop while checkpointing under
    ``experiments/<dataset>/<arch>/...``.

    Fix vs. previous revision: the ``--evaluate`` branch called
    ``os.remove(log_test)`` with an open *file object*; ``os.remove``
    takes a path, so that raised TypeError (and the file was never
    closed). It now closes the file and removes it by name.
    """
    global args, best_prec1
    args = parser.parse_args()
    # Map dataset name to its class count.
    if args.dataset == 'something-v1':
        num_class = 174
    elif args.dataset == 'diving48':
        num_class = 48
    elif args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'skating2':
        num_class = 63
    else:
        raise ValueError('Unknown dataset ' + args.dataset)
    model_dir = os.path.join('experiments', args.dataset, args.arch,
                             args.consensus_type + '-' + args.modality,
                             str(args.run_iter))
    args.train_list, args.val_list, args.root_path, args.rgb_prefix = datasets_video.return_dataset(
        args.dataset)
    if 'something' in args.dataset:
        # label transformation for left/right categories
        target_transforms = {
            86: 87,
            87: 86,
            93: 94,
            94: 93,
            166: 167,
            167: 166
        }
        print('Target transformation is enabled....')
    else:
        target_transforms = None
    if not args.resume_rgb:
        # Fresh run: wipe any stale experiment dir and recreate it.
        if os.path.exists(model_dir):
            print('Dir {} exists!!! it will be removed'.format(model_dir))
            shutil.rmtree(model_dir)
        os.makedirs(model_dir)
        os.makedirs(os.path.join(model_dir, args.root_log))
    # NOTE(review): data_length stays undefined for any modality other than
    # RGB/flow/RGBDiff; a later reference would raise NameError — confirm
    # the CLI restricts --modality to these values.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['flow', 'RGBDiff']:
        data_length = 5
        # data_length = 1
    if args.resume_rgb:
        if args.modality == 'RGB':
            # Pick the temporal module by substring of the arch name.
            if 'gst' in args.arch:
                model = TemporalModel(num_class, args.num_segments,
                                      model='GST', backbone=args.arch,
                                      alpha=args.alpha, beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            elif 'stm' in args.arch:
                model = TemporalModel(num_class, args.num_segments,
                                      model='STM', backbone=args.arch,
                                      alpha=args.alpha, beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            elif 'tmp' in args.arch:
                model = TemporalModel(num_class, args.num_segments,
                                      model='TMP', backbone=args.arch,
                                      alpha=args.alpha, beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            elif 'tsm' in args.arch:
                model = TemporalModel(num_class, args.num_segments,
                                      model='TSM', backbone=args.arch,
                                      alpha=args.alpha, beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            elif 'ori' in args.arch:
                model = TemporalModel(num_class, args.num_segments,
                                      model='ORI', backbone=args.arch,
                                      alpha=args.alpha, beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            elif 'I3D' in args.arch:
                print("!!!!!!!!!!!!!!!!!!!!!!!\n\n")
                model = TemporalModel(num_class, args.num_segments,
                                      model='I3D', backbone=args.arch,
                                      alpha=args.alpha, beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            else:
                model = TemporalModel(num_class, args.num_segments,
                                      model='ORI', backbone=args.arch,
                                      alpha=args.alpha, beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
        if os.path.isfile(args.resume_rgb):
            print(("=> loading checkpoint '{}'".format(args.resume_rgb)))
            checkpoint = torch.load(args.resume_rgb)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            original_checkpoint = checkpoint['state_dict']
            print(("(epoch {} ) best_prec1 : {} ".format(
                checkpoint['epoch'], best_prec1)))
            # Strip the 'module.' prefix DataParallel added when saving.
            original_checkpoint = {
                k[7:]: v
                for k, v in original_checkpoint.items()
            }
            #model_dict = i3d_model.state_dict()
            #model_dict.update(pretrained_dict)
            model.load_state_dict(original_checkpoint)
            print(("=> loaded checkpoint '{}' (epoch {} ) best_prec1 : {} ".
                   format(args.resume_rgb, checkpoint['epoch'], best_prec1)))
        else:
            raise ValueError("=> no checkpoint found at '{}'".format(
                args.resume_rgb))
    else:
        if args.modality == 'flow':
            if 'I3D' in args.arch:
                model = TemporalModel(num_class, args.num_segments,
                                      model='I3D', backbone=args.arch,
                                      alpha=args.alpha, beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi, modality='flow',
                                      new_length=data_length)
        elif args.modality == 'RGB':
            if 'gst' in args.arch:
                model = TemporalModel(num_class, args.num_segments,
                                      model='GST', backbone=args.arch,
                                      alpha=args.alpha, beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            elif 'stm' in args.arch:
                model = TemporalModel(num_class, args.num_segments,
                                      model='STM', backbone=args.arch,
                                      alpha=args.alpha, beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            elif 'tmp' in args.arch:
                model = TemporalModel(num_class, args.num_segments,
                                      model='TMP', backbone=args.arch,
                                      alpha=args.alpha, beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            elif 'tsm' in args.arch:
                model = TemporalModel(num_class, args.num_segments,
                                      model='TSM', backbone=args.arch,
                                      alpha=args.alpha, beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            elif 'ori' in args.arch:
                model = TemporalModel(num_class, args.num_segments,
                                      model='ORI', backbone=args.arch,
                                      alpha=args.alpha, beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            elif 'I3D' in args.arch:
                model = TemporalModel(num_class, args.num_segments,
                                      model='I3D', backbone=args.arch,
                                      alpha=args.alpha, beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            else:
                model = TemporalModel(num_class, args.num_segments,
                                      model='ORI',
                                      backbone=args.arch + '_ori',
                                      alpha=args.alpha, beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
    cudnn.benchmark = True
    writer = SummaryWriter(model_dir)
    # Data loading code
    args.store_name = '_'.join([
        args.dataset, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = get_optim_policies(model)
    train_augmentation = get_augmentation(mode='train')
    val_trans = get_augmentation(mode='val')
    normalize = GroupNormalize(input_mean, input_std)
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    # diving48 keeps train/test frames in separate subdirectories.
    if args.dataset == 'diving48':
        args.root_path = args.root_path + '/train'
    train_loader = torch.utils.data.DataLoader(VideoDataset(
        args.root_path,
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=args.rgb_prefix,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ]),
        dataset=args.dataset),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    print("trainloader.type = {}".format(type(train_loader)))
    # Swap the '/train' suffix added above for '/test'.
    if args.dataset == 'diving48':
        args.root_path = args.root_path[:-6] + '/test'
    val_loader = torch.utils.data.DataLoader(VideoDataset(
        args.root_path,
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=args.rgb_prefix,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ]),
        dataset=args.dataset),
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))
    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    if args.evaluate:
        # BUG FIX: os.remove() takes a path, not a file object; the old
        # call os.remove(log_test) raised TypeError and the handle was
        # never closed. Close via context manager, then delete by name.
        with open('test_not.csv', 'w') as log_test:
            validate(val_loader, model, criterion, log_test)
        os.remove('test_not.csv')
        return
    # NOTE(review): lr_scheduler_clr is undefined for any other
    # --lr_scheduler value — confirm the CLI restricts the choices.
    if args.lr_scheduler == 'cos_warmup':
        lr_scheduler_clr = CosineAnnealingLR.WarmupCosineLR(
            optimizer=optimizer,
            milestones=[args.warmup, args.epochs],
            warmup_iters=args.warmup,
            min_ratio=1e-7)
    elif args.lr_scheduler == 'lr_step_warmup':
        lr_scheduler_clr = CosineAnnealingLR.WarmupStepLR(
            optimizer=optimizer,
            milestones=[args.warmup] +
            [args.epochs - 30, args.epochs - 10, args.epochs],
            warmup_iters=args.warmup)
    elif args.lr_scheduler == 'lr_step':
        lr_scheduler_clr = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, args.lr_steps, 0.1)
    if args.resume_rgb:
        # Fast-forward optimizer/scheduler to the resumed epoch; stepping
        # the optimizer first avoids PyTorch's scheduler-order warning.
        for epoch in range(0, args.start_epoch):
            optimizer.step()
            lr_scheduler_clr.step()
    log_training = open(
        os.path.join(model_dir, args.root_log, '%s.csv' % args.store_name),
        'a')
    for epoch in range(args.start_epoch, args.epochs):
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch + 1)
        train(train_loader,
              model,
              criterion,
              optimizer,
              epoch,
              log_training,
              writer=writer)
        lr_scheduler_clr.step()
        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader,
                             model,
                             criterion,
                             log_training,
                             writer=writer,
                             epoch=epoch)
            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'lr': optimizer.param_groups[-1]['lr'],
                }, is_best, model_dir)
            print('best_prec1: {}'.format(best_prec1))
        else:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'lr': optimizer.param_groups[-1]['lr'],
                }, False, model_dir)
# buffer = (buffer - np.mean(buffer)) / np.std(buffer) buffer = dataloader.loadvideo(fname) buffer = dataloader.normalize(buffer) buffer = torch.FloatTensor(buffer).permute(3, 0, 1, 2).unsqueeze(0) outputs = model(buffer) _, preds = torch.max(outputs, 1) commands = [ 'click_here', 'close_window', 'down_scroll', 'drag', 'drop_here', 'go_backward', 'go_forward', 'scroll_up', 'search_this', 'zoom_in', 'zoom_out' ] # for s in sorted(list(zip(outputs.detach().numpy()[0],commands)),reverse=True): # print(s) print(commands[preds[0].data]) if __name__ == "__main__": directory = "./zxsu/" val_set = VideoDataset(directory, mode='val') # restores the model and optimizer state_dicts lip_model = R2Plus1DClassifier(num_classes=11, layer_sizes=[3, 3, 3, 3]) state_dicts = torch.load( "/home/rkmtlab/projects/zxsu/SilentCut_Oct/pure_model.pt", map_location=torch.device("cpu")) lip_model.load_state_dict(state_dicts) lip_model.eval() for f in glob( "/home/rkmtlab/projects/zxsu/SilentCut_Oct/zxsu/click_here/*.avi"): recognize(f, lip_model, val_set)
def main():
    """Train/evaluate a VideoModel on something-v1 or diving48.

    Parses CLI args, builds the (optionally GSM) model, restores a
    checkpoint when ``--resume`` is given, constructs train/val loaders,
    then runs the warmup-cosine training loop with per-epoch
    checkpointing under ``experiments/<dataset>/<arch>/...``.

    Fixes vs. previous revision: the "loaded checkpoint" message printed
    ``args.evaluate`` instead of the checkpoint path actually loaded, and
    an unused local ``finetuning`` was removed.
    """
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()
    # Per-dataset class count and frame-name template.
    if args.dataset == 'something-v1':
        num_class = 174
        args.rgb_prefix = ''
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'diving48':
        num_class = 48
        args.rgb_prefix = 'frames'
        rgb_read_format = "{:05d}.jpg"
    else:
        raise ValueError('Unknown dataset ' + args.dataset)
    model_dir = os.path.join('experiments', args.dataset, args.arch,
                             args.consensus_type + '-' + args.modality,
                             str(args.run_iter))
    if not args.resume:
        # Refuse to clobber an existing experiment directory.
        if os.path.exists(model_dir):
            print('Dir {} exists!!!'.format(model_dir))
            sys.exit()
        else:
            os.makedirs(model_dir)
            os.makedirs(os.path.join(model_dir, args.root_log))
    writer = SummaryWriter(model_dir)
    args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
        args.dataset)
    if 'something' in args.dataset:
        # label transformation for left/right categories
        target_transforms = {
            86: 87,
            87: 86,
            93: 94,
            94: 93,
            166: 167,
            167: 166
        }
        print('Target transformation is enabled....')
    else:
        target_transforms = None
    args.store_name = '_'.join([
        args.dataset, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)
    model = VideoModel(num_class=num_class,
                       modality=args.modality,
                       num_segments=args.num_segments,
                       base_model=args.arch,
                       consensus_type=args.consensus_type,
                       dropout=args.dropout,
                       partial_bn=not args.no_partialbn,
                       gsm=args.gsm,
                       target_transform=target_transforms)
    # Capture preprocessing constants before DataParallel wraps the model.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: this message previously formatted args.evaluate
            # instead of the checkpoint path that was loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))
    cudnn.benchmark = True
    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    train_loader = torch.utils.data.DataLoader(VideoDataset(
        args.root_path,
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=args.rgb_prefix + rgb_read_format,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(VideoDataset(
        args.root_path,
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=args.rgb_prefix + rgb_read_format,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))
    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    lr_scheduler_clr = CosineAnnealingLR.WarmupCosineLR(
        optimizer=optimizer,
        milestones=[args.warmup, args.epochs],
        warmup_iters=args.warmup,
        min_ratio=1e-7)
    if args.resume:
        # Fast-forward the scheduler to the resumed epoch.
        for epoch in range(0, args.start_epoch):
            lr_scheduler_clr.step()
    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return
    log_training = open(
        os.path.join(model_dir, args.root_log, '%s.csv' % args.store_name),
        'a')
    for epoch in range(args.start_epoch, args.epochs):
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch + 1)
        train_prec1 = train(train_loader,
                            model,
                            criterion,
                            optimizer,
                            epoch,
                            log_training,
                            writer=writer)
        lr_scheduler_clr.step()
        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader,
                             model,
                             criterion, (epoch + 1) * len(train_loader),
                             log_training,
                             writer=writer,
                             epoch=epoch)
            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'current_prec1': prec1,
                    'lr': optimizer.param_groups[-1]['lr'],
                }, is_best, model_dir)
        else:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'current_prec1': train_prec1,
                    'lr': optimizer.param_groups[-1]['lr'],
                }, False, model_dir)
import os
import torch
import numpy as np
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from dataset import VideoDataset
from model.model import Model

# All work is pinned to the second GPU.
device = torch.device('cuda:1')

# Train/val splits share the same frame root and split listing; only the
# `split` tag differs.
train_data = VideoDataset(
    root_dir=
    '/home/datasets/mayilong/PycharmProjects/p55/two_stream/v1/data/datasets',
    split_data=
    '/home/datasets/mayilong/PycharmProjects/p55/two_stream/data/split_data',
    split='train',
)
val_data = VideoDataset(
    root_dir=
    '/home/datasets/mayilong/PycharmProjects/p55/two_stream/v1/data/datasets',
    split_data=
    '/home/datasets/mayilong/PycharmProjects/p55/two_stream/data/split_data',
    split='val',
)
# NOTE(review): no DataLoader is built for val_data in this chunk —
# presumably created further down the file; confirm.
train_loader = DataLoader(train_data,
                          batch_size=32,
                          shuffle=True,
                          num_workers=4)
def main():
    """Debug/visualization entry point: builds a VideoModel, prints its
    parameter count, and (in the currently unreachable tail) plots first
    conv-layer kernels and activations for one dataset sample.

    NOTE(review): the sys.exit(1) right after print(model) aborts the
    process, so everything below it is dead code — looks like a debugging
    leftover; confirm before relying on the visualization path.
    """
    global args
    args = parser.parse_args()
    train_videofolder, val_videofolder, args.root_path, _ = return_dataset(
        args.dataset)
    # Hard-coded for something-v1 (174 classes, zero-padded jpg frames).
    num_class = 174
    rgb_prefix = ''
    rgb_read_format = "{:05d}.jpg"
    model = VideoModel(num_class=num_class,
                       modality=args.modality,
                       num_segments=args.num_segments,
                       base_model=args.arch,
                       consensus_type=args.consensus_type,
                       dropout=args.dropout,
                       partial_bn=not args.no_partialbn,
                       gsm=args.gsm,
                       target_transform=None)
    # Strip the consensus head so the model emits per-segment features.
    model.consensus = Identity()
    print("parameters", sum(p.numel() for p in model.parameters()))
    print(model)
    sys.exit(1)
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()
    policies = model.get_optim_policies()
    model = model.cuda()
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # NOTE(review): this message formats args.evaluate, not the
            # args.resume path that was loaded — likely a copy-paste slip.
            print(("=> loaded checkpoint '{}' (epoch {})"
                   .format(args.evaluate, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))
    cudnn.benchmark = True
    normalize = GroupNormalize(input_mean, input_std)
    dataset = VideoDataset(args.root_path, train_videofolder,
                           num_segments=8,
                           new_length=1,
                           modality="RGB",
                           image_tmpl=rgb_prefix+rgb_read_format,
                           transform=torchvision.transforms.Compose([
                               train_augmentation,
                               Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                               ToTorchFormatTensor(div=(args.arch not in ['BNInception', 'InceptionV3'])),
                               normalize
                           ]))

    def normalize_output(img):
        # Min-max rescale an image tensor into [0, 1] for display.
        img = img - img.min()
        img = img / img.max()
        return img

    # Forward one sample to exercise the model.
    data = dataset[0][0].unsqueeze_(0).cuda()
    output = model(data)
    #print(model)
    #.exit(1)
    # Plot some images
    idx = torch.randint(0, output.size(0), ())
    #pred = normalize_output(output[idx, 0])
    img = data[idx, 0]
    #fig, axarr = plt.subplots(1, 2)
    plt.imshow(img.cpu().detach().numpy())
    #axarr[1].imshow(pred.cpu().detach().numpy())
    # Visualize feature maps
    activation = {}

    def get_activation(name):
        # Forward hook that stashes a layer's detached output by name.
        def hook(model, input, output):
            activation[name] = output.detach()
        return hook

    model.base_model.conv1_7x7_s2.register_forward_hook(
        get_activation('conv1'))
    data, _ = dataset[0]
    data.unsqueeze_(0)
    output = model(data.cuda())
    # Plot all but the last 40 first-layer kernels, one subplot each.
    kernels = model.base_model.conv1_7x7_s2.weight.cpu().detach()
    fig, axarr = plt.subplots(kernels.size(0)-40, figsize=(15,15))
    for idx in range(kernels.size(0)-40):
        axarr[idx].imshow(np.transpose(kernels[idx].squeeze(), (1,2,0)))
    # Show the captured conv1 activations (first 3 channels as RGB).
    act = activation['conv1'].squeeze()
    fig, axarr = plt.subplots(act.size(0), figsize=(15,15))
    for idx in range(act.size(0)):
        axarr[idx].imshow(np.transpose(act[idx][:3].cpu(), (1,2,0)))
    plt.tight_layout()
    plt.show()
#! /usr/bin/env python # -*- coding: utf-8 -*- # vim:fenc=utf-8 # import numpy as np import torch import torch.nn as nn import torch.optim as optim from dataset import VideoDataset from torch.utils.data import DataLoader device = torch.device('cuda:1') test_data = VideoDataset( root_dir='/home/datasets/mayilong/PycharmProjects/p55/data/rgb', split_data='/home/datasets/mayilong/PycharmProjects/p55/data/split_data', split='test', n_frame=16) test_loader = DataLoader(test_data, batch_size=8, shuffle=True) print('test samples : {}'.format(len(test_data))) model = C3D(7) def test(): model.load_state_dict(torch.load('./trained_model/c3d_new_0.7226.pth')) model.to(device) test_corrects = 0 for idx, (buf, labels) in enumerate(test_loader):
cropping = torchvision.transforms.Compose( [GroupOverSample(net.input_size, net.scale_size)]) else: raise ValueError( "Only 1 and 10 crops are supported while we got {}".format( args.test_crops)) data_loader = torch.utils.data.DataLoader(VideoDataset( directory=args.directory, num_segments=args.test_segments, root_path=args.video_root, new_length=1 if args.modality == "RGB" else 5, modality=args.modality, image_tmpl=args.video_prefix, test_mode=True, video_length=args.video_length, seq_length=args.seq_length, transform=torchvision.transforms.Compose([ cropping, Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])), ToTorchFormatTensor( div=(args.arch not in ['BNInception', 'InceptionV3'])), GroupNormalize(net.input_mean, net.input_std), ])), batch_size=1, shuffle=False, num_workers=args.workers * 2, pin_memory=True) if args.gpus is not None: devices = [args.gpus[i] for i in range(args.workers)]
# Load model if args.model_type == 'mattingbase': model = MattingBase(args.model_backbone) if args.model_type == 'mattingrefine': model = MattingRefine(args.model_backbone, args.model_backbone_scale, args.model_refine_mode, args.model_refine_sample_pixels, args.model_refine_threshold, args.model_refine_kernel_size) model = model.to(device).eval() model.load_state_dict(torch.load(args.model_checkpoint), strict=False) # Load video and background vid = VideoDataset(args.video_src) bgr = [Image.open(args.video_bgr).convert('RGB')] dataset = ZipDataset([vid, bgr], transforms=A.PairCompose([ A.PairApply( T.Resize(args.video_resize[::-1]) if args. video_resize else nn.Identity()), HomographicAlignment() if args.preprocess_alignment else A.PairApply(nn.Identity()), A.PairApply(T.ToTensor()) ])) # Create output directory if os.path.exists(args.output_dir): if input(f'Directory {args.output_dir} already exists. Override? [Y/N]: ' ).lower() == 'y':
lr=args.lr, momentum=args.momentum, weight_decay=args.l2wd) #optimizer = optim.RMSprop(model.parameters(), lr = 1e-2, alpha = 0.99) # trying on dynamic scheduler scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=10, threshold=1e-3, min_lr=1e-6) # preparing the training and validation dataset train_dataloader = DataLoader(VideoDataset(args.dataset_path, args.dataset, args.split, 'train', args.modality, mean_sub=args.meansub, clip_len=args.clip_length, test_mode=args.test_mode, test_amt=args.test_amt), batch_size=args.batch_size, shuffle=True) val_dataloader = DataLoader(VideoDataset(args.dataset_path, args.dataset, args.split, 'validation', args.modality, mean_sub=args.meansub, clip_len=args.clip_length, test_mode=args.test_mode, test_amt=args.test_amt),
}, path) # print the total time needed, HH:MM:SS format time_elapsed = time.time() - start print( f"Training complete in {time_elapsed//3600}h {(time_elapsed%3600)//60}m {time_elapsed %60}s" ) # initalize the ResNet 18 version of this model model = R2Plus1DClassifier(num_classes=2, layer_sizes=[2, 2, 2, 2]).to(device) criterion = nn.CrossEntropyLoss( ) # standard crossentropy loss for classification # prepare the dataloaders into a dict train_dataloader = DataLoader(VideoDataset('/home/irhum/data/video'), batch_size=32, shuffle=True, num_workers=4) # IF training on Kinetics-600 and require exactly a million samples each epoch, # import VideoDataset1M and uncomment the following # train_dataloader = DataLoader(VideoDataset1M('/home/irhum/data/video'), batch_size=32, num_workers=4) val_dataloader = DataLoader(VideoDataset('/home/irhum/data/video', mode='val'), batch_size=32, num_workers=4) dataloaders = {'train': train_dataloader, 'val': val_dataloader} # hyperparameters as given in paper sec 4.1 optimizer = optim.SGD(model.parameters(), lr=0.01) # the scheduler divides the lr by 10 every 10 epochs scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
import torch.nn as nn import torch.optim as optim from torchvision import models from model.model import Model from dataset import VideoDataset from torch.utils.data import DataLoader import time device = torch.device('cuda:1') dataset_path = '/home/datasets/mayilong/PycharmProjects/p55/two_stream/datasets/dataset3/data' split_data = '/home/datasets/mayilong/PycharmProjects/p55/two_stream/dataset/split_data' test_data = VideoDataset(dataset_path=dataset_path, split_data=split_data, split='test', multi_scale=False, use_flip=False) test_loader = DataLoader(test_data, batch_size=4, shuffle=True, num_workers=4) model = Model(7).to(device) model.load_state_dict( torch.load('./trained_model/two_stream_0.8678.pth')['state_dict']) print('load model success') def predict(): corrects_so_far = 0 count_so_far = 0 print('Start trainning')
weight_decay=reg_weight) loss_func = CustomLoss().cuda() scheduler = CosineAnnealingWarmRestarts(reg_optimizer, T_0=10, T_mult=2, eta_min=1e-5) split = 'train' seg_dir = '/home/yangzehua/UCF_Crimes/FLOW_Segments' anno_dir = '/home/yangzehua/RoadAccidentsDetector/ucf_train_test_info/CADP_Annotations.txt' path_dir = '/home/yangzehua/RoadAccidentsDetector/ucf_train_test_info/CADP_Test.txt' # test_seg_dir = os.path.join(seg_dir, 'test') test_seg_dir = '/home/yangzehua/UCF_Crimes/CADP_FLOW_Segments/test' model_save_dir = 'Vanilla_FLOW_CADP.pt' graph_save_dir = 'Vanilla_FLOW_CADP.png' dataset = VideoDataset(data_dir=seg_dir, split=split) video_loader = DataLoader(dataset=dataset, batch_size=batch_size, num_workers=8, shuffle=True, drop_last=True) loss_list = [] auc = 0.0 auc_list = [0] for epoch in tqdm(range(epoch_num)): epoch_loss = 0 for batchX, batchY in video_loader: batchX = batchX.cuda() batchY = batchY.cuda() score_pred = net(batchX).cuda()
# Load model if args.model_type == 'mattingbase': model = MattingBase(args.model_backbone) if args.model_type == 'mattingrefine': model = MattingRefine(args.model_backbone, args.model_backbone_scale, args.model_refine_mode, args.model_refine_sample_pixels, args.model_refine_threshold, args.model_refine_kernel_size) model = model.to(device).eval() model.load_state_dict(torch.load(args.model_checkpoint, map_location=device), strict=False) # Load video and background vid = VideoDataset(args.video_src) bgr = [Image.open(args.video_bgr).convert('RGB')] dataset = ZipDataset([vid, bgr], transforms=A.PairCompose([ A.PairApply( T.Resize(args.video_resize[::-1]) if args. video_resize else nn.Identity()), HomographicAlignment() if args.preprocess_alignment else A.PairApply(nn.Identity()), A.PairApply(T.ToTensor()) ])) if args.video_target_bgr: dataset = ZipDataset([ dataset, VideoDataset(args.video_target_bgr, transforms=T.ToTensor()) ])
def main():
    """Train (or evaluate) a SlowFast network on a video dataset.

    Parses CLI flags from the module-level ``parser``, optionally resumes
    from a checkpoint, builds train/val loaders, then either runs a single
    validation pass (``--evaluate``) or the full training loop, saving the
    best-prec@1 checkpoint each epoch.

    Mutates the module globals ``args`` and ``best_prec1``.
    """
    global args, best_prec1
    args = parser.parse_args()

    if not os.path.exists('./record'):
        os.mkdir('./record')

    # Map dataset name to its class count.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'sthsth':
        num_class = 174
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = SlowFastNet(num_class)
    train_augmentation = get_augmentation('RGB', input_size)
    model = torch.nn.DataParallel(model).cuda()

    args.start_epoch = 0
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: the original printed args.evaluate here, but the
            # checkpoint that was just loaded came from args.resume.
            print(("=> loaded checkpoint '{}' (epoch {})"
                   .format(args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Normalization runs after Stack/ToTensor; f2Dt3D reshapes 2D frame
    # batches into the 3D (clip) layout the network expects.
    normalize = torchvision.transforms.Compose(
        [GroupNormalize(input_mean, input_std), f2Dt3D()])

    train_loader = torch.utils.data.DataLoader(
        VideoDataset(args.root_path, args.train_list,
                     transform=torchvision.transforms.Compose([
                         train_augmentation,
                         Stack(roll=False),
                         ToTorchFormatTensor(div=True),
                         normalize,
                     ]),
                     mode='train', T=args.T, tau=args.tau,
                     dense_sample=not args.no_dense_sample),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        VideoDataset(args.root_path, args.val_list,
                     transform=torchvision.transforms.Compose([
                         GroupScale(int(scale_size)),
                         GroupCenterCrop(input_size),
                         Stack(roll=False),
                         ToTorchFormatTensor(div=True),
                         normalize,
                     ]),
                     mode='test', T=args.T, tau=args.tau,
                     dense_sample=not args.no_dense_sample),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    # (renamed from the original local 'schduler' typo)
    scheduler = WarmUpMultiStepLR(optimizer, [20, 30, 40], 0.1,
                                  last_epoch=args.start_epoch - 1)

    # the way in the raw paper ,But I do not use it, because I can't estimate how many iter to train
    # max_step = len(train_loader)*args.epochs
    # lr_lambda = lambda step: 0.5 * args.lr* ((np.cos(step / max_step * np.pi)) + 1)
    # scheduler = torch.nn.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[lr_lambda])

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        scheduler.step()
        print('Epoch {}/{}'.format(epoch + 1, args.epochs))

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
        }, is_best, epoch + 1)
def train_model(num_classes, directory, path="model_data.pth.tar"):
    """Extract R(2+1)D backbone features for every video under *directory*.

    Splits each class folder into train (all but last 10, after a seeded
    shuffle) and val (last 10) files, optionally restores weights from
    *path*, then runs the frozen backbone over both splits and saves one
    ``.npy`` feature file per video under ``features/<phase>/<command>/``.

    Args:
        num_classes (int): number of output classes for the classifier head.
        directory (str): root folder with one sub-folder per class label.
        path (str): optional checkpoint to restore before extraction.
    """
    # batch_size = 20
    commands = sorted([
        'caption', 'play', 'stop', 'go_back', 'go_forward', 'previous',
        'next', 'volume_up', 'volume_down', 'maximize', 'expand', 'delete',
        'save', 'like', 'dislike', 'share', 'add_to_queue', 'watch_later',
        'home', 'trending', 'subscription', 'original', 'library', 'profile',
        'notification', 'scroll_up', 'scroll_down', 'click'])
    device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

    folder = Path(directory)
    train_fnames, train_labels, val_fnames, val_labels = [], [], [], []
    for label in sorted(os.listdir(folder)):
        # Fixed seed so the train/val split is reproducible across runs.
        shuffled_list = os.listdir(os.path.join(folder, label))
        random.Random(4).shuffle(shuffled_list)
        for fname in shuffled_list[:-10]:
            train_fnames.append(os.path.join(folder, label, fname))
            train_labels.append(label)
        for fname in shuffled_list[-10:]:
            val_fnames.append(os.path.join(folder, label, fname))
            val_labels.append(label)

    layer_sizes = [2, 2, 2, 2, 2, 2]
    save = True

    # initalize the ResNet 18 version of this model
    model = R2Plus1DClassifier(num_classes=num_classes,
                               layer_sizes=layer_sizes).to(device)

    transforms = video_transforms.Compose(
        [video_transforms.CenterCrop((30, 60))])
    # BUG FIX: the original overwrote `train_set` with the validation
    # dataset and fed BOTH dataloaders from it, so the "train" features
    # were actually extracted from the validation files.
    train_set = VideoDataset(fnames=train_fnames, labels=train_labels,
                             transforms=transforms)
    val_set = VideoDataset(fnames=val_fnames, labels=val_labels,
                           transforms=transforms)
    train_dataloader = DataLoader(train_set, batch_size=1, shuffle=False,
                                  num_workers=4)
    val_dataloader = DataLoader(val_set, batch_size=1, shuffle=False,
                                num_workers=4)

    if os.path.exists(path):
        checkpoint = torch.load(path)
        print("Reloading from previously saved checkpoint")
        model.load_state_dict(checkpoint["state_dict"])

    model.eval()
    dataloaders = {'train_dataloader': train_dataloader,
                   'val_dataloader': val_dataloader}
    for phase in ['train_dataloader', 'val_dataloader']:
        i = 0
        for inputs, labels in dataloaders[phase]:
            # (N, T, H, W, C) -> (N, C, T, H, W), the layout R(2+1)D expects.
            inputs_buffer = inputs.permute(0, 4, 1, 2, 3).to(device)
            with torch.set_grad_enabled(False):
                outputs = model.res2plus1d(inputs_buffer)
            i += 1
            print(f"extracted {i} of {len(dataloaders[phase].dataset)} videos")
            # batch_size is 1, so labels[0] identifies the whole batch.
            feats_dir = f"features/{phase}/{commands[labels[0]]}"
            if not os.path.exists(feats_dir):
                os.makedirs(feats_dir)
            np.save(f"{feats_dir}/{commands[labels[0]]}{i}.npy",
                    outputs.cpu().detach().numpy())
GroupOverSample(net.input_size, net.scale_size) ]) elif args.test_crops == 5: cropping = torchvision.transforms.Compose([ GroupFiveCrops(net.input_size, net.scale_size) ]) else: raise ValueError("Only 1 and 10 crops are supported while we got {}".format(args.test_crops)) data_loader = torch.utils.data.DataLoader( VideoDataset(args.root_path, args.val_list, num_segments=args.test_segments, new_length=1 if args.modality == "RGB" else 5, modality=args.modality, image_tmpl=args.rgb_prefix+rgb_read_format, test_mode=True, transform=torchvision.transforms.Compose([ cropping, Stack(roll=(args.arch in ['BNInception','InceptionV3'])), ToTorchFormatTensor(div=(args.arch not in ['BNInception','InceptionV3'])), GroupNormalize(net.input_mean, net.input_std), ]), num_clips=args.num_clips), batch_size=1, shuffle=False, num_workers=args.workers * 2, pin_memory=True) if args.gpus is not None: devices = [args.gpus[i] for i in range(args.workers)] else: devices = list(range(args.workers)) net = torch.nn.DataParallel(net.cuda())
# Evaluate a TSN (temporal segment network) classifier on the UCF-101
# validation split using precomputed optical-flow frames.
import sys
from network_tsn import TSNClassifier
from dataset import VideoDatasetTSN as VideoDataset
from torch.utils.data import DataLoader
from trainer import test_model

# --- configuration -------------------------------------------------------
data_path = '../UCF_for_R21D'
im_root = '../UCF-101_of'
save_path = 'tsn_model_resnet101_8frame_from_scratch.pth'
resize_width, resize_height = 360, 256
crop_size = 224
clip_len = 8
num_classes = 101

# --- model (ResNet-101 backbone, no ImageNet pretraining) ----------------
model = TSNClassifier(num_classes=num_classes,
                      clip_len=clip_len,
                      base_model='resnet101',
                      pretrained=False)

# --- validation data -----------------------------------------------------
val_set = VideoDataset(data_path,
                       im_root,
                       resize_width=resize_width,
                       resize_height=resize_height,
                       crop_size=crop_size,
                       clip_len=clip_len,
                       mode='val')
val_dataloader = DataLoader(val_set, batch_size=1, num_workers=2)

# --- run evaluation with the saved checkpoint ----------------------------
test_model(model, val_dataloader, path=save_path)
# Quick smoke test: iterate the VideoDataset once and print the shape of
# the first sampled batch, then stop.
from dataset import VideoDataset
from sampler import VideoSampler
from torch.utils.data import DataLoader
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np

PATH = './../Dataset_test/'

dataset = VideoDataset(PATH)
sampler = VideoSampler(dataset, replacement=False)
loader = DataLoader(dataset, sampler=sampler, batch_size=1, num_workers=1)
# Kept for parity with the original script (spins up the worker once).
dataloader_iterator = iter(loader)

for epoch in range(1):
    print('epoch=%d -------------------------' % (epoch))
    for i, batch in enumerate(loader, 0):
        print(i)
        # Each sample is (frames, forward flow, backward flow).
        frames, fwd_flow, bwd_flow = batch
        print(frames.shape)
        # Always true for i >= 0: bail out after the first batch.
        if i > -1:
            break
# Training-time setup for the two-stream model: build augmented train and
# plain val loaders, then instantiate the 7-class network.
import numpy as np
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from dataset import VideoDataset
from model.model import Model

# NOTE(review): `torch.device` is used below but `import torch` is not
# visible in this chunk — presumably imported elsewhere; confirm.
device = torch.device('cuda:2')
dataset_path = '/home/datasets/mayilong/PycharmProjects/p55/two_stream/datasets/dataset3/data'
split_data = '/home/datasets/mayilong/PycharmProjects/p55/two_stream/datasets/dataset3/split_data'

# Train split gets augmentation (multi-scale crops + horizontal flips);
# val split is deterministic.
train_data = VideoDataset(dataset_path=dataset_path,
                          split_data=split_data,
                          split='train',
                          multi_scale=True,
                          use_flip=True)
val_data = VideoDataset(dataset_path=dataset_path,
                        split_data=split_data,
                        split='val',
                        multi_scale=False,
                        use_flip=False)

train_loader = DataLoader(train_data, batch_size=16, shuffle=True, num_workers=4)
# NOTE(review): shuffle=True on the validation loader is unusual — confirm intended.
val_loader = DataLoader(val_data, batch_size=16, shuffle=True, num_workers=4)

# 7-way classifier.
model = Model(7)
def train_model(dataset=dataset, save_dir=save_dir, num_classes=num_classes, lr=lr,
                num_epochs=nEpochs, save_epoch=snapshot, useTest=useTest,
                test_interval=nTestInterval):
    """
    Train a C3D classifier, logging to TensorBoard and checkpointing.

    Runs a train+val pass each epoch, saves a checkpoint every
    `save_epoch` epochs, and (optionally) evaluates on the test split
    every `test_interval` epochs. Defaults are captured from module-level
    globals at definition time.

    Args:
        num_classes (int): Number of classes in the data
        num_epochs (int, optional): Number of epochs to train for.
    """
    print('save_dir: ', save_dir)

    if modelName == 'C3D':
        model = C3D(num_classes=num_classes, pretrained=True)
        # Backbone params train at `lr`; the (new) head at 10x that rate.
        train_params = [{'params': get_1x_lr_params(model), 'lr': lr},
                        {'params': get_10x_lr_params(model), 'lr': lr * 10}]
    else:
        print('We only implemented C3D models.')
        raise NotImplementedError
    criterion = nn.CrossEntropyLoss()  # standard crossentropy loss for classification
    optimizer = optim.SGD(train_params, lr=lr, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10,
                                          gamma=0.1)  # the scheduler divides the lr by 10 every 10 epochs

    if resume_epoch == 0:
        print("Training {} from scratch...".format(modelName))
    else:
        # Resume from the checkpoint of the previous epoch.
        checkpoint = torch.load(
            os.path.join(save_dir, 'models', saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar'),
            map_location=lambda storage, loc: storage)  # Load all tensors onto the CPU
        print("Initializing weights from: {}...".format(
            os.path.join(save_dir, 'models', saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar')))
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['opt_dict'])

    print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))
    model.to(device)
    criterion.to(device)

    # One TensorBoard run directory per invocation, stamped with time+host.
    log_dir = os.path.join(save_dir, 'models',
                           datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    writer = SummaryWriter(log_dir=log_dir)

    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VideoDataset(dataset=dataset, split='train', clip_len=16),
                                  batch_size=20, shuffle=True, num_workers=4)
    val_dataloader = DataLoader(VideoDataset(dataset=dataset, split='val', clip_len=16),
                                batch_size=20, num_workers=4)
    test_dataloader = DataLoader(VideoDataset(dataset=dataset, split='test', clip_len=16),
                                 batch_size=20, num_workers=4)

    trainval_loaders = {'train': train_dataloader, 'val': val_dataloader}
    trainval_sizes = {x: len(trainval_loaders[x].dataset) for x in ['train', 'val']}
    test_size = len(test_dataloader.dataset)

    for epoch in range(resume_epoch, num_epochs):
        # each epoch has a training and validation step
        for phase in ['train', 'val']:
            start_time = timeit.default_timer()

            # reset the running loss and corrects
            running_loss = 0.0
            running_corrects = 0.0

            # set model to train() or eval() mode depending on whether it is trained
            # or being validated. Primarily affects layers such as BatchNorm or Dropout.
            if phase == 'train':
                # scheduler.step() is to be called once every epoch during training
                scheduler.step()
                model.train()
            else:
                model.eval()

            for inputs, labels in tqdm(trainval_loaders[phase]):
                # move inputs and labels to the device the training is taking place on
                # NOTE(review): `Variable` is the legacy autograd wrapper;
                # requires_grad=True on inputs is unnecessary for training
                # the weights — kept as-is to preserve behavior.
                inputs = Variable(inputs, requires_grad=True).to(device)
                labels = Variable(labels).to(device)
                optimizer.zero_grad()

                if phase == 'train':
                    outputs = model(inputs)
                else:
                    with torch.no_grad():
                        outputs = model(inputs)

                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                # Weight batch loss by batch size for a correct epoch mean.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / trainval_sizes[phase]
            epoch_acc = running_corrects.double() / trainval_sizes[phase]

            if phase == 'train':
                writer.add_scalar('data/train_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/train_acc_epoch', epoch_acc, epoch)
            else:
                writer.add_scalar('data/val_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/val_acc_epoch', epoch_acc, epoch)

            print("[{}] Epoch: {}/{} Loss: {} Acc: {}".format(phase, epoch+1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

        # Periodic checkpoint (every `save_epoch` epochs).
        if epoch % save_epoch == (save_epoch - 1):
            torch.save({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'opt_dict': optimizer.state_dict(),
            }, os.path.join(save_dir, 'models', saveName + '_epoch-' + str(epoch) + '.pth.tar'))
            print("Save model at {}\n".format(
                os.path.join(save_dir, 'models', saveName + '_epoch-' + str(epoch) + '.pth.tar')))

        # Periodic evaluation on the held-out test split.
        if useTest and epoch % test_interval == (test_interval - 1):
            model.eval()
            start_time = timeit.default_timer()

            running_loss = 0.0
            running_corrects = 0.0

            for inputs, labels in tqdm(test_dataloader):
                inputs = inputs.to(device)
                labels = labels.to(device)

                with torch.no_grad():
                    outputs = model(inputs)
                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / test_size
            epoch_acc = running_corrects.double() / test_size

            writer.add_scalar('data/test_loss_epoch', epoch_loss, epoch)
            writer.add_scalar('data/test_acc_epoch', epoch_acc, epoch)

            print("[test] Epoch: {}/{} Loss: {} Acc: {}".format(epoch+1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

    writer.close()
# Configuration for feature extraction with a TSM model pretrained on
# EPIC-Kitchens. (Removed a redundant duplicate `segment_count = 8`
# assignment that appeared twice in the original.)
segment_count = 8  # temporal segments sampled per video
base_model = "resnet50"
batch_size = 1
snippet_length = 1  # Number of frames composing the snippet, 1 for RGB, 5 for optical flow
snippet_channels = 3  # Number of channels in a frame, 3 for RGB, 2 for optical flow
height, width = 224, 224

scale = ComposeVideo([Scale((height, width))])

# Eight segments each composed of one frame.
# Each segment is ten seconds apart.
dataset = video_dataset = VideoDataset(dataset_path,
                                       num_frames=snippet_length * segment_count,
                                       step_size=10,
                                       transform=scale,
                                       is_val=False)
loader = DataLoader(dataset, batch_size=batch_size, num_workers=0, shuffle=False)

tsm = torch.hub.load(repo, "TSM", class_counts, segment_count, "RGB",
                     base_model=base_model,
                     pretrained="epic-kitchens")
from visual_odometry import VisualOdometry from dataset import VideoDataset from mplot import Mplot3d camera_settings_file = "data\kitti06\KITTI04-12.yaml" groundtruth_file = "data\kitti06\groundtruth.txt" dataset_file = "data/kitti06/video.mp4" if __name__ == "__main__": with open(camera_settings_file, 'r') as stream: cam_settings = yaml.load(stream, Loader=yaml.FullLoader) cam = Camera(cam_settings) groundtruth = Groundtruth(groundtruth_file) feature_tracker = ShiTomasiDetector() vo = VisualOdometry(cam, groundtruth, feature_tracker) dataset = VideoDataset(dataset_file) plt3d = Mplot3d(title='3D trajectory') img_id = 0 while (img_id < dataset.num_frames): img = dataset.getImage(img_id) if img is not None: vo.track(img, img_id) # main VO function if (img_id > 2): plt3d.drawTraj(vo.traj3d_gt, 'ground truth',