def load_dataloader(args, train_paths, val_paths):
    train_dataset = VideoDataset(train_paths, args.cnn_feat)
    val_dataset = VideoDataset(val_paths, args.cnn_feat)

    train_dataloader = data.DataLoader(train_dataset,
                                       batch_size=1,
                                       shuffle=True,
                                       num_workers=4,
                                       pin_memory=True)
    val_dataloader = data.DataLoader(val_dataset,
                                     batch_size=1,
                                     shuffle=False,
                                     num_workers=4,
                                     pin_memory=True)

    return train_dataloader, val_dataloader
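
The loaders above keep batch_size=1, a common workaround when clips vary in length. A minimal padding collate_fn sketch (an assumption, since VideoDataset's item shapes are not shown here) that would allow larger batches:

import torch
from torch.nn.utils.rnn import pad_sequence

def pad_collate(batch):
    # Assumed item layout: (features, label) with features shaped (T_i, D).
    # Pad along the time axis so the batch can be stacked into one tensor.
    feats, labels = zip(*batch)
    lengths = torch.tensor([f.shape[0] for f in feats])
    padded = pad_sequence(feats, batch_first=True)  # (B, T_max, D)
    return padded, lengths, torch.tensor(labels)
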
Example #2
def get_activations(files, data_type, model, batch_size, size, length, dims, device):
	"""Calculates the activations of the pool_3 layer for all images.
	Params:
	-- files       : List of image files paths
	-- model       : Instance of inception model
	-- batch_size  : Batch size of images for the model to process at once.
					 Make sure that the number of samples is a multiple of
					 the batch size, otherwise some samples are ignored. This
					 behavior is retained to match the original FID score
					 implementation.
	-- dims        : Dimensionality of features returned by Inception
	-- device      : Device to run calculations
	Returns:
	-- A numpy array of dimension (num images, dims) that contains the
	   activations of the given tensor when feeding inception with the
	   query tensor.
	"""
	model.eval()

	if batch_size > len(files):
		print('Warning: batch size is bigger than the data size. Setting batch size to data size')
		batch_size = len(files)
	
	transform = torchvision.transforms.Compose([
		transforms_vid.ClipResize((size, size)),
		transforms_vid.ClipToTensor(),
		transforms_vid.ClipNormalize(mean=[114.7748, 107.7354, 99.4750], std=[1, 1, 1])]
	)

	if data_type == 'video':
		ds = VideoDataset(files, length, transform)
	elif data_type == 'frame':
		ds = FrameDataset(files, length, transform)
	else:
		raise NotImplementedError
	dl = torch.utils.data.DataLoader(ds, batch_size=batch_size, drop_last=False, num_workers=cpu_count())

	pred_arr = torch.zeros(len(files), dims).to(device)

	start_idx = 0

	for batch in tqdm(dl):

		batch = batch.to(device)

		with torch.no_grad():
			pred = model(batch)

		if pred.size(2) != 1 or pred.size(3) != 1 or pred.size(4) != 1:
			pred = adaptive_avg_pool3d(pred, output_size=(1, 1, 1))

		pred = pred.squeeze(4).squeeze(3).squeeze(2)
		pred_arr[start_idx:start_idx + pred.shape[0]] = pred
		start_idx = start_idx + pred.shape[0]

	pred_arr = pred_arr.cpu().numpy()

	return pred_arr
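
The activations returned above feed directly into FID's Gaussian statistics. A minimal follow-up sketch (not part of the original example; the helper name is an assumption) that computes the (mu, sigma) pair FID compares:

import numpy as np

def activation_statistics(act):
    # FID models each activation set as a Gaussian, so only the
    # feature mean and covariance are needed.
    mu = np.mean(act, axis=0)
    sigma = np.cov(act, rowvar=False)
    return mu, sigma
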
Example #3
def get_dataloaders(args):
    dataloaders = []
    for fold in range(4):
        loader = {}
        loader['train'] = torch.utils.data.DataLoader(VideoDataset(fold, 'train', args.cls),
                                                      batch_size=args.train_batch_size,
                                                      num_workers=args.num_workers,
                                                      shuffle=True,
                                                      pin_memory=True,
                                                      worker_init_fn=worker_init_fn)
        loader['test'] = torch.utils.data.DataLoader(VideoDataset(fold, 'test', args.cls),
                                                     batch_size=args.test_batch_size,
                                                     num_workers=args.num_workers,
                                                     shuffle=False,
                                                     pin_memory=True,
                                                     worker_init_fn=worker_init_fn)
        dataloaders.append(loader)
    return dataloaders
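
This example and the next pass a worker_init_fn that is not shown. A common definition (a sketch, not the original authors' code) reseeds NumPy in every DataLoader worker so random augmentations differ across workers:

import numpy as np
import torch

def worker_init_fn(worker_id):
    # torch seeds each worker with base_seed + worker_id; reuse that seed so
    # NumPy's RNG is also distinct and reproducible per worker.
    np.random.seed(torch.initial_seed() % 2**32)
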
Example #4
def get_dataloaders(args):
    dataloaders = {}

    dataloaders['train'] = torch.utils.data.DataLoader(
        VideoDataset('train', args),
        batch_size=args.train_batch_size,
        num_workers=args.num_workers,
        shuffle=True,
        pin_memory=True,
        worker_init_fn=worker_init_fn)

    dataloaders['test'] = torch.utils.data.DataLoader(
        VideoDataset('test', args),
        batch_size=args.test_batch_size,
        num_workers=args.num_workers,
        shuffle=False,
        pin_memory=True,
        worker_init_fn=worker_init_fn)
    return dataloaders
Example #5
def prepare_dataset(args):
    print('Training model on {} dataset...'.format(args.dataset))
    train_data_loader = DataLoader(VideoDataset(args=args),
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   drop_last=True)

    train_val_loaders = {'train': train_data_loader}
    train_val_sizes = {x: len(train_val_loaders[x].dataset) for x in ['train']}

    return train_val_loaders, train_val_sizes
Example #6
def prepare_dataset(configs):
    if configs["dataset"]["name"] not in ["mug", "isogd", "surreal"]:
        raise NotImplementedError

    return VideoDataset(
        configs["dataset"]["name"],
        Path(configs["dataset"]["path"]),
        eval(f'preprocess_{configs["dataset"]["name"]}_dataset'),
        configs['video_length'],
        configs['image_size'],
        configs["dataset"]['number_limit'],
    )
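
Example #6 resolves the preprocessing function by eval-ing a string built from the config. A safer, behavior-equivalent sketch uses an explicit registry, which fails loudly on unknown names and avoids executing arbitrary config strings (the function names below are exactly the ones the f-string would produce):

PREPROCESS_FUNCS = {
    "mug": preprocess_mug_dataset,
    "isogd": preprocess_isogd_dataset,
    "surreal": preprocess_surreal_dataset,
}
preprocess_func = PREPROCESS_FUNCS[configs["dataset"]["name"]]
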
Example #7
def new_mockdataset(video_length, image_size, geometric_info="depth"):
    inputs = {
        "name": "mock",
        "dataset_path": "data/raw/mock",
        "preprocess_func": None,
        "video_length": video_length,
        "image_size": image_size,
        "geometric_info": geometric_info,
        "extension": "png",
    }

    return VideoDataset(**inputs)
Example #8
def prepare_dataset(args):
    print('Training model on {} dataset...'.format(args.dataset))
    train_data_loader = DataLoader(VideoDataset(args=args, split='train'),
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   drop_last=True)
    val_data_loader = DataLoader(VideoDataset(args=args, split='val'),
                                 batch_size=args.batch_size,
                                 num_workers=args.num_workers,
                                 drop_last=True)
    test_data_loader = DataLoader(VideoDataset(args=args, split='test'),
                                  batch_size=args.batch_size,
                                  num_workers=args.num_workers,
                                  drop_last=True)

    train_val_loaders = {'train': train_data_loader, 'val': val_data_loader}
    train_val_sizes = {
        x: len(train_val_loaders[x].dataset)
        for x in ['train', 'val']
    }
    test_size = len(test_data_loader.dataset)
    return train_val_loaders, train_val_sizes, test_data_loader, test_size
Example #9
def main():
    global args, best_prec1
    args = parser.parse_args()

    if args.dataset == 'something-v1':
        num_class = 174
    elif args.dataset == 'diving48':
        num_class = 48
    elif args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'skating2':
        num_class = 63
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model_dir = os.path.join('experiments', args.dataset, args.arch,
                             args.consensus_type + '-' + args.modality,
                             str(args.run_iter))
    args.train_list, args.val_list, args.root_path, args.rgb_prefix = datasets_video.return_dataset(
        args.dataset)
    if 'something' in args.dataset:
        # label transformation for left/right categories
        target_transforms = {
            86: 87,
            87: 86,
            93: 94,
            94: 93,
            166: 167,
            167: 166
        }
        print('Target transformation is enabled....')
    else:
        target_transforms = None

    if not args.resume_rgb:
        if os.path.exists(model_dir):
            print('Directory {} already exists; it will be removed.'.format(model_dir))
            shutil.rmtree(model_dir)
        os.makedirs(model_dir)
        os.makedirs(os.path.join(model_dir, args.root_log))

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['flow', 'RGBDiff']:
        data_length = 5
        # data_length = 1

    if args.resume_rgb:
        if args.modality == 'RGB':
            if 'gst' in args.arch:
                model = TemporalModel(num_class,
                                      args.num_segments,
                                      model='GST',
                                      backbone=args.arch,
                                      alpha=args.alpha,
                                      beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            elif 'stm' in args.arch:
                model = TemporalModel(num_class,
                                      args.num_segments,
                                      model='STM',
                                      backbone=args.arch,
                                      alpha=args.alpha,
                                      beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            elif 'tmp' in args.arch:
                model = TemporalModel(num_class,
                                      args.num_segments,
                                      model='TMP',
                                      backbone=args.arch,
                                      alpha=args.alpha,
                                      beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            elif 'tsm' in args.arch:
                model = TemporalModel(num_class,
                                      args.num_segments,
                                      model='TSM',
                                      backbone=args.arch,
                                      alpha=args.alpha,
                                      beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            elif 'ori' in args.arch:
                model = TemporalModel(num_class,
                                      args.num_segments,
                                      model='ORI',
                                      backbone=args.arch,
                                      alpha=args.alpha,
                                      beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            elif 'I3D' in args.arch:
                print("!!!!!!!!!!!!!!!!!!!!!!!\n\n")
                model = TemporalModel(num_class,
                                      args.num_segments,
                                      model='I3D',
                                      backbone=args.arch,
                                      alpha=args.alpha,
                                      beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)

            else:
                model = TemporalModel(num_class,
                                      args.num_segments,
                                      model='ORI',
                                      backbone=args.arch,
                                      alpha=args.alpha,
                                      beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            if os.path.isfile(args.resume_rgb):
                print(("=> loading checkpoint '{}'".format(args.resume_rgb)))
                checkpoint = torch.load(args.resume_rgb)
                args.start_epoch = checkpoint['epoch']
                best_prec1 = checkpoint['best_prec1']
                original_checkpoint = checkpoint['state_dict']
                print(("(epoch {} ) best_prec1 : {} ".format(
                    checkpoint['epoch'], best_prec1)))
                # strip the 'module.' prefix that nn.DataParallel adds to keys
                original_checkpoint = {
                    k[7:]: v
                    for k, v in original_checkpoint.items()
                }
                #model_dict =  i3d_model.state_dict()
                #model_dict.update(pretrained_dict)
                model.load_state_dict(original_checkpoint)
                print(
                    ("=> loaded checkpoint '{}' (epoch {} ) best_prec1 : {} ".
                     format(args.resume_rgb, checkpoint['epoch'], best_prec1)))
            else:
                raise ValueError("=> no checkpoint found at '{}'".format(
                    args.resume_rgb))
    else:
        if args.modality == 'flow':
            if 'I3D' in args.arch:
                model = TemporalModel(num_class,
                                      args.num_segments,
                                      model='I3D',
                                      backbone=args.arch,
                                      alpha=args.alpha,
                                      beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi,
                                      modality='flow',
                                      new_length=data_length)
        elif args.modality == 'RGB':
            if 'gst' in args.arch:
                model = TemporalModel(num_class,
                                      args.num_segments,
                                      model='GST',
                                      backbone=args.arch,
                                      alpha=args.alpha,
                                      beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            elif 'stm' in args.arch:
                model = TemporalModel(num_class,
                                      args.num_segments,
                                      model='STM',
                                      backbone=args.arch,
                                      alpha=args.alpha,
                                      beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            elif 'tmp' in args.arch:
                model = TemporalModel(num_class,
                                      args.num_segments,
                                      model='TMP',
                                      backbone=args.arch,
                                      alpha=args.alpha,
                                      beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            elif 'tsm' in args.arch:
                model = TemporalModel(num_class,
                                      args.num_segments,
                                      model='TSM',
                                      backbone=args.arch,
                                      alpha=args.alpha,
                                      beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            elif 'ori' in args.arch:
                model = TemporalModel(num_class,
                                      args.num_segments,
                                      model='ORI',
                                      backbone=args.arch,
                                      alpha=args.alpha,
                                      beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            elif 'I3D' in args.arch:
                model = TemporalModel(num_class,
                                      args.num_segments,
                                      model='I3D',
                                      backbone=args.arch,
                                      alpha=args.alpha,
                                      beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)
            else:
                model = TemporalModel(num_class,
                                      args.num_segments,
                                      model='ORI',
                                      backbone=args.arch + '_ori',
                                      alpha=args.alpha,
                                      beta=args.beta,
                                      dropout=args.dropout,
                                      target_transforms=target_transforms,
                                      resi=args.resi)

    cudnn.benchmark = True
    writer = SummaryWriter(model_dir)
    # Data loading code
    args.store_name = '_'.join([
        args.dataset, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = get_optim_policies(model)
    train_augmentation = get_augmentation(mode='train')
    val_trans = get_augmentation(mode='val')
    normalize = GroupNormalize(input_mean, input_std)

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    if args.dataset == 'diving48':
        args.root_path = args.root_path + '/train'

    train_loader = torch.utils.data.DataLoader(VideoDataset(
        args.root_path,
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=args.rgb_prefix,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ]),
        dataset=args.dataset),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    print("trainloader.type = {}".format(type(train_loader)))
    if args.dataset == 'diving48':
        args.root_path = args.root_path[:-6] + '/test'
    val_loader = torch.utils.data.DataLoader(VideoDataset(
        args.root_path,
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=args.rgb_prefix,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ]),
        dataset=args.dataset),
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        log_test = open('test_not.csv', 'w')
        validate(val_loader, model, criterion, log_test)
        log_test.close()
        os.remove('test_not.csv')  # os.remove expects a path, not a file object
        return

    if args.lr_scheduler == 'cos_warmup':
        lr_scheduler_clr = CosineAnnealingLR.WarmupCosineLR(
            optimizer=optimizer,
            milestones=[args.warmup, args.epochs],
            warmup_iters=args.warmup,
            min_ratio=1e-7)
    elif args.lr_scheduler == 'lr_step_warmup':
        lr_scheduler_clr = CosineAnnealingLR.WarmupStepLR(
            optimizer=optimizer,
            milestones=[args.warmup] +
            [args.epochs - 30, args.epochs - 10, args.epochs],
            warmup_iters=args.warmup)
    elif args.lr_scheduler == 'lr_step':
        lr_scheduler_clr = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, args.lr_steps, 0.1)
    else:
        raise ValueError('Unknown lr scheduler ' + args.lr_scheduler)
    if args.resume_rgb:
        for epoch in range(0, args.start_epoch):
            optimizer.step()
            lr_scheduler_clr.step()

    log_training = open(
        os.path.join(model_dir, args.root_log, '%s.csv' % args.store_name),
        'a')
    for epoch in range(args.start_epoch, args.epochs):
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch + 1)
        train(train_loader,
              model,
              criterion,
              optimizer,
              epoch,
              log_training,
              writer=writer)
        lr_scheduler_clr.step()
        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader,
                             model,
                             criterion,
                             log_training,
                             writer=writer,
                             epoch=epoch)
            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'lr': optimizer.param_groups[-1]['lr'],
                }, is_best, model_dir)
            print('best_prec1: {}'.format(best_prec1))
        else:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'lr': optimizer.param_groups[-1]['lr'],
                }, False, model_dir)
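
The TemporalModel construction above repeats nine near-identical branches that differ only in the model string. A table-driven sketch (assuming the same constructor signature) collapses them; the first matching substring of args.arch wins, with 'ORI' as the fallback:

_ARCH_TO_MODEL = [('gst', 'GST'), ('stm', 'STM'), ('tmp', 'TMP'),
                  ('tsm', 'TSM'), ('ori', 'ORI'), ('I3D', 'I3D')]

def build_model(num_class, num_segments, arch, **kwargs):
    # Pick the model name from the arch string, defaulting to 'ORI'.
    model_name = next((m for key, m in _ARCH_TO_MODEL if key in arch), 'ORI')
    return TemporalModel(num_class, num_segments, model=model_name,
                         backbone=arch, **kwargs)
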
Example #10
    # buffer = (buffer - np.mean(buffer)) / np.std(buffer)
    buffer = dataloader.loadvideo(fname)
    buffer = dataloader.normalize(buffer)
    buffer = torch.FloatTensor(buffer).permute(3, 0, 1, 2).unsqueeze(0)

    outputs = model(buffer)
    _, preds = torch.max(outputs, 1)
    commands = [
        'click_here', 'close_window', 'down_scroll', 'drag', 'drop_here',
        'go_backward', 'go_forward', 'scroll_up', 'search_this', 'zoom_in',
        'zoom_out'
    ]
    # for s in sorted(list(zip(outputs.detach().numpy()[0],commands)),reverse=True):
    #     print(s)

    print(commands[preds[0].data])


if __name__ == "__main__":
    directory = "./zxsu/"
    val_set = VideoDataset(directory, mode='val')
    # restores the model and optimizer state_dicts
    lip_model = R2Plus1DClassifier(num_classes=11, layer_sizes=[3, 3, 3, 3])
    state_dicts = torch.load(
        "/home/rkmtlab/projects/zxsu/SilentCut_Oct/pure_model.pt",
        map_location=torch.device("cpu"))
    lip_model.load_state_dict(state_dicts)
    lip_model.eval()
    for f in glob(
            "/home/rkmtlab/projects/zxsu/SilentCut_Oct/zxsu/click_here/*.avi"):
        recognize(f, lip_model, val_set)
Example #11
def main():
    finetuning = False

    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    if args.dataset == 'something-v1':
        num_class = 174
        args.rgb_prefix = ''
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'diving48':
        num_class = 48
        args.rgb_prefix = 'frames'
        rgb_read_format = "{:05d}.jpg"
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model_dir = os.path.join('experiments', args.dataset, args.arch,
                             args.consensus_type + '-' + args.modality,
                             str(args.run_iter))
    if not args.resume:
        if os.path.exists(model_dir):
            print('Directory {} already exists.'.format(model_dir))
            sys.exit()
        else:
            os.makedirs(model_dir)
            os.makedirs(os.path.join(model_dir, args.root_log))

    writer = SummaryWriter(model_dir)

    args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
        args.dataset)

    if 'something' in args.dataset:
        # label transformation for left/right categories
        target_transforms = {
            86: 87,
            87: 86,
            93: 94,
            94: 93,
            166: 167,
            167: 166
        }
        print('Target transformation is enabled....')
    else:
        target_transforms = None

    args.store_name = '_'.join([
        args.dataset, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    model = VideoModel(num_class=num_class,
                       modality=args.modality,
                       num_segments=args.num_segments,
                       base_model=args.arch,
                       consensus_type=args.consensus_type,
                       dropout=args.dropout,
                       partial_bn=not args.no_partialbn,
                       gsm=args.gsm,
                       target_transform=target_transforms)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.evaluate, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(VideoDataset(
        args.root_path,
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=args.rgb_prefix + rgb_read_format,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(VideoDataset(
        args.root_path,
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=args.rgb_prefix + rgb_read_format,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    lr_scheduler_clr = CosineAnnealingLR.WarmupCosineLR(
        optimizer=optimizer,
        milestones=[args.warmup, args.epochs],
        warmup_iters=args.warmup,
        min_ratio=1e-7)
    if args.resume:
        for epoch in range(0, args.start_epoch):
            lr_scheduler_clr.step()

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    log_training = open(
        os.path.join(model_dir, args.root_log, '%s.csv' % args.store_name),
        'a')
    for epoch in range(args.start_epoch, args.epochs):

        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch + 1)

        train_prec1 = train(train_loader,
                            model,
                            criterion,
                            optimizer,
                            epoch,
                            log_training,
                            writer=writer)

        lr_scheduler_clr.step()

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader,
                             model,
                             criterion, (epoch + 1) * len(train_loader),
                             log_training,
                             writer=writer,
                             epoch=epoch)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'current_prec1': prec1,
                    'lr': optimizer.param_groups[-1]['lr'],
                }, is_best, model_dir)
        else:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'current_prec1': train_prec1,
                    'lr': optimizer.param_groups[-1]['lr'],
                }, False, model_dir)
Example #12
import os
import torch
import numpy as np
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from dataset import VideoDataset
from model.model import Model

device = torch.device('cuda:1')

train_data = VideoDataset(
    root_dir=
    '/home/datasets/mayilong/PycharmProjects/p55/two_stream/v1/data/datasets',
    split_data=
    '/home/datasets/mayilong/PycharmProjects/p55/two_stream/data/split_data',
    split='train',
)
val_data = VideoDataset(
    root_dir=
    '/home/datasets/mayilong/PycharmProjects/p55/two_stream/v1/data/datasets',
    split_data=
    '/home/datasets/mayilong/PycharmProjects/p55/two_stream/data/split_data',
    split='val',
)

train_loader = DataLoader(train_data,
                          batch_size=32,
                          shuffle=True,
                          num_workers=4)
Example #13
def main():

    global args

    args = parser.parse_args()
  
    train_videofolder, val_videofolder, args.root_path, _ = return_dataset(args.dataset)

    num_class = 174
    rgb_prefix = ''
    rgb_read_format = "{:05d}.jpg"

    model = VideoModel(num_class=num_class, modality=args.modality,
                        num_segments=args.num_segments, base_model=args.arch, consensus_type=args.consensus_type,
                        dropout=args.dropout, partial_bn=not args.no_partialbn, gsm=args.gsm, target_transform=None)

    model.consensus = Identity()
    print("parameters", sum(p.numel() for p in model.parameters()))

    print(model)
    # sys.exit(1)  # early debug exit; enabling it makes everything below unreachable

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std

    train_augmentation = model.get_augmentation()
    policies = model.get_optim_policies()
    model = model.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.evaluate, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    normalize = GroupNormalize(input_mean, input_std)



    dataset = VideoDataset(args.root_path, train_videofolder, num_segments=8,
                   new_length=1,
                   modality="RGB",
                   image_tmpl=rgb_prefix+rgb_read_format,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                       ToTorchFormatTensor(div=(args.arch not in ['BNInception', 'InceptionV3'])),
                       normalize
                   ]))

    def normalize_output(img):
        img = img - img.min()
        img = img / img.max()
        return img
    data = dataset[0][0].unsqueeze_(0).cuda()
    output = model(data)

    #print(model)
    # sys.exit(1)

    # Plot some images
    idx = torch.randint(0, output.size(0), ())
    #pred = normalize_output(output[idx, 0])
    img = data[idx, 0]

    #fig, axarr = plt.subplots(1, 2)
    plt.imshow(img.cpu().detach().numpy())
    #axarr[1].imshow(pred.cpu().detach().numpy())

    # Visualize feature maps
    activation = {}
    def get_activation(name):
        def hook(model, input, output):
            activation[name] = output.detach()
        return hook

  

    model.base_model.conv1_7x7_s2.register_forward_hook(get_activation('conv1'))
    data, _ = dataset[0]
    data.unsqueeze_(0)
    output = model(data.cuda())

    kernels = model.base_model.conv1_7x7_s2.weight.cpu().detach()

    fig, axarr = plt.subplots(kernels.size(0)-40, figsize=(15,15))
    for idx in range(kernels.size(0)-40):
        axarr[idx].imshow(np.transpose(kernels[idx].squeeze(), (1,2,0)))
        

    act = activation['conv1'].squeeze()
    fig, axarr = plt.subplots(act.size(0), figsize=(15,15))
    for idx in range(act.size(0)):
        axarr[idx].imshow(np.transpose(act[idx][:3].cpu(), (1,2,0)))

    plt.tight_layout()
    plt.show()
Example #14
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from dataset import VideoDataset
from torch.utils.data import DataLoader

device = torch.device('cuda:1')
test_data = VideoDataset(
    root_dir='/home/datasets/mayilong/PycharmProjects/p55/data/rgb',
    split_data='/home/datasets/mayilong/PycharmProjects/p55/data/split_data',
    split='test',
    n_frame=16)

test_loader = DataLoader(test_data, batch_size=8, shuffle=True)

print('test samples : {}'.format(len(test_data)))
model = C3D(7)


def test():
    model.load_state_dict(torch.load('./trained_model/c3d_new_0.7226.pth'))
    model.to(device)

    test_corrects = 0
    for idx, (buf, labels) in enumerate(test_loader):
        # The source is truncated here; a plausible completion (an assumption,
        # not the original code) scores each batch and accumulates accuracy.
        buf, labels = buf.to(device), labels.to(device)
        with torch.no_grad():
            outputs = model(buf)
        _, preds = torch.max(outputs, 1)
        test_corrects += torch.sum(preds == labels).item()
    print('test acc: {:.4f}'.format(test_corrects / len(test_data)))
Example #15
    cropping = torchvision.transforms.Compose(
        [GroupOverSample(net.input_size, net.scale_size)])
else:
    raise ValueError(
        "Only 1 and 10 crops are supported, but got {}".format(
            args.test_crops))

data_loader = torch.utils.data.DataLoader(VideoDataset(
    directory=args.directory,
    num_segments=args.test_segments,
    root_path=args.video_root,
    new_length=1 if args.modality == "RGB" else 5,
    modality=args.modality,
    image_tmpl=args.video_prefix,
    test_mode=True,
    video_length=args.video_length,
    seq_length=args.seq_length,
    transform=torchvision.transforms.Compose([
        cropping,
        Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
        ToTorchFormatTensor(
            div=(args.arch not in ['BNInception', 'InceptionV3'])),
        GroupNormalize(net.input_mean, net.input_std),
    ])),
                                          batch_size=1,
                                          shuffle=False,
                                          num_workers=args.workers * 2,
                                          pin_memory=True)

if args.gpus is not None:
    devices = [args.gpus[i] for i in range(args.workers)]
Example #16
# Load model
if args.model_type == 'mattingbase':
    model = MattingBase(args.model_backbone)
elif args.model_type == 'mattingrefine':
    model = MattingRefine(args.model_backbone, args.model_backbone_scale,
                          args.model_refine_mode,
                          args.model_refine_sample_pixels,
                          args.model_refine_threshold,
                          args.model_refine_kernel_size)

model = model.to(device).eval()
model.load_state_dict(torch.load(args.model_checkpoint), strict=False)

# Load video and background
vid = VideoDataset(args.video_src)
bgr = [Image.open(args.video_bgr).convert('RGB')]
dataset = ZipDataset([vid, bgr],
                     transforms=A.PairCompose([
                         A.PairApply(
                             T.Resize(args.video_resize[::-1]) if args.
                             video_resize else nn.Identity()),
                         HomographicAlignment() if args.preprocess_alignment
                         else A.PairApply(nn.Identity()),
                         A.PairApply(T.ToTensor())
                     ]))

# Create output directory
if os.path.exists(args.output_dir):
    if input(f'Directory {args.output_dir} already exists. Override? [Y/N]: '
             ).lower() == 'y':
Example #17
                              lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=args.l2wd)
        #optimizer = optim.RMSprop(model.parameters(), lr = 1e-2, alpha = 0.99)
        # try a dynamic (plateau-based) scheduler
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                         patience=10,
                                                         threshold=1e-3,
                                                         min_lr=1e-6)

        # preparing the training and validation dataset
        train_dataloader = DataLoader(VideoDataset(args.dataset_path,
                                                   args.dataset,
                                                   args.split,
                                                   'train',
                                                   args.modality,
                                                   mean_sub=args.meansub,
                                                   clip_len=args.clip_length,
                                                   test_mode=args.test_mode,
                                                   test_amt=args.test_amt),
                                      batch_size=args.batch_size,
                                      shuffle=True)
        val_dataloader = DataLoader(VideoDataset(args.dataset_path,
                                                 args.dataset,
                                                 args.split,
                                                 'validation',
                                                 args.modality,
                                                 mean_sub=args.meansub,
                                                 clip_len=args.clip_length,
                                                 test_mode=args.test_mode,
                                                 test_amt=args.test_amt),
Example #18
            }, path)

    # print the total time needed, HH:MM:SS format
    time_elapsed = int(time.time() - start)
    print(
        f"Training complete in {time_elapsed//3600}h {(time_elapsed%3600)//60}m {time_elapsed%60}s"
    )


# initialize the ResNet 18 version of this model
model = R2Plus1DClassifier(num_classes=2, layer_sizes=[2, 2, 2, 2]).to(device)
criterion = nn.CrossEntropyLoss(
)  # standard crossentropy loss for classification

# prepare the dataloaders into a dict
train_dataloader = DataLoader(VideoDataset('/home/irhum/data/video'),
                              batch_size=32,
                              shuffle=True,
                              num_workers=4)
# If training on Kinetics-600 and you need exactly a million samples per epoch,
# import VideoDataset1M and uncomment the following
# train_dataloader = DataLoader(VideoDataset1M('/home/irhum/data/video'), batch_size=32, num_workers=4)
val_dataloader = DataLoader(VideoDataset('/home/irhum/data/video', mode='val'),
                            batch_size=32,
                            num_workers=4)
dataloaders = {'train': train_dataloader, 'val': val_dataloader}

# hyperparameters as given in paper sec 4.1
optimizer = optim.SGD(model.parameters(), lr=0.01)
# the scheduler divides the lr by 10 every 10 epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
Example #19
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from model.model import Model
from dataset import VideoDataset
from torch.utils.data import DataLoader
import time

device = torch.device('cuda:1')

dataset_path = '/home/datasets/mayilong/PycharmProjects/p55/two_stream/datasets/dataset3/data'
split_data = '/home/datasets/mayilong/PycharmProjects/p55/two_stream/dataset/split_data'

test_data = VideoDataset(dataset_path=dataset_path,
                         split_data=split_data,
                         split='test',
                         multi_scale=False,
                         use_flip=False)

test_loader = DataLoader(test_data, batch_size=4, shuffle=True, num_workers=4)

model = Model(7).to(device)
model.load_state_dict(
    torch.load('./trained_model/two_stream_0.8678.pth')['state_dict'])
print('load model success')


def predict():
    corrects_so_far = 0
    count_so_far = 0
    print('Start testing')
    # The source is truncated here; a minimal completion (an assumption, not
    # the original code) scores test_loader and reports accuracy.
    for buf, labels in test_loader:
        buf, labels = buf.to(device), labels.to(device)
        with torch.no_grad():
            outputs = model(buf)
        _, preds = torch.max(outputs, 1)
        corrects_so_far += torch.sum(preds == labels).item()
        count_so_far += labels.size(0)
    print('test acc: {:.4f}'.format(corrects_so_far / count_so_far))
Example #20
                         weight_decay=reg_weight)
    loss_func = CustomLoss().cuda()
    scheduler = CosineAnnealingWarmRestarts(reg_optimizer,
                                            T_0=10,
                                            T_mult=2,
                                            eta_min=1e-5)
    split = 'train'
    seg_dir = '/home/yangzehua/UCF_Crimes/FLOW_Segments'
    anno_dir = '/home/yangzehua/RoadAccidentsDetector/ucf_train_test_info/CADP_Annotations.txt'
    path_dir = '/home/yangzehua/RoadAccidentsDetector/ucf_train_test_info/CADP_Test.txt'
    # test_seg_dir = os.path.join(seg_dir, 'test')
    test_seg_dir = '/home/yangzehua/UCF_Crimes/CADP_FLOW_Segments/test'
    model_save_dir = 'Vanilla_FLOW_CADP.pt'
    graph_save_dir = 'Vanilla_FLOW_CADP.png'

    dataset = VideoDataset(data_dir=seg_dir, split=split)
    video_loader = DataLoader(dataset=dataset,
                              batch_size=batch_size,
                              num_workers=8,
                              shuffle=True,
                              drop_last=True)

    loss_list = []
    auc = 0.0
    auc_list = [0]
    for epoch in tqdm(range(epoch_num)):
        epoch_loss = 0
        for batchX, batchY in video_loader:
            batchX = batchX.cuda()
            batchY = batchY.cuda()
            score_pred = net(batchX).cuda()
Example #21
# Load model
if args.model_type == 'mattingbase':
    model = MattingBase(args.model_backbone)
elif args.model_type == 'mattingrefine':
    model = MattingRefine(args.model_backbone, args.model_backbone_scale,
                          args.model_refine_mode,
                          args.model_refine_sample_pixels,
                          args.model_refine_threshold,
                          args.model_refine_kernel_size)

model = model.to(device).eval()
model.load_state_dict(torch.load(args.model_checkpoint, map_location=device),
                      strict=False)

# Load video and background
vid = VideoDataset(args.video_src)
bgr = [Image.open(args.video_bgr).convert('RGB')]
dataset = ZipDataset([vid, bgr],
                     transforms=A.PairCompose([
                         A.PairApply(
                             T.Resize(args.video_resize[::-1]) if args.
                             video_resize else nn.Identity()),
                         HomographicAlignment() if args.preprocess_alignment
                         else A.PairApply(nn.Identity()),
                         A.PairApply(T.ToTensor())
                     ]))
if args.video_target_bgr:
    dataset = ZipDataset([
        dataset,
        VideoDataset(args.video_target_bgr, transforms=T.ToTensor())
    ])
Example #22
def main():

    global args, best_prec1

    args = parser.parse_args()

    if not os.path.exists('./record'):
        os.mkdir('./record')

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'sthsth':
        num_class = 174
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = SlowFastNet(num_class)
    train_augmentation = get_augmentation('RGB', input_size)
    model = torch.nn.DataParallel(model).cuda()

    args.start_epoch = 0
    best_prec1 = 0
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})"
                   .format(args.evaluate, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    normalize = torchvision.transforms.Compose([GroupNormalize(input_mean, input_std),f2Dt3D()])

    train_loader = torch.utils.data.DataLoader(
        VideoDataset(args.root_path, args.train_list,
                     transform=torchvision.transforms.Compose([
                         train_augmentation,
                         Stack(roll=False),
                         ToTorchFormatTensor(div=True),
                         normalize,
                     ]), mode='train', T=args.T, tau=args.tau, dense_sample=not args.no_dense_sample),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        VideoDataset(args.root_path, args.val_list,
                     transform=torchvision.transforms.Compose([
                         GroupScale(int(scale_size)),
                         GroupCenterCrop(input_size),
                         Stack(roll=False),
                         ToTorchFormatTensor(div=True),
                         normalize,
                     ]), mode='test', T=args.T, tau=args.tau, dense_sample=not args.no_dense_sample),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    scheduler = WarmUpMultiStepLR(optimizer, [20, 30, 40], 0.1, last_epoch=args.start_epoch-1)

    # The cosine schedule from the original paper. Unused here because the
    # total number of training iterations is hard to estimate in advance.
    # max_step = len(train_loader)*args.epochs
    # lr_lambda = lambda step: 0.5 * args.lr * ((np.cos(step / max_step * np.pi)) + 1)
    # scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[lr_lambda])
    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        scheduler.step()
        print('Epoch {}/{}'.format(epoch + 1, args.epochs))
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)
        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
        }, is_best, epoch + 1)
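
The training loop above relies on a custom WarmUpMultiStepLR that is not shown. A minimal sketch of such a scheduler (an assumption about its behavior: linear warmup, then step decay) built on the stock LambdaLR:

import torch

def make_warmup_multistep(optimizer, milestones, gamma, warmup_epochs):
    # Linear warmup over the first warmup_epochs, then multiply the base LR
    # by gamma at every milestone, mirroring MultiStepLR afterwards.
    def lr_lambda(epoch):
        if epoch < warmup_epochs:
            return (epoch + 1) / warmup_epochs
        return gamma ** sum(epoch >= m for m in milestones)
    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)
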
Example #23
def train_model(num_classes, directory, path="model_data.pth.tar"):
    # batch_size = 20
    commands = sorted([
    'caption',
    'play',
    'stop',
    'go_back',
    'go_forward',
    'previous',
    'next',
    'volume_up',
    'volume_down',
    'maximize',
    'expand',
    'delete',
    'save',
    'like',
    'dislike',
    'share',
    'add_to_queue',
    'watch_later',
    'home',
    'trending',
    'subscription',
    'original',
    'library',
    'profile',
    'notification',
    'scroll_up',
    'scroll_down',
    'click'])
    device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
    folder = Path(directory)
    train_fnames,train_labels,val_fnames,val_labels = [],[],[],[]
    for label in sorted(os.listdir(folder)):
        shuffled_list = os.listdir(os.path.join(folder, label))
        random.Random(4).shuffle(shuffled_list)
        for fname in shuffled_list[:-10]:
            train_fnames.append(os.path.join(folder, label, fname))
            train_labels.append(label)
        for fname in shuffled_list[-10:]:
            val_fnames.append(os.path.join(folder, label, fname))
            val_labels.append(label)
    layer_sizes=[2,2,2,2,2,2]
    save=True
    # initialize the ResNet 18 version of this model
    model = R2Plus1DClassifier(num_classes=num_classes, layer_sizes=layer_sizes).to(device)

    transforms = video_transforms.Compose([video_transforms.CenterCrop((30,60))])
    train_set = VideoDataset(fnames=train_fnames, labels=train_labels, transforms=transforms)
    val_set = VideoDataset(fnames=val_fnames, labels=val_labels, transforms=transforms)

    train_dataloader = DataLoader(train_set, batch_size=1, shuffle=False, num_workers=4)
    val_dataloader = DataLoader(val_set, batch_size=1, shuffle=False, num_workers=4)

    if os.path.exists(path):
        checkpoint = torch.load(path)
        print("Reloading from previously saved checkpoint")
        model.load_state_dict(checkpoint["state_dict"])
    model.eval()
    
    dataloaders = {'train_dataloader':train_dataloader,'val_dataloader':val_dataloader}
    for phase in ['train_dataloader','val_dataloader']:
        i = 0  
        for inputs, labels in dataloaders[phase]:
            inputs_buffer = inputs.permute(0,4,1,2,3).to(device)

            with torch.set_grad_enabled(False):
                outputs = model.res2plus1d(inputs_buffer) 

            i += 1
            print(f"extracted {i} of {len(dataloaders[phase].dataset)} videos")
            feats_dir = f"features/{phase}/{commands[labels[0]]}"
            if not os.path.exists(feats_dir):
                os.makedirs(feats_dir) 
            np.save(f"{feats_dir}/{commands[labels[0]]}{i}.npy",outputs.cpu().detach().numpy())
Example #24
        GroupOverSample(net.input_size, net.scale_size)
    ])
elif args.test_crops == 5:
    cropping = torchvision.transforms.Compose([
        GroupFiveCrops(net.input_size, net.scale_size)
    ])
else:
    raise ValueError("Only 1 and 10 crops are supported while we got {}".format(args.test_crops))

data_loader = torch.utils.data.DataLoader(
        VideoDataset(args.root_path, args.val_list, num_segments=args.test_segments,
                   new_length=1 if args.modality == "RGB" else 5,
                   modality=args.modality,
                   image_tmpl=args.rgb_prefix+rgb_read_format,
                   test_mode=True,
                   transform=torchvision.transforms.Compose([
                       cropping,
                       Stack(roll=(args.arch in ['BNInception','InceptionV3'])),
                       ToTorchFormatTensor(div=(args.arch not in ['BNInception','InceptionV3'])),
                       GroupNormalize(net.input_mean, net.input_std),
                   ]), num_clips=args.num_clips),
        batch_size=1, shuffle=False,
        num_workers=args.workers * 2, pin_memory=True)

if args.gpus is not None:
    devices = [args.gpus[i] for i in range(args.workers)]
else:
    devices = list(range(args.workers))


net = torch.nn.DataParallel(net.cuda())
Example #25
import sys
from network_tsn import TSNClassifier
from dataset import VideoDatasetTSN as VideoDataset
from torch.utils.data import DataLoader
from trainer import test_model
data_path = '../UCF_for_R21D'
im_root = '../UCF-101_of'
save_path = 'tsn_model_resnet101_8frame_from_scratch.pth'
resize_width = 360
resize_height = 256
crop_size = 224
clip_len = 8
# build model
num_classes = 101
model = TSNClassifier(num_classes=num_classes,
                      clip_len=clip_len,
                      base_model='resnet101',
                      pretrained=False)
# build dataset
val_dataloader = DataLoader(VideoDataset(data_path,
                                         im_root,
                                         resize_width=resize_width,
                                         resize_height=resize_height,
                                         crop_size=crop_size,
                                         clip_len=clip_len,
                                         mode='val'),
                            batch_size=1,
                            num_workers=2)
# evaluate model
test_model(model, val_dataloader, path=save_path)
Example #26
from dataset import VideoDataset
from sampler import VideoSampler
from torch.utils.data import DataLoader
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np

PATH = './../Dataset_test/'

# temp = np.random.rand(360, 640, 3, 2)
# np.save(PATH+'flow/forward/forward-0000-0001.npy', temp)
# np.save(PATH+'flow/backward/backward-0001-0000.npy', temp)

dataset = VideoDataset(PATH)
sampler = VideoSampler(dataset, replacement=False)
loader = DataLoader(dataset, sampler=sampler, batch_size=1, num_workers=1)

for epoch in range(1):
    print('epoch=%d -------------------------' % (epoch))
    for i, data in enumerate(loader, 0):
        # print(data.shape)
        print(i)
        frames, fwd_flow, bwd_flow = data

        # print(fwd_flow.squeeze().shape)
        # print(bwd_flow.shape)
        print(frames.shape)

        if i > -1: break  # only inspect the first batch; this script is a sanity check
Example #27
0
import torch
import numpy as np
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from dataset import VideoDataset
from model.model import Model

device = torch.device('cuda:2')

dataset_path = '/home/datasets/mayilong/PycharmProjects/p55/two_stream/datasets/dataset3/data'
split_data = '/home/datasets/mayilong/PycharmProjects/p55/two_stream/datasets/dataset3/split_data'

train_data = VideoDataset(dataset_path=dataset_path,
                          split_data=split_data,
                          split='train',
                          multi_scale=True,
                          use_flip=True)
val_data = VideoDataset(dataset_path=dataset_path,
                        split_data=split_data,
                        split='val',
                        multi_scale=False,
                        use_flip=False)
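# multi-scale cropping and horizontal flips are enabled only for the training split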

train_loader = DataLoader(train_data,
                          batch_size=16,
                          shuffle=True,
                          num_workers=4)
val_loader = DataLoader(val_data, batch_size=16, shuffle=False, num_workers=4)  # validation needs no shuffling

model = Model(7)
Example #28
0
def train_model(dataset=dataset, save_dir=save_dir, num_classes=num_classes, lr=lr,
                num_epochs=nEpochs, save_epoch=snapshot, useTest=useTest, test_interval=nTestInterval):
    """
        Args:
            num_classes (int): Number of classes in the data
            num_epochs (int, optional): Number of epochs to train for.
    """
    print('save_dir: ', save_dir)
    if modelName == 'C3D':
        model = C3D(num_classes=num_classes, pretrained=True)
        train_params = [{'params': get_1x_lr_params(model), 'lr': lr},
                        {'params': get_10x_lr_params(model), 'lr': lr * 10}]
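        # pretrained backbone layers train at the base lr; the freshly
        # initialized layers train at 10x lr, as is common when fine-tuning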
    else:
        print('We only implemented C3D models.')
        raise NotImplementedError
    criterion = nn.CrossEntropyLoss()  # standard crossentropy loss for classification
    optimizer = optim.SGD(train_params, lr=lr, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10,
                                          gamma=0.1)  # the scheduler divides the lr by 10 every 10 epochs

    if resume_epoch == 0:
        print("Training {} from scratch...".format(modelName))
    else:
        checkpoint = torch.load(os.path.join(save_dir, 'models', saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar'),
                                map_location=lambda storage, loc: storage)   # Load all tensors onto the CPU
        print("Initializing weights from: {}...".format(
            os.path.join(save_dir, 'models', saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar')))
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['opt_dict'])

    print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))
    model.to(device)
    criterion.to(device)

    log_dir = os.path.join(save_dir, 'models', datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    writer = SummaryWriter(log_dir=log_dir)

    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VideoDataset(dataset=dataset, split='train', clip_len=16), batch_size=20, shuffle=True, num_workers=4)
    val_dataloader = DataLoader(VideoDataset(dataset=dataset, split='val',  clip_len=16), batch_size=20, num_workers=4)
    test_dataloader = DataLoader(VideoDataset(dataset=dataset, split='test', clip_len=16), batch_size=20, num_workers=4)
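    # train/val run every epoch; the test loader is only used every test_interval epochs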

    trainval_loaders = {'train': train_dataloader, 'val': val_dataloader}
    trainval_sizes = {x: len(trainval_loaders[x].dataset) for x in ['train', 'val']}
    test_size = len(test_dataloader.dataset)

    for epoch in range(resume_epoch, num_epochs):
        # each epoch has a training and validation step
        for phase in ['train', 'val']:
            start_time = timeit.default_timer()

            # reset the running loss and corrects
            running_loss = 0.0
            running_corrects = 0.0

            # set model to train() or eval() mode depending on whether it is trained
            # or being validated. Primarily affects layers such as BatchNorm or Dropout.
            if phase == 'train':
                model.train()
            else:
                model.eval()

            for inputs, labels in tqdm(trainval_loaders[phase]):
                # move inputs and labels to the device the training is taking place on
                # (torch.autograd.Variable is deprecated; tensors work directly)
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()

                if phase == 'train':
                    outputs = model(inputs)
                else:
                    with torch.no_grad():
                        outputs = model(inputs)

                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)
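                # CrossEntropyLoss consumes raw logits; the softmax above is only used to derive predictions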

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / trainval_sizes[phase]
            epoch_acc = running_corrects.double() / trainval_sizes[phase]

            if phase == 'train':
                # step the LR scheduler once per epoch, after the optimizer updates
                # (PyTorch >= 1.1 expects scheduler.step() after optimizer.step())
                scheduler.step()
                writer.add_scalar('data/train_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/train_acc_epoch', epoch_acc, epoch)
            else:
                writer.add_scalar('data/val_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/val_acc_epoch', epoch_acc, epoch)

            print("[{}] Epoch: {}/{} Loss: {} Acc: {}".format(phase, epoch+1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

        if epoch % save_epoch == (save_epoch - 1):
            torch.save({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'opt_dict': optimizer.state_dict(),
            }, os.path.join(save_dir, 'models', saveName + '_epoch-' + str(epoch) + '.pth.tar'))
            print("Save model at {}\n".format(os.path.join(save_dir, 'models', saveName + '_epoch-' + str(epoch) + '.pth.tar')))

        if useTest and epoch % test_interval == (test_interval - 1):
            model.eval()
            start_time = timeit.default_timer()

            running_loss = 0.0
            running_corrects = 0.0

            for inputs, labels in tqdm(test_dataloader):
                inputs = inputs.to(device)
                labels = labels.to(device)

                with torch.no_grad():
                    outputs = model(inputs)
                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / test_size
            epoch_acc = running_corrects.double() / test_size

            writer.add_scalar('data/test_loss_epoch', epoch_loss, epoch)
            writer.add_scalar('data/test_acc_epoch', epoch_acc, epoch)

            print("[test] Epoch: {}/{} Loss: {} Acc: {}".format(epoch+1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

    writer.close()
Example #29
0
segment_count = 8
base_model = "resnet50"

batch_size = 1
snippet_length = 1  # Number of frames composing the snippet, 1 for RGB, 5 for optical flow
snippet_channels = 3  # Number of channels in a frame, 3 for RGB, 2 for optical flow
height, width = 224, 224

scale = ComposeVideo([Scale((height, width))])

# Eight segments each composed of one frame.
# Each segment is ten seconds apart.
dataset = VideoDataset(dataset_path,
                       num_frames=snippet_length * segment_count,
                       step_size=10,
                       transform=scale,
                       is_val=False)

loader = DataLoader(dataset,
                    batch_size=batch_size,
                    num_workers=0,
                    shuffle=False)

tsm = torch.hub.load(repo,
                     "TSM",
                     class_counts,
                     segment_count,
                     "RGB",
                     base_model=base_model,
                     pretrained="epic-kitchens")
Example #30
0
import yaml

from visual_odometry import VisualOdometry
from dataset import VideoDataset
from mplot import Mplot3d
# NOTE: Camera, Groundtruth and ShiTomasiDetector are assumed to be provided
# elsewhere in the project; their imports were not part of this snippet.

camera_settings_file = "data/kitti06/KITTI04-12.yaml"  # forward slashes are portable across platforms
groundtruth_file = "data/kitti06/groundtruth.txt"
dataset_file = "data/kitti06/video.mp4"
if __name__ == "__main__":

    with open(camera_settings_file, 'r') as stream:
        cam_settings = yaml.load(stream, Loader=yaml.FullLoader)
    cam = Camera(cam_settings)
    groundtruth = Groundtruth(groundtruth_file)
    feature_tracker = ShiTomasiDetector()
    vo = VisualOdometry(cam, groundtruth, feature_tracker)
    dataset = VideoDataset(dataset_file)
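    # unlike the training datasets above, this VideoDataset serves frames by index from a single video file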

    plt3d = Mplot3d(title='3D trajectory')

    img_id = 0
    while img_id < dataset.num_frames:

        img = dataset.getImage(img_id)

        if img is not None:

            vo.track(img, img_id)  # main VO function

            if img_id > 2:
                plt3d.drawTraj(vo.traj3d_gt,
                               'ground truth',