# Video feature extraction with Caffe (Python 2 syntax). Assumes module-level
# imports of argparse, os, and time, numpy as np, caffe, and the repo's
# FeatExtractor, load_video, and save_matrix helpers.
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'video_list',
        help='Input video list. Put the path to one video file on each line.')
    parser.add_argument('output_dir', help='Output directory.')
    parser.add_argument('--sample_rate', type=float, default=5.0,
                        help='Number of frames sampled per second.')
    parser.add_argument(
        '--model_def',
        default='/auto/iris-00/rn/chensun/ThirdParty/caffe_models/vgg_16/VGG_ILSVRC_16_layers_deploy.prototxt',
        help='Model definition file (default VGG16).')
    parser.add_argument(
        '--pretrained_model',
        default='/auto/iris-00/rn/chensun/ThirdParty/caffe_models/vgg_16/VGG_ILSVRC_16_layers.caffemodel',
        help='Model parameter file (default VGG16).')
    parser.add_argument('--layers', default='fc6,fc7',
                        help='Layers to be extracted, separated by commas.')
    parser.add_argument('--cpu', action='store_true', help='Use CPU if set.')
    parser.add_argument('--oversample', action='store_true',
                        help='Oversample 10 patches per frame if set.')
    args = parser.parse_args()

    if args.cpu:
        caffe.set_mode_cpu()
        print 'CPU mode'
    else:
        caffe.set_mode_gpu()
        print 'GPU mode'

    oversample = args.oversample

    # feature extraction
    extractor = FeatExtractor(args.model_def, args.pretrained_model,
                              oversample=oversample)
    blobs = args.layers.split(',')
    with open(args.video_list) as f:
        videos = [l.rstrip() for l in f]
    for video_file in videos:
        frames = load_video(video_file, args.sample_rate)
        if len(frames) < 1:  # failed to open the video
            continue
        start = time.time()
        feats = extractor.extract_batch(frames, blobs)
        print '%s features extracted in %f seconds.' % (
            os.path.basename(video_file), time.time() - start)
        # save the features, one matrix per requested blob
        for blob in blobs:
            feats[blob] = np.array(feats[blob])
        save_matrix(feats, os.path.join(
            args.output_dir,
            '%s.mat' % os.path.basename(video_file).split('.')[0]))
    return
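# Example invocation (a sketch: the script filename `extract_features.py` and
# the sys.argv entry point are assumptions, not shown in the original file):
#
#   python extract_features.py videos.txt feats_out/ \
#       --layers fc6,fc7 --sample_rate 5.0 --oversample
#
# videos.txt holds one video path per line; one <video>.mat file is written to
# feats_out/ per readable video.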
# Variant of the script above: adds --crop_dim and --gpu_id, supports lists of
# key-frame tarballs (--sample_rate 0), and writes each video's pooled,
# normalized feature in libsvm format (.bow). Additionally assumes
# `from numpy import linalg as LA` and the repo's load_keyframes_targz helper.
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'video_list',
        help='Input video list. Put the path to one video file on each line.')
    parser.add_argument('output_dir', help='Output directory.')
    parser.add_argument('--sample_rate', type=float, default=5.0,
                        help='Number of frames sampled per second. '
                             'Set to 0 if the list contains key-frame tarballs.')
    parser.add_argument('--crop_dim', type=int, default=224,
                        help='Crop dimension as defined in the prototxt file; '
                             'width == height == 224 (default).')
    parser.add_argument(
        '--model_def',
        default='/auto/iris-00/rn/chensun/ThirdParty/caffe_models/vgg_16/VGG_ILSVRC_16_layers_deploy.prototxt',
        help='Model definition file (default VGG16).')
    parser.add_argument(
        '--pretrained_model',
        default='/auto/iris-00/rn/chensun/ThirdParty/caffe_models/vgg_16/VGG_ILSVRC_16_layers.caffemodel',
        help='Model parameter file (default VGG16).')
    parser.add_argument('--layers', default='fc6,fc7',
                        help='Layers to be extracted, separated by commas.')
    parser.add_argument('--cpu', action='store_true', help='Use CPU if set.')
    parser.add_argument('--oversample', action='store_true',
                        help='Oversample 10 patches per frame if set.')
    parser.add_argument('--gpu_id', type=int, default=0,
                        help='The GPU ID to use.')
    args = parser.parse_args()

    if args.cpu:
        caffe.set_mode_cpu()
        print 'CPU mode'
    else:
        caffe.set_device(args.gpu_id)
        caffe.set_mode_gpu()
        print 'GPU mode'

    oversample = args.oversample

    extractor = FeatExtractor(args.model_def, args.pretrained_model,
                              oversample=oversample, crop_dim=args.crop_dim)
    blobs = args.layers.split(',')
    with open(args.video_list) as f:
        videos = [l.rstrip() for l in f]
    print 'batch_size=' + str(extractor.batch_size)
    if args.sample_rate > 0:
        print 'The input list is a video file list'
    elif args.sample_rate == 0:
        print 'The input list is a list of key-frame tarballs'

    for video_file in videos:
        if args.sample_rate > 0:
            frames = load_video(video_file, args.sample_rate)
        elif args.sample_rate == 0:
            try:
                frames = load_keyframes_targz(video_file)
            except Exception:  # skip unreadable tarballs
                continue
        else:
            continue  # negative sample rates are invalid
        if len(frames) < 1:  # failed to open the video
            continue
        start = time.time()
        feats = extractor.extract_batch(frames, blobs)
        print '%s features extracted in %f seconds.' % (
            os.path.basename(video_file), time.time() - start)

        # average pooling and normalization, one vector per layer
        out_vid_feats = []
        for blob in blobs:
            feat_pooled = np.mean(feats[blob], axis=0)
            feat_pooled = feat_pooled / LA.norm(feat_pooled, 2)  # L2 normalize
            # signed component-wise power normalization with p = 1/2
            sign_pnorm = np.sign(feat_pooled) * np.sqrt(np.abs(feat_pooled))
            out_vid_feats.append(sign_pnorm)
        # concatenate the layer outputs, scaled by the number of layers
        vid_feat = np.hstack(out_vid_feats) / len(out_vid_feats)

        # convert to libsvm format (1-indexed, nonzero components only)
        libsvm = []
        for idx, val in enumerate(vid_feat):
            if val != 0:
                libsvm.append('{}:{:.6f}'.format(idx + 1, val))
        svmout = ' '.join(libsvm)

        # write out as the .bow file
        outfilename = os.path.join(
            args.output_dir,
            os.path.basename(video_file).split('.')[0] + '.bow')
        with open(outfilename, 'w') as f_out:
            f_out.write(svmout)
            f_out.write('\n')

        # alternative: write out gzipped
        # outfilename = os.path.join(
        #     args.output_dir,
        #     os.path.basename(video_file).split('.')[0] + '.gz')
        # f_out = gzip.open(outfilename, 'wb')
        # f_out.writelines(svmout)
        # f_out.close()

        # alternative: save the raw per-frame features as a .mat file
        # for blob in blobs:
        #     feats[blob] = np.array(feats[blob])
        # save_matrix(feats, os.path.join(
        #     args.output_dir,
        #     '%s.mat' % os.path.basename(video_file).split('.')[0]))
    return
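# A minimal, self-contained sketch of the per-layer pooling/normalization used
# above (NumPy only; the name `pool_and_normalize` is hypothetical, not part
# of the original script):
def pool_and_normalize(frame_feats):
    """Average-pool per-frame features over time, L2-normalize, then apply
    the signed square root (power normalization with p = 1/2)."""
    pooled = np.mean(frame_feats, axis=0)             # (n_frames, d) -> (d,)
    pooled = pooled / LA.norm(pooled, 2)              # unit L2 norm
    return np.sign(pooled) * np.sqrt(np.abs(pooled))  # signed sqrt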
# Training driver for the pose/depth networks (PyTorch, Python 3). Relies on
# module-level globals `args`, `device`, and `data_parallel`, and on the
# repo's models, pose_transforms, and train/validate functions.
def main():
    global device
    global data_parallel
    print("=> will save everything to {}".format(args.output_dir))
    output_dir = Path(args.output_dir)
    output_dir.makedirs_p()

    # Data loading code
    train_transform = pose_transforms.Compose([
        pose_transforms.RandomHorizontalFlip(),
        pose_transforms.ArrayToTensor()
    ])
    valid_transform = pose_transforms.Compose(
        [pose_transforms.ArrayToTensor()])
    print("=> fetching sequences in '{}'".format(args.dataset_dir))
    dataset_dir = Path(args.dataset_dir)
    print("=> preparing train set")
    train_set = dataset()  # transform=train_transform)
    print("=> preparing val set")
    val_set = pose_framework_KITTI(
        dataset_dir, args.test_sequences,
        transform=valid_transform, seed=args.seed, shuffle=False)
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        val_set, batch_size=1, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # create the models
    odometry_net = PoseExpNet().to(device)
    depth_net = DispNetS().to(device)
    feat_extractor = FeatExtractor().to(device)

    # initialize model weights, from scratch or from checkpoints
    if args.odometry is None:
        odometry_net.init_weights()
    elif args.odometry:
        weights = torch.load(args.odometry)
        odometry_net.load_state_dict(weights)
    if args.depth is None:
        depth_net.init_weights()
    elif args.depth:
        weights = torch.load(args.depth)
        depth_net.load_state_dict(weights['state_dict'])
    feat_extractor.init_weights()
    cudnn.benchmark = True

    # pin a single GPU when a valid ID is given; otherwise fall back to
    # DataParallel across all visible GPUs
    if args.cuda and args.gpu_id in range(2):
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)
    elif args.cuda:
        data_parallel = True
        odometry_net = torch.nn.DataParallel(odometry_net)
        depth_net = torch.nn.DataParallel(depth_net)
        feat_extractor = torch.nn.DataParallel(feat_extractor)

    optim_params = [
        {'params': odometry_net.parameters(), 'lr': args.lr},
        {'params': depth_net.parameters(), 'lr': args.lr},
        {'params': feat_extractor.parameters(), 'lr': args.lr},
    ]
    # optimizer = optim.SGD(model.parameters(), lr=args.lr,
    #                       weight_decay=args.weight_decay,
    #                       momentum=args.momentum)
    optimizer = optim.Adam(optim_params, betas=(0.9, 0.999), eps=1e-08,
                           weight_decay=args.weight_decay)

    print("=> validating before training")
    # validate(odometry_net, depth_net, val_loader, 0, output_dir, True)
    print("=> training & validating")
    # validate(odometry_net, depth_net, val_loader, 0, output_dir)
    for epoch in range(1, args.epochs + 1):
        train(odometry_net, depth_net, feat_extractor, train_loader, epoch,
              optimizer)
        validate(odometry_net, depth_net, feat_extractor, val_loader, epoch,
                 output_dir)
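# A minimal sketch of the module-level globals that main() relies on. This is
# an assumption for illustration, not the original setup: `parse_args` stands
# in for the repo's own argparse configuration, whose exact flags are not
# shown here.
#
#   import torch
#
#   args = parse_args()  # hypothetical; must define output_dir, dataset_dir,
#                        # test_sequences, seed, batch_size, workers, odometry,
#                        # depth, cuda, gpu_id, lr, weight_decay, epochs
#   device = torch.device("cuda" if args.cuda else "cpu")
#   data_parallel = False
#
#   if __name__ == '__main__':
#       main()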