def main(argv):
    """Command-line entry point: extract network features for a list of videos.

    Reads one video path per line from the ``video_list`` file, samples frames
    from each video with ``load_video``, runs them through a ``FeatExtractor``
    and hands the requested layer outputs to ``save_matrix`` under
    ``<output_dir>/<name>.mat``, where ``<name>`` is the video's base name up
    to its first dot.

    Args:
        argv: Accepted for the usual ``main(argv)`` calling convention but not
            consulted; ``parse_args()`` is called without arguments, so the
            options are taken from ``sys.argv``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        'video_list',
        help='Input video list. Put path to video file on each line.')
    cli.add_argument('output_dir', help='Output directory.')
    cli.add_argument(
        '--sample_rate', type=float, default=5.0,
        help='Number of frames sampled per second')
    cli.add_argument(
        '--model_def',
        default='/auto/iris-00/rn/chensun/ThirdParty/caffe_models/vgg_16/VGG_ILSVRC_16_layers_deploy.prototxt',
        help='Model definition file (default VGG16)')
    cli.add_argument(
        '--pretrained_model',
        default='/auto/iris-00/rn/chensun/ThirdParty/caffe_models/vgg_16/VGG_ILSVRC_16_layers.caffemodel',
        help='Model parameter file (default VGG16)')
    cli.add_argument(
        '--layers', default='fc6,fc7',
        help='Layers to be extracted, separated by commas')
    cli.add_argument('--cpu', action='store_true', help='Use CPU if set')
    cli.add_argument(
        '--oversample', action='store_true',
        help='Oversample 10 patches per frame if set')
    args = cli.parse_args()

    # Select the compute device; GPU is used unless --cpu was given.
    if not args.cpu:
        caffe.set_mode_gpu()
        print('GPU mode')
    else:
        caffe.set_mode_cpu()
        print('CPU mode')

    # Feature extraction set-up. --oversample is a store_true flag, so its
    # value is already the boolean the extractor is given.
    extractor = FeatExtractor(
        args.model_def, args.pretrained_model, oversample=args.oversample)
    layer_names = args.layers.split(',')

    with open(args.video_list) as list_file:
        video_paths = [entry.rstrip() for entry in list_file]

    for video_path in video_paths:
        frames = load_video(video_path, args.sample_rate)
        if len(frames) == 0:
            # failed to open the video
            continue

        video_name = os.path.basename(video_path)
        started = time.time()
        feats = extractor.extract_batch(frames, layer_names)
        elapsed = time.time() - started
        print('%s feature extracted in %f seconds.' % (video_name, elapsed))

        # Convert each requested layer output to an ndarray before saving.
        for layer in layer_names:
            feats[layer] = np.array(feats[layer])
        stem = video_name.split('.')[0]
        save_matrix(feats, os.path.join(args.output_dir, '%s.mat' % stem))
    return
def main(argv):
    """Run per-video feature extraction from the command line.

    The positional ``video_list`` file is read line by line; every line is
    treated as a video path. For each video, frames are obtained from
    ``load_video`` at ``--sample_rate``, passed to
    ``FeatExtractor.extract_batch`` for the comma-separated ``--layers``, and
    the result is given to ``save_matrix`` with the target path
    ``<output_dir>/<base name up to the first dot>.mat``.

    Args:
        argv: Unused. Options are parsed by ``argparse`` from ``sys.argv``
            because ``parse_args()`` is called with no argument list.
    """
    # (flags, keyword options) pairs, registered in this order so that the
    # generated --help output keeps its layout.
    option_specs = (
        (('video_list',),
         {'help': 'Input video list. Put path to video file on each line.'}),
        (('output_dir',), {'help': 'Output directory.'}),
        (('--sample_rate',),
         {'type': float, 'default': 5.0,
          'help': 'Number of frames sampled per second'}),
        (('--model_def',),
         {'default': '/auto/iris-00/rn/chensun/ThirdParty/caffe_models/vgg_16/VGG_ILSVRC_16_layers_deploy.prototxt',
          'help': 'Model definition file (default VGG16)'}),
        (('--pretrained_model',),
         {'default': '/auto/iris-00/rn/chensun/ThirdParty/caffe_models/vgg_16/VGG_ILSVRC_16_layers.caffemodel',
          'help': 'Model parameter file (default VGG16)'}),
        (('--layers',),
         {'default': 'fc6,fc7',
          'help': 'Layers to be extracted, separated by commas'}),
        (('--cpu',), {'action': 'store_true', 'help': 'Use CPU if set'}),
        (('--oversample',),
         {'action': 'store_true',
          'help': 'Oversample 10 patches per frame if set'}),
    )
    parser = argparse.ArgumentParser()
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)
    args = parser.parse_args()

    # Pick the caffe mode setter together with the banner announcing it.
    if args.cpu:
        select_mode, banner = caffe.set_mode_cpu, 'CPU mode'
    else:
        select_mode, banner = caffe.set_mode_gpu, 'GPU mode'
    select_mode()
    print(banner)

    extractor = FeatExtractor(
        args.model_def,
        args.pretrained_model,
        oversample=True if args.oversample else False)
    blobs = args.layers.split(',')

    with open(args.video_list) as handle:
        videos = [row.rstrip() for row in handle]

    for video_file in videos:
        frames = load_video(video_file, args.sample_rate)
        if not len(frames) < 1:
            base_name = os.path.basename(video_file)
            t_begin = time.time()
            feats = extractor.extract_batch(frames, blobs)
            print('%s feature extracted in %f seconds.' % (
                base_name, time.time() - t_begin))

            # save the features
            for blob_name in blobs:
                feats[blob_name] = np.array(feats[blob_name])
            target = os.path.join(
                args.output_dir, '%s.mat' % base_name.split('.')[0])
            save_matrix(feats, target)
        # else: failed to open the video, move on to the next one
    return
def _pool_and_normalize(layer_feats):
    """Reduce the extracted features of one layer to a single vector.

    Takes the mean over axis 0 (presumably the frame axis -- confirm against
    ``FeatExtractor.extract_batch``), L2-normalises the result and applies a
    signed square root (component-wise power norm with p = 1/2).

    A zero L2 norm leaves the vector untouched instead of dividing by zero,
    so an all-zero input yields an all-zero output rather than NaNs.
    """
    pooled = np.mean(layer_feats, axis=0)
    l2norm = LA.norm(pooled, 2)
    if l2norm > 0:
        pooled = pooled / l2norm
    return np.sign(pooled) * np.sqrt(np.abs(pooled))


def _to_libsvm_line(vector):
    """Format the non-zero entries of ``vector`` as ``index:value`` pairs.

    Indices are 1-based and pairs are separated by single spaces.
    """
    return " ".join(
        "{}:{:6f}".format(idx + 1, val)
        for idx, val in enumerate(vector)
        if val != 0)


def main(argv):
    """Command-line entry point: write one pooled ``.bow`` feature file per input.

    Each line of the ``video_list`` file names one input. With
    ``--sample_rate > 0`` the input is loaded with ``load_video``; with
    ``--sample_rate 0`` it is loaded with ``load_keyframes_targz``. The frames
    are passed to ``FeatExtractor.extract_batch`` for the comma-separated
    ``--layers``; every layer output is pooled and normalised, the per-layer
    vectors are concatenated and divided by the number of layers, and the
    non-zero entries are written in libsvm format to
    ``<output_dir>/<base name up to the first dot>.bow``.

    Inputs that cannot be loaded, or that yield no frames, are skipped.

    Args:
        argv: Unused. ``parse_args()`` is called without arguments, so the
            options come from ``sys.argv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'video_list',
        help='Input video list. Put path to video file on each line.')
    parser.add_argument('output_dir', help='Output directory.')
    parser.add_argument(
        '--sample_rate', type=float, default=5.0,
        help='Number of frames sampled per second')
    parser.add_argument(
        '--crop_dim', type=float, default=224,
        help='Crop dim as defined in the prototxt file. width==height==224(default)')
    parser.add_argument(
        '--model_def',
        default='/auto/iris-00/rn/chensun/ThirdParty/caffe_models/vgg_16/VGG_ILSVRC_16_layers_deploy.prototxt',
        help='Model definition file (default VGG16)')
    parser.add_argument(
        '--pretrained_model',
        default='/auto/iris-00/rn/chensun/ThirdParty/caffe_models/vgg_16/VGG_ILSVRC_16_layers.caffemodel',
        help='Model parameter file (default VGG16)')
    parser.add_argument(
        '--layers', default='fc6,fc7',
        help='Layers to be extracted, separated by commas')
    parser.add_argument('--cpu', action='store_true', help='Use CPU if set')
    parser.add_argument(
        '--oversample', action='store_true',
        help='Oversample 10 patches per frame if set')
    parser.add_argument(
        '--gpu_id', type=int, default=0, help='The GPU ID to use.')
    args = parser.parse_args()

    # A negative rate matches neither loader below; reject it up front rather
    # than running the loop with no (or stale) frames.
    if args.sample_rate < 0:
        parser.error('--sample_rate must be >= 0 '
                     '(0 means the list holds key-frame tar archives)')

    if args.cpu:
        caffe.set_mode_cpu()
        print('CPU mode')
    else:
        caffe.set_device(args.gpu_id)
        caffe.set_mode_gpu()
        print('GPU mode')

    # --crop_dim is parsed as float for CLI compatibility, but the default is
    # the int 224; always hand the extractor an int so both paths agree.
    extractor = FeatExtractor(
        args.model_def, args.pretrained_model,
        oversample=args.oversample, crop_dim=int(args.crop_dim))
    blobs = args.layers.split(',')

    with open(args.video_list) as f:
        # Blank lines cannot name an input, so drop them here.
        videos = [path for path in (line.rstrip() for line in f) if path]

    print("batch_size=" + str(extractor.batch_size))
    from_keyframes = args.sample_rate == 0
    if from_keyframes:
        print("The input list is tar of key frames list")
    else:
        print("The input list is a video file list")

    for video_file in videos:
        if from_keyframes:
            try:
                frames = load_keyframes_targz(video_file)
            except Exception as exc:
                # Best effort: report and move on, but let KeyboardInterrupt
                # and SystemExit propagate.
                print('Skipping %s: failed to load key frames (%s)'
                      % (video_file, exc))
                continue
        else:
            frames = load_video(video_file, args.sample_rate)
        if len(frames) < 1:
            # failed to open the video
            continue

        start = time.time()
        feats = extractor.extract_batch(frames, blobs)
        print('%s feature extracted in %f seconds.'
              % (os.path.basename(video_file), time.time() - start))

        # Average pooling and normalisation per layer, then concatenate the
        # layer vectors and scale by the number of layers.
        out_vid_feats = [_pool_and_normalize(feats[blob]) for blob in blobs]
        vid_feat = np.hstack(out_vid_feats) / len(out_vid_feats)

        # Write out as the bow file (single libsvm-formatted line).
        svmout = _to_libsvm_line(vid_feat)
        outfilename = os.path.join(
            args.output_dir,
            os.path.basename(video_file).split('.')[0] + ".bow")
        with open(outfilename, 'w') as f_out:
            f_out.write(svmout)
            f_out.write("\n")
    return