def videos_to_frames_pipe(data_dir, ext='.avi', target_ext='.allframes.npy',
                          classes=None, min_size=128, max_elements=13320):
    """
    Creates and executes an mPyPl pipe that loads all videos from 'data_dir'
    (each subfolder is a separate class), extracts all frames and saves them
    in numpy format

    Parameters
    ----------
    data_dir : str, required
        The directory where all the videos are organised in subfolders
        (subfolder name = class name)
    ext : str, optional
        Extension of video files to search for, by default '.avi'
    target_ext : str, optional
        Target extension for video frames to be serialized to,
        by default '.allframes.npy'
    classes : dict, optional
        Dictionary mapping class names to numeric labels. Must match the
        folder names in 'data_dir'. If set to 'None', classes are figured
        out automatically from the folder structure in 'data_dir'.
        Example: {'Class1': 1, 'Class2': 2}
        Defaults to 'None'
    min_size : int, optional
        Minimum size of frames based on the shorter edge, by default 128
    max_elements : int, optional
        Max elements for the silly progress indicator, by default 13320
    """
    (mp.get_datastream(data_dir, classes=classes, ext=ext)
     | mp.apply('filename', 'clip', lambda fn: VideoFileClip(fn),
                eval_strategy=mp.EvalStrategies.Value)
     # resize so that the shorter edge equals min_size
     | mp.apply('clip', 'clip',
                lambda clip: clip.fx(vfx.resize, width=min_size)
                if clip.w <= clip.h
                else clip.fx(vfx.resize, height=min_size),
                eval_strategy=mp.EvalStrategies.Value)
     | mp.apply('clip', 'allframes',
                lambda c: np.asarray(list(c.iter_frames())),
                eval_strategy=mp.EvalStrategies.Value)
     | mp.iter('clip', close_clip)
     | mp.delfield('clip')
     | cachecomputex(ext, target_ext,
                     lambda x, nfn: np.save(nfn, x['allframes']),
                     lambda x, nfn: print("Skipping saving 'allframes' for {}".format(x['filename'])))
     | mp.silly_progress(elements=max_elements)
     | mp.execute)
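# Hypothetical invocation (the data layout and path below are illustrative,
# not taken from this repo): extract and cache all frames for every .avi
# under data/videos, where each subfolder names a class.
#
# videos_to_frames_pipe('data/videos', ext='.avi',
#                       target_ext='.allframes.npy', min_size=128)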
def get_features_from_files(data_dir, features_ext='.proc.c3d-avg.npy',
                            test_split=[], classes=None, max_elements=13320):
    """
    Creates and executes an mPyPl pipe that loads feature vectors from
    serialized files and returns a preprocessed data stream that can be
    further used with respect to the train/test split and the specific
    class assigned to each element of the stream

    Parameters
    ----------
    data_dir : str, required
        The directory where all the videos are organised in subfolders
        (subfolder name = class name)
    features_ext : str, optional
        Extension of serialized feature vectors, by default '.proc.c3d-avg.npy'
    test_split : list, optional
        List of filenames belonging to the test subset. If empty, the test
        subset will contain no data, by default []
    classes : dict, optional
        Dictionary mapping class names to numeric labels. Must match the
        folder names in 'data_dir'. If set to 'None', classes are figured
        out automatically from the folder structure in 'data_dir'.
        Example: {'Class1': 1, 'Class2': 2}
        Defaults to 'None'
    max_elements : int, optional
        Max elements for the silly progress indicator, by default 13320

    Returns
    -------
    list of mPyPl.mdict.mdict
        List of dictionaries that can be used to access the data
    """
    data = (mp.get_datastream(data_dir, classes=classes, ext=features_ext)
            | mp.datasplit_by_pattern(test_pattern=test_split)
            | mp.pshuffle
            | mp.apply('filename', 'c3d_avg', lambda fn: np.load(fn))
            | mp.silly_progress(elements=max_elements)
            | mp.select_fields(['c3d_avg', 'class_id', 'split'])
            | mp.as_list)
    return data
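# Hypothetical usage: load the averaged C3D features and separate them by
# the 'split' field. The test filename is illustrative, and the 'split'
# values are assumed to carry mPyPl's SplitType enum; adjust to however
# your mPyPl version encodes them.
#
# data = get_features_from_files('data/videos',
#                                test_split=['v_Basketball_g01_c01.avi'])
# train = [x for x in data if x['split'] == mp.SplitType.Train]
# test = [x for x in data if x['split'] == mp.SplitType.Test]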
def frames_to_features_pipe(data_dir, mean_std, model, ext='.allframes.npy',
                            target_ext='.proc.c3d-avg.npy', classes=None,
                            frames_per_clip=16, frames_step=8, batch_size=32,
                            max_elements=13320):
    """
    Creates and executes an mPyPl pipe that loads all video frames, resizes
    and crops them, preprocesses them, runs inference against a Keras model
    and serializes the resulting feature vectors in npy format

    Parameters
    ----------
    data_dir : str, required
        The directory where all the videos are organised in subfolders
        (subfolder name = class name)
    mean_std : array, required
        Array of per-channel mean and std values used for preprocessing
        of frames.
        Template: array[ [mean_R, mean_G, mean_B], [std_R, std_G, std_B] ]
        Example: array[ [123, 112, 145], [60, 62, 64] ]
    model : Keras model obj, required
        Keras model object ready for running predictions
    ext : str, optional
        Extension of frame files to search for, by default '.allframes.npy'
    target_ext : str, optional
        Target extension for feature vectors to be serialized to,
        by default '.proc.c3d-avg.npy'
    classes : dict, optional
        Dictionary mapping class names to numeric labels. Must match the
        folder names in 'data_dir'. If set to 'None', classes are figured
        out automatically from the folder structure in 'data_dir'.
        Example: {'Class1': 1, 'Class2': 2}
        Defaults to 'None'
    frames_per_clip : int, optional
        When extracting smaller clips from a longer video, the number of
        frames cut out of the longer clip, by default 16
    frames_step : int, optional
        When extracting smaller clips from a longer video, the step in
        number of frames, by default 8
    batch_size : int, optional
        Mini-batch size used when pushing data to the model for scoring,
        by default 32
    max_elements : int, optional
        Max elements for the silly progress indicator, by default 13320
    """
    (mp.get_datastream(data_dir, classes=classes, ext=ext)
     # load all frames for each video file
     | mp.apply('filename', 'allframes', lambda fn: np.load(fn),
                eval_strategy=mp.EvalStrategies.OnDemand)
     # cut each video into multiple shorter clips, defined by the
     # frames_per_clip and frames_step parameters
     | mp.apply('allframes', 'clips16-8',
                lambda v: extract_clips(v, frames_per_clip=frames_per_clip, step=frames_step),
                eval_strategy=mp.EvalStrategies.OnDemand)
     # center-crop frames to 112x112
     | mp.apply('clips16-8', 'cropped16-8',
                lambda v: np.asarray([[crop_center(frame) for frame in clip] for clip in v]),
                eval_strategy=mp.EvalStrategies.OnDemand)
     # preprocess frames by subtracting the channel-wise mean
     | mp.apply('cropped16-8', 'proc_cropped16-8',
                lambda v: preprocess_input(v, mean_std, divide_std=False),
                eval_strategy=mp.EvalStrategies.OnDemand)
     # run batch predictions on the C3D model to get a feature vector per clip
     | mp.apply_batch('proc_cropped16-8', 'c3d16-8',
                      lambda x: predict_c3d(x, model),
                      batch_size=batch_size)
     # for each full video, average the feature vectors of all extracted clips
     | mp.apply('c3d16-8', 'c3d_avg', lambda v: np.average(v, axis=0),
                eval_strategy=mp.EvalStrategies.OnDemand)
     # draw silly progress
     | mp.silly_progress(elements=max_elements)
     # save averaged feature vectors into .npy files
     | cachecomputex(ext, target_ext,
                     lambda x, nfn: np.save(nfn, x['c3d_avg']),
                     lambda x, nfn: print("Skipping saving 'c3d_avg' for {}".format(x['filename'])))
     | mp.execute)
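# The sliding-window logic of extract_clips is defined elsewhere in the repo;
# a minimal sketch of such a helper, assuming it slides a fixed-size window
# with a fixed step over the frame axis, could look like this:

def extract_clips_sketch(frames, frames_per_clip=16, step=8):
    """Slide a window of frames_per_clip frames over the video with the given
    step; returns an array of shape (n_clips, frames_per_clip, H, W, C).
    A sketch only; the project's actual extract_clips may differ."""
    starts = range(0, len(frames) - frames_per_clip + 1, step)
    return np.asarray([frames[s:s + frames_per_clip] for s in starts])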
# Create a session with the above options specified.
k.tensorflow_backend.set_session(tf.Session(config=config))

###################################

vgg = keras.applications.vgg16.VGG16(include_top=False,
                                     weights='imagenet',
                                     input_shape=(video_size[1], video_size[0], 3))

def get_vgg(video):
    res = vgg.predict(keras.applications.vgg16.preprocess_input(video))
    return res

def calcvgg(x, nfn):
    print("Creating VGG descriptors for {}".format(x['filename']))
    clp = x['video']
    df = get_vgg(np.array(list(clp.iter_frames())))
    np.save(nfn, df)

if __name__ == "__main__":
    (mp.get_datastream(data_dir, ext=".resized.mp4")
     | load_moviepy_video()
     | cachecomputex(".resized.mp4", ".vgg.npy", calcvgg,
                     lambda x, nx: print("Skipping {}".format(x)))
     | close_moviepy_video()
     | execute)
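# Quick sanity check on the serialized descriptors (the filename below is
# illustrative): with include_top=False and no pooling, VGG16 yields one
# 512-channel feature map per frame.
#
# feats = np.load('clip1.vgg.npy')
# print(feats.shape)  # (n_frames, video_size[1] // 32, video_size[0] // 32, 512)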
        # tail of get_scene_changes: fail if the pickled scene data is missing
        raise ValueError("Data file '%s' not found" % scene_detection_file)
    return scene_changes

if __name__ == "__main__":
    if len(sys.argv) > 1:
        k = int(sys.argv[1])
        n = int(sys.argv[2])
        config.base_dir = config.base_dir_batch
        config.data_dir = config.data_dir_batch
    else:
        k, n = 0, 1

    (mp.get_datastream(data_dir, ext=".full.mp4")
     | batch(k, n)
     | mp.fapply('video', resize_video.load_resize)
     | execute)

    resized_file_names = (mp.get_datastream(data_dir, ext=".resized.mp4")
                          | mp.select_field("filename")
                          | mp.as_list)

    # use only the first threshold
    scene_changes = get_scene_changes(resized_file_names, data_dir)[40]

    (mp.get_datastream(data_dir, ext=".resized.mp4")
     | mp.filter("filename", lambda f: os.path.abspath(f) not in scene_changes)
     | cachecomputex(".resized.mp4", ".optflow.npy",
                     create_denseflow.calcflow,
                     functools.partial(skip, s="creating dense flow"))
     | execute)
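# The batch(k, n) stage partitions the stream so several script instances can
# process disjoint slices of the data. It is not an mPyPl built-in as far as
# this snippet shows; a minimal sketch, assuming slice k of n is selected by
# index modulo n:

from pipe import Pipe

@Pipe
def batch_sketch(seq, k, n):
    """Yield only the elements whose index i satisfies i % n == k, so that
    n parallel workers each take a disjoint slice of the stream."""
    for i, x in enumerate(seq):
        if i % n == k:
            yield x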
def calc_sub(filename, new_filename, model, get_func):
    print("Processing {}".format(filename))
    clp = VideoFileClip(filename)
    frames = list(clp.iter_frames())
    boxes = pickle.load(open(filename.replace('.resized.mp4', '.boxes.pickle'), 'rb'))
    poses = []
    for f, bs in zip(frames, boxes):
        fposes = []
        for box in bs:
            x1, y1, x2, y2 = box.astype(int)
            # expand the box by 30px on each side, clamped to the frame edges
            sub = f[max(0, y1 - 30):y2 + 30, max(0, x1 - 30):x2 + 30]
            fposes.append(get_func(model, sub))
        poses.append(fposes)
    pickle.dump(poses, open(new_filename, 'wb'))
    clp.reader.close()
    clp.audio.reader.close_proc()

# Dense Pose Calculation
pose_model = TfPoseEstimator(get_graph_path('cmu'), target_size=(432, 368))
pcalc_pose = partial(calc_sub, model=pose_model, get_func=get_poses)

stream = (mp.get_datastream(data_dir, ext='.resized.mp4')
          | mp.select_field('filename')
          | cachecompute('.resized.mp4', '.poses.pickle', pcalc_pose,
                         lambda x, nx: print("Skipping {}".format(x)))
          | execute)
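# get_poses is defined elsewhere in the repo; with the tf-pose-estimation
# package used above, a minimal sketch could simply run inference on the
# cropped region (the upsample_size value here is an assumption):

def get_poses_sketch(model, img):
    # Run pose inference on a single cropped frame region; returns the list
    # of detected humans with body-part coordinates.
    return model.inference(img, resize_to_default=True, upsample_size=4.0)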
    conf = yaml.safe_load(f)

parser = argparse.ArgumentParser()
parser.add_argument("dir", help="Directory of people photos", default=".")
parser.add_argument("--facegroup", help="Name of face group", default="maingroup")
args = parser.parse_args()

face.BaseUrl.set(conf['FaceApi']['Endpoint'])
face.Key.set(conf['FaceApi']['Key'])

classes = mp.get_classes(args.dir)
data = mp.get_datastream(args.dir, classes=classes) | mp.as_list

print("Person Group Trainer Utility")
print(" + found {} people".format(len(classes)))
print(" + Creating face group {}".format(args.facegroup))

face.person_group.create(args.facegroup, name=args.facegroup)

people = {}
for p in classes.keys():
    photos = data | mp.filter('class_name', lambda x: x == p) | mp.as_list
    print("Adding person {} - {} pics".format(p, len(photos)))
    pers = face.person.create(args.facegroup, p)
    people[pers['personId']] = p
    for x in photos:
        print(" + Adding photo {}".format(x['filename']), end='')
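        # The snippet is cut off inside the photo loop. A sketch of the likely
        # continuation, assuming the cognitive_face-style calls this script
        # already relies on (add each photo to the person, then train):
        #
        # res = face.person.add_face(x['filename'], args.facegroup, pers['personId'])
        # print(" -> done")

# once all people and photos are added, kick off training (sketch):
# face.person_group.train(args.facegroup)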
    # tail of calc_fflow(filename, new_filename, fflow, fps): compute the
    # focused flow over the loaded frames/boxes and serialize it
    flow = fflow.run(frames, boxes)
    pickle.dump(flow, open(new_filename, 'wb'))
    clp.reader.close()
    clp.audio.reader.close_proc()

# params for Shi-Tomasi corner detection
feature_params = dict(maxCorners=50,
                      qualityLevel=0.1,
                      minDistance=5,
                      blockSize=5)

# params for Lucas-Kanade optical flow
lk_params = dict(winSize=(15, 15),
                 maxLevel=3,
                 criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

fflow = FocusedFlow(feature_params, lk_params)
pcalc_fflow = partial(calc_fflow, fflow=fflow, fps=5)

stream = (mp.get_datastream(data_dir,
                            ext=".full.mp4",
                            classes={'noshot': 1, 'shot': 2, 'attack': 0})
          | mp.select_field('filename')
          | cachecompute(".full.mp4", ".fflow.pickle", pcalc_fflow,
                         lambda x, nx: print("Skipping {}".format(x)))
          | execute)
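# FocusedFlow is defined elsewhere in the repo; a minimal sketch of the
# OpenCV calls it presumably wraps, tracking Shi-Tomasi corners between two
# consecutive grayscale frames with pyramidal Lucas-Kanade:

def lk_flow_step(prev_gray, next_gray, feature_params, lk_params):
    # Detect good features to track in the previous frame...
    p0 = cv2.goodFeaturesToTrack(prev_gray, mask=None, **feature_params)
    if p0 is None:
        return np.empty((0, 2)), np.empty((0, 2))
    # ...and follow them into the next frame with Lucas-Kanade
    p1, st, err = cv2.calcOpticalFlowPyrLK(prev_gray, next_gray, p0, None, **lk_params)
    good = st.reshape(-1) == 1
    return p0.reshape(-1, 2)[good], p1.reshape(-1, 2)[good]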
import mPyPl.utils.image as mpui
from mPyPl.utils.pipeutils import *
from pipe import *
import functools as fn
import keras

print(mp.__version__)

train_dir = os.path.join(base_dir, 'training_set')
test_dir = os.path.join(base_dir, 'test_set')

# we need to explicitly get classes in order to have the same
# class-to-int correspondence for the train and test sets
classes = mp.get_classes(train_dir)

# Show the first few images from the training set
seq = (mp.get_datastream(train_dir, classes=classes)
       | take(10)
       | mp.apply('filename', 'image',
                  lambda f: mpui.im_resize_pad(cv2.imread(f), size=(100, 100)))
       | mp.select_field('image')
       | pexec(fn.partial(mpui.show_images, cols=2)))

transform = keras.preprocessing.image.ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')
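# One typical way to consume the augmenting generator (target_size,
# batch_size and class_mode below are illustrative assumptions, not values
# taken from this repo):
#
# train_gen = transform.flow_from_directory(train_dir,
#                                           target_size=(100, 100),
#                                           batch_size=32,
#                                           class_mode='categorical')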
def resize_save(x, nfn):
    # the 'video' field holds the (original, resized) clip pair from resize()
    clip, fx = x['video']
    fx.write_videofile(nfn)
    clip.close()

def load_resize(x):
    fn = x['filename']
    nfn = fn.replace('.full.mp4', '.resized.mp4')
    x['filename'] = nfn
    if os.path.isfile(nfn):
        print("Loading resized {}".format(nfn))
        vc = VideoFileClip(nfn)
        return vc
    else:
        print("Resizing {}".format(fn))
        vc = VideoFileClip(fn).fx(vfx.resize, width=video_width)
        vc.write_videofile(nfn)
        return vc

def resize(x):
    v = VideoFileClip(x)
    vfxc = v.fx(vfx.resize, width=video_width)
    return (v, vfxc)

if __name__ == "__main__":
    (mp.get_datastream(data_dir, ext=".full.mp4")
     | where(lambda f: not os.path.isfile(f['filename'].replace(".full.mp4", ".resized.mp4")))
     | mp.apply('filename', 'video', resize)
     | cachecomputex(".full.mp4", ".resized.mp4", resize_save,
                     lambda x, nx: print("Skipping {}".format(x['filename'])))
     | execute)
def main(data_dir):
    x = (mp.get_datastream(data_dir, ext=".resized.mp4")
         | mp.select_field("filename")
         | mp.as_list)
    return detect_and_write(x, filename=os.path.join(data_dir, "scene.changes.pkl"))
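# detect_and_write is defined elsewhere in the repo. A minimal sketch of a
# scene-change detector over the listed files, using mean absolute frame
# difference with an illustrative threshold (not the repo's actual method):

import pickle
import numpy as np
from moviepy.editor import VideoFileClip

def detect_and_write_sketch(filenames, filename, threshold=30.0):
    """For each video, record frame times where the mean absolute difference
    to the previous frame exceeds the threshold, then pickle the
    {file: [times]} mapping. A sketch under stated assumptions."""
    changes = {}
    for fn in filenames:
        clip = VideoFileClip(fn)
        prev, times = None, []
        for t, frame in clip.iter_frames(with_times=True, fps=5):
            cur = frame.astype(np.float32)
            if prev is not None and np.mean(np.abs(cur - prev)) > threshold:
                times.append(t)
            prev = cur
        clip.reader.close()
        changes[fn] = times
    pickle.dump(changes, open(filename, 'wb'))
    return changes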