def get_video_frame_annotation():
    setup_tensorflow()
    output_dir = mkdtemp()
    png_output = join(output_dir, "png")
    oflow_output = join(output_dir, "oflow")

    de = DataExtraction(FrameInput(
        input_path="test-data/video-clip.mp4", bsize=128
    ))
    frames = [1, 3, 310]  # make sure there is an empty batch: 128-255
    de.run_annotators([
        ColorHistogramAnnotator(frames=frames, colorspace="lab"),
        DominantColorAnnotator(frames=frames),
        DiffAnnotator(quantiles=[40]),
        EmbedAnnotator(embedding=EmbedFrameKerasResNet50(), frames=frames),
        FaceAnnotator(
            detector=FaceDetectMtcnn(),
            embedding=FaceEmbedVgg2(),
            frames=frames,
        ),
        HOFMAnnotator(frames=frames),
        ObjectAnnotator(detector=ObjectDetectRetinaNet(), frames=frames),
        OpticalFlowAnnotator(output_dir=oflow_output, frames=frames),
        PngAnnotator(output_dir=png_output, frames=frames),
        ImgAnnotator(),
    ])

    return de, output_dir
def get_image_annotation():
    setup_tensorflow()
    output_dir = mkdtemp()
    png_output = join(output_dir, "png")

    # image input takes a glob of paths rather than a single video file
    de = DataExtraction(ImageInput(input_paths="test-data/img/*"))
    de.run_annotators([
        ColorHistogramAnnotator(colorspace="luv"),
        DominantColorAnnotator(),
        EmbedAnnotator(embedding=EmbedFrameKerasResNet50()),
        FaceAnnotator(detector=FaceDetectMtcnn(), embedding=FaceEmbedVgg2()),
        ObjectAnnotator(detector=ObjectDetectRetinaNet()),
        PngAnnotator(output_dir=png_output, size=229),
        ImgAnnotator(),
    ])

    return de, output_dir
def get_video_annotation():
    setup_tensorflow()
    output_dir = mkdtemp()
    png_output = join(output_dir, "png")
    oflow_output = join(output_dir, "oflow")

    de = DataExtraction(FrameInput(
        input_path="test-data/video-clip.mp4", bsize=256
    ))
    freq = 128
    de.run_annotators([
        ColorHistogramAnnotator(freq=freq),
        DominantColorAnnotator(freq=freq),
        DiffAnnotator(quantiles=[40]),
        EmbedAnnotator(embedding=EmbedFrameKerasResNet50(), freq=freq),
        FaceAnnotator(
            detector=FaceDetectMtcnn(),
            embedding=FaceEmbedVgg2(),
            freq=freq,
        ),
        HOFMAnnotator(freq=freq),
        ObjectAnnotator(detector=ObjectDetectRetinaNet(), freq=freq),
        OpticalFlowAnnotator(output_dir=oflow_output, freq=freq),
        PngAnnotator(output_dir=png_output, freq=freq),
    ], max_batch=2)

    return de, output_dir
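# A minimal smoke-check sketch of how the fixtures above might be consumed.
# It assumes DataExtraction exposes a get_data() accessor returning one output
# table per annotator; that accessor name is an assumption here, so verify it
# against the installed dvt version before relying on this.
def check_video_annotation_smoke():
    from os import listdir

    de, output_dir = get_video_annotation()
    data = de.get_data()  # assumed accessor: {annotator name: output table}
    assert len(data) > 0  # one entry per annotator passed to run_annotators
    # PngAnnotator and OpticalFlowAnnotator write images under output_dir
    assert len(listdir(join(output_dir, "png"))) > 0
    assert len(listdir(join(output_dir, "oflow"))) > 0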
def test_fixed_frames(self):
    # only grab these frames
    frames = [0, 3, 17, 18, 21]

    # create processor pipeline
    fpobj = FrameProcessor()
    fpobj.load_annotator(CIElabAnnotator(frames=frames))
    fpobj.load_annotator(ClutterAnnotator(frames=frames))
    fpobj.load_annotator(
        EmbedAnnotator(embedding=EmbedFrameKerasResNet50(), frames=frames))
    fpobj.load_annotator(
        FaceAnnotator(detector=FaceDetectDlib(), frames=frames))
    fpobj.load_annotator(
        ObjectAnnotator(detector=ObjectDetectRetinaNet(), frames=frames))

    # run over the input, making sure to include a batch (the middle one)
    # that does not have any data
    finput = FrameInput("test-data/video-clip.mp4", bsize=8)
    fpobj.process(finput, max_batch=3)

    # check that the output is the correct size
    assert fpobj.collect("clutter")["frame"].tolist() == frames
    assert fpobj.collect("cielab")["frame"].tolist() == frames
    assert fpobj.collect("embed")["frame"].tolist() == frames
    assert set(fpobj.collect("face")["frame"]) == set(frames)
    assert set(fpobj.collect("object")["frame"]) == set(frames)
def test_fprint(self):
    face_anno = FaceAnnotator(
        detector=FaceDetectDlib(), embedding=FaceEmbedVgg2(), freq=4
    )
    fpobj = FrameProcessor()
    fpobj.load_annotator(face_anno)
    finput = FrameInput("test-data/video-clip.mp4", bsize=8)
    fpobj.process(finput, max_batch=2)
    obj_out = fpobj.collect_all()

    # use the first two detected face embeddings as reference fingerprints
    pa = PeopleAggregator(
        face_names=["person 1", "person 2"],
        fprint=obj_out["face"]["embed"][[0, 1]],
    )
    agg = pa.aggregate(obj_out).todf()

    assert set(agg.keys()) == set([
        "video", "frame", "top", "bottom", "right", "left",
        "confidence", "person", "person-dist",
    ])
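# Hedged follow-on sketch: given the columns asserted above, the aggregated
# table can be reduced to the best-matching person per frame by taking the
# row with the smallest "person-dist" in each frame group. The DataFrame
# below is toy data, not output from the pipeline.
def check_people_best_match():
    import pandas as pd

    agg = pd.DataFrame({
        "frame": [0, 0, 4],
        "person": ["person 1", "person 2", "person 1"],
        "person-dist": [0.35, 0.80, 0.20],
    })
    # idxmin within each frame group keeps the closest reference fingerprint
    best = agg.loc[agg.groupby("frame")["person-dist"].idxmin()]
    assert best["person"].tolist() == ["person 1", "person 1"]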
def test_face_detector_cutoff_mtcnn(self):
    anno = FaceAnnotator(detector=FaceDetectMtcnn(cutoff=0.99997), freq=4)
    fpobj = FrameProcessor()
    fpobj.load_annotator(anno)
    finput = FrameInput("test-data/video-clip.mp4", bsize=8)
    fpobj.process(finput, max_batch=2)
    obj_out = fpobj.collect("face")

    expected_keys = [
        "video", "frame", "confidence",
        "top", "bottom", "left", "right",
    ]
    assert set(obj_out.keys()) == set(expected_keys)
    assert obj_out.shape == (4, 7)
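# Sketch of a related property check, not in the original suite: cutoff is a
# confidence threshold, so raising it should never produce more detections.
# Only classes already exercised above are used; the 0.9 value is arbitrary.
def check_mtcnn_cutoff_monotone(self):
    counts = []
    for cutoff in [0.9, 0.99997]:
        fpobj = FrameProcessor()
        fpobj.load_annotator(
            FaceAnnotator(detector=FaceDetectMtcnn(cutoff=cutoff), freq=4))
        fpobj.process(
            FrameInput("test-data/video-clip.mp4", bsize=8), max_batch=2)
        counts.append(fpobj.collect("face").shape[0])
    assert counts[0] >= counts[1]  # lower cutoff keeps at least as many rows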
def test_face_vgg2_embed(self):
    anno = FaceAnnotator(
        detector=FaceDetectDlib(), embedding=FaceEmbedVgg2(), freq=4
    )
    fpobj = FrameProcessor()
    fpobj.load_annotator(anno)
    finput = FrameInput("test-data/video-clip.mp4", bsize=8)
    fpobj.process(finput, max_batch=2)
    obj_out = fpobj.collect("face")

    expected_keys = [
        "video", "frame", "confidence",
        "top", "bottom", "left", "right", "embed",
    ]
    assert set(obj_out.keys()) == set(expected_keys)
    assert issubclass(type(obj_out["embed"]), np.ndarray)
    assert obj_out["embed"].shape == (8, 2048)
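# Follow-on sketch: the (8, 2048) VGGFace2 embeddings asserted above can be
# compared with plain cosine similarity using numpy alone; faces of the same
# person should score close to 1.0. No additional dvt API is assumed.
def check_embed_cosine_similarity(self):
    fpobj = FrameProcessor()
    fpobj.load_annotator(FaceAnnotator(
        detector=FaceDetectDlib(), embedding=FaceEmbedVgg2(), freq=4))
    fpobj.process(
        FrameInput("test-data/video-clip.mp4", bsize=8), max_batch=2)
    emb = fpobj.collect("face")["embed"]  # shape (8, 2048), per the test above

    # normalise rows; the Gram matrix then holds pairwise cosine similarities
    unit = emb / np.linalg.norm(emb, axis=1, keepdims=True)
    sim = unit @ unit.T
    assert sim.shape == (8, 8)
    assert np.allclose(np.diag(sim), 1.0)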