def three_people():
    from django.db.models import F
    from query.models import Face
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array, bbox_payload_parser
    from rekall.merge_ops import payload_plus
    from esper.rekall import intrvllists_to_result_bbox
    from rekall.payload_predicates import payload_satisfies
    from rekall.spatial_predicates import scene_graph
    from rekall.bbox_predicates import height_at_least, left_of, same_value

    MIN_FACE_HEIGHT = 0.3
    EPSILON = 0.05

    # Annotate face rows with start and end frames and the video ID
    faces = Face.objects.annotate(
        min_frame=F('frame__number'),
        max_frame=F('frame__number'),
        video_id=F('frame__video_id'))

    # Materialize all the faces and load them into rekall with bounding box payloads.
    # Then coalesce them so that all faces in the same frame are in the same interval.
    # NOTE that this is slow right now since we're loading all faces!
    face_lists = VideoIntervalCollection.from_django_qs(
        faces,
        with_payload=in_array(
            bbox_payload_parser(VideoIntervalCollection.django_accessor))
    ).coalesce(payload_merge_op=payload_plus)

    three_people_scene_graph = {
        'nodes': [
            {'name': 'face1', 'predicates': [height_at_least(MIN_FACE_HEIGHT)]},
            {'name': 'face2', 'predicates': [height_at_least(MIN_FACE_HEIGHT)]},
            {'name': 'face3', 'predicates': [height_at_least(MIN_FACE_HEIGHT)]}
        ],
        'edges': [
            {'start': 'face1', 'end': 'face2',
             'predicates': [left_of(), same_value('y1', epsilon=EPSILON)]},
            {'start': 'face2', 'end': 'face3',
             'predicates': [left_of(), same_value('y1', epsilon=EPSILON)]}
        ]
    }

    three_people = face_lists.filter(
        payload_satisfies(scene_graph(three_people_scene_graph, exact=True)))

    # Post-process to display in Esper widget
    return intrvllists_to_result_bbox(
        three_people.get_allintervals(), limit=100, stride=100)
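
# A minimal sketch of how the scene graph above matches faces, assuming
# scene_graph() returns a predicate over a plain list of bbox dicts keyed by
# 'x1'/'y1'/'x2'/'y2' (the payloads bbox_payload_parser produces). The
# coordinate values below are made up for illustration.
def _three_people_scene_graph_sketch():
    from rekall.spatial_predicates import scene_graph
    from rekall.bbox_predicates import height_at_least, left_of, same_value

    graph = {
        'nodes': [
            {'name': 'face1', 'predicates': [height_at_least(0.3)]},
            {'name': 'face2', 'predicates': [height_at_least(0.3)]},
            {'name': 'face3', 'predicates': [height_at_least(0.3)]}
        ],
        'edges': [
            {'start': 'face1', 'end': 'face2',
             'predicates': [left_of(), same_value('y1', epsilon=0.05)]},
            {'start': 'face2', 'end': 'face3',
             'predicates': [left_of(), same_value('y1', epsilon=0.05)]}
        ]
    }
    pred = scene_graph(graph, exact=True)

    three_aligned_faces = [
        {'x1': 0.10, 'y1': 0.20, 'x2': 0.25, 'y2': 0.55},
        {'x1': 0.40, 'y1': 0.22, 'x2': 0.55, 'y2': 0.57},
        {'x1': 0.70, 'y1': 0.21, 'x2': 0.85, 'y2': 0.56}
    ]
    print(pred(three_aligned_faces))      # expected True: three aligned faces, left to right
    print(pred(three_aligned_faces[:2]))  # expected False: exact=True requires all three nodes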
def man_woman_up_close():
    from django.db.models import F
    from query.models import FaceGender
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array, bbox_payload_parser, merge_dict_parsers, dict_payload_parser
    from rekall.merge_ops import payload_plus
    from esper.rekall import intrvllists_to_result_bbox
    from rekall.payload_predicates import payload_satisfies
    from rekall.spatial_predicates import scene_graph
    from rekall.bbox_predicates import height_at_least

    MIN_FACE_CONFIDENCE = 0.95
    MIN_GENDER_CONFIDENCE = 0.95
    MIN_FACE_HEIGHT = 0.6

    # Annotate face rows with start and end frames and the video ID.
    # NOTE: `video_name` is assumed to be defined in the enclosing scope
    # (e.g. set to the title of the film to query).
    faces_with_gender = FaceGender.objects.filter(
        face__frame__video__name=video_name
    ).annotate(
        min_frame=F('face__frame__number'),
        max_frame=F('face__frame__number'),
        video_id=F('face__frame__video_id'),
        bbox_x1=F('face__bbox_x1'),
        bbox_y1=F('face__bbox_y1'),
        bbox_x2=F('face__bbox_x2'),
        bbox_y2=F('face__bbox_y2'),
        gender_name=F('gender__name'),
        face_probability=F('face__probability'))

    faces = VideoIntervalCollection.from_django_qs(
        faces_with_gender,
        with_payload=in_array(merge_dict_parsers([
            bbox_payload_parser(VideoIntervalCollection.django_accessor),
            dict_payload_parser(VideoIntervalCollection.django_accessor,
                                {'gender': 'gender_name'}),
            dict_payload_parser(VideoIntervalCollection.django_accessor,
                                {'gender_probability': 'probability'}),
            dict_payload_parser(VideoIntervalCollection.django_accessor,
                                {'face_probability': 'face_probability'})
        ]))
    ).coalesce(payload_merge_op=payload_plus)

    graph = {
        'nodes': [
            {
                'name': 'face_male',
                'predicates': [
                    height_at_least(MIN_FACE_HEIGHT),
                    lambda payload: payload['gender'] == 'M',
                    lambda payload: payload['face_probability'] > MIN_FACE_CONFIDENCE,
                    lambda payload: payload['gender_probability'] > MIN_GENDER_CONFIDENCE
                ]
            },
            {
                'name': 'face_female',
                'predicates': [
                    height_at_least(MIN_FACE_HEIGHT),
                    lambda payload: payload['gender'] == 'F',
                    lambda payload: payload['face_probability'] > MIN_FACE_CONFIDENCE,
                    lambda payload: payload['gender_probability'] > MIN_GENDER_CONFIDENCE
                ]
            },
        ],
        'edges': []
    }

    mf_up_close = faces.filter(payload_satisfies(scene_graph(graph, exact=True)))

    return intrvllists_to_result_bbox(
        mf_up_close.get_allintervals(), limit=100, stride=100)
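
# For reference, after the parsers and coalesce above, each interval's payload is
# expected to be a list with one dict per face in the frame. A hedged sketch of
# the shape (keys follow the parsers configured above; values are made up):
#
# [
#     {'x1': 0.12, 'y1': 0.20, 'x2': 0.38, 'y2': 0.85,
#      'gender': 'M', 'gender_probability': 0.98, 'face_probability': 0.99},
#     {'x1': 0.58, 'y1': 0.22, 'x2': 0.86, 'y2': 0.88,
#      'gender': 'F', 'gender_probability': 0.97, 'face_probability': 0.99}
# ]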
def panels_rekall():
    from django.db.models import F
    from query.models import LabeledCommercial, Face
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array, bbox_payload_parser
    from rekall.merge_ops import payload_plus
    from rekall.bbox_predicates import height_at_least, same_value, left_of
    from rekall.spatial_predicates import scene_graph
    from rekall.payload_predicates import payload_satisfies
    from esper.rekall import intrvllists_to_result_bbox

    # Get list of sandbox video IDs
    sandbox_videos = [
        row.video_id
        for row in LabeledCommercial.objects.distinct('video_id')
    ]

    faces_qs = Face.objects.filter(shot__video_id__in=sandbox_videos).annotate(
        video_id=F("shot__video_id"),
        min_frame=F("shot__min_frame"),
        max_frame=F("shot__max_frame")
    )

    # One interval for each face
    faces = VideoIntervalCollection.from_django_qs(
        faces_qs,
        with_payload=in_array(
            bbox_payload_parser(VideoIntervalCollection.django_accessor)))

    # Merge shots
    faces = faces.coalesce(payload_merge_op=payload_plus)

    # Define a scene graph for things that look like panels
    three_faces_scene_graph = {
        'nodes': [
            {'name': 'face1', 'predicates': [height_at_least(0.3)]},
            {'name': 'face2', 'predicates': [height_at_least(0.3)]},
            {'name': 'face3', 'predicates': [height_at_least(0.3)]}
        ],
        'edges': [
            {'start': 'face1', 'end': 'face2',
             'predicates': [same_value('y1', epsilon=0.05), left_of()]},
            {'start': 'face2', 'end': 'face3',
             'predicates': [same_value('y1', epsilon=0.05), left_of()]},
        ]
    }

    panels = faces.filter(payload_satisfies(
        scene_graph(three_faces_scene_graph, exact=True)
    ))

    return intrvllists_to_result_bbox(panels.get_allintervals())
def shot_reverse_shot_complex():
    import numpy as np
    from django.db.models import F
    from query.models import Face, Shot
    from rekall.temporal_predicates import overlaps, before, after
    from rekall.merge_ops import payload_second, payload_plus
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.interval_list import Interval, IntervalList
    from rekall.parsers import in_array, bbox_payload_parser
    from rekall.payload_predicates import payload_satisfies
    from rekall.list_predicates import length_at_most
    from rekall.logical_predicates import and_pred
    from rekall.spatial_predicates import scene_graph, make_region
    from rekall.bbox_predicates import height_at_least
    from esper.rekall import intrvllists_to_result_with_objects

    VIDEO_NAME = 'godfather part iii'
    MAX_FACE_MOVEMENT = 0.15
    MIN_FACE_HEIGHT = 0.2
    MAX_FACES_ON_SCREEN = 4
    RIGHT_HALF_MIN_X = 0.33
    LEFT_HALF_MAX_X = 0.66
    SHOTS_LABELER_ID = 64

    # faces are sampled every 12 frames
    SAMPLING_RATE = 12

    # Annotate face rows with start and end frames and the video ID
    faces = Face.objects.annotate(
        min_frame=F('frame__number'),
        max_frame=F('frame__number'),
        video_id=F('frame__video_id')
    ).filter(frame__video__name__contains=VIDEO_NAME)

    shots = VideoIntervalCollection.from_django_qs(
        Shot.objects.filter(
            video__name__contains=VIDEO_NAME,
            labeler_id=SHOTS_LABELER_ID),
        with_payload=lambda obj: [])

    # vids are all faces for each frame
    vids = VideoIntervalCollection.from_django_qs(
        faces.filter(probability__gte=0.99),
        with_payload=in_array(
            bbox_payload_parser(VideoIntervalCollection.django_accessor))
    ).coalesce(payload_merge_op=payload_plus)

    right_half = make_region(RIGHT_HALF_MIN_X, 0.0, 1.0, 1.0)
    left_half = make_region(0.0, 0.0, LEFT_HALF_MAX_X, 1.0)

    graph = {
        'nodes': [{
            'name': 'face',
            'predicates': [height_at_least(MIN_FACE_HEIGHT)]
        }],
        'edges': []
    }

    faces_on_right = vids.filter(
        and_pred(payload_satisfies(length_at_most(MAX_FACES_ON_SCREEN)),
                 payload_satisfies(scene_graph(graph, region=right_half))))
    faces_on_left = vids.filter(
        and_pred(payload_satisfies(length_at_most(MAX_FACES_ON_SCREEN)),
                 payload_satisfies(scene_graph(graph, region=left_half))))

    def wrap_list(intvl):
        intvl.payload = [intvl.payload]
        return intvl

    def get_height(box):
        return box['y2'] - box['y1']

    def get_center(box):
        return ((box['x1'] + box['x2']) / 2, (box['y1'] + box['y2']) / 2)

    def get_distance(pt1, pt2):
        return np.sqrt((pt1[0] - pt2[0])**2 + (pt1[1] - pt2[1])**2)

    def find_highest_box(boxes):
        if len(boxes) == 0:
            return None
        result = boxes[0]
        best = get_height(result)
        for i in range(1, len(boxes)):
            h = get_height(boxes[i])
            if h > best:
                best = h
                result = boxes[i]
        return result

    def take_highest_in_frame(intvl):
        result = []
        for faces_in_frame in intvl.payload:
            largest = find_highest_box(faces_in_frame)
            if largest is not None:
                result.append(largest)
        intvl.payload = result
        return intvl

    # Check if the displacement of the box center between frames is within `dist`
    def inter_frame_movement_less_than(dist):
        def check(boxes):
            for b1, b2 in zip(boxes, boxes[1:]):
                if get_distance(get_center(b1), get_center(b2)) > dist:
                    return False
            return True
        return check

    # Payload is a list; each element is a list of faces for a frame
    shots_with_face_on_right = shots.merge(
        faces_on_right, predicate=overlaps(),
        payload_merge_op=payload_second
    ).map(wrap_list).coalesce(
        payload_merge_op=payload_plus
    ).map(take_highest_in_frame).filter(
        payload_satisfies(inter_frame_movement_less_than(MAX_FACE_MOVEMENT)))

    shots_with_face_on_left = shots.merge(
        faces_on_left, predicate=overlaps(),
        payload_merge_op=payload_second
    ).map(wrap_list).coalesce(
        payload_merge_op=payload_plus
    ).map(take_highest_in_frame).filter(
        payload_satisfies(inter_frame_movement_less_than(MAX_FACE_MOVEMENT)))

    # Right-Left-Right sequences
    shot_reverse_shot_1 = shots_with_face_on_right.merge(
        shots_with_face_on_left, predicate=before(max_dist=1)
    ).merge(shots_with_face_on_right, predicate=before(max_dist=1))

    # Left-Right-Left sequences
    shot_reverse_shot_2 = shots_with_face_on_left.merge(
        shots_with_face_on_right, predicate=before(max_dist=1)
    ).merge(shots_with_face_on_left, predicate=before(max_dist=1))

    shot_reverse_shot = shot_reverse_shot_1.set_union(
        shot_reverse_shot_2).coalesce()

    result = intrvllists_to_result_with_objects(
        shot_reverse_shot.get_allintervals(),
        payload_to_objs=lambda p, v: [])
    return result
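
# A small sketch of the before(max_dist=1) chaining used above, assuming the old
# rekall Interval(start, end, payload) constructor and that temporal predicates
# take a pair of intervals. Frame numbers are illustrative.
def _before_predicate_sketch():
    from rekall.interval_list import Interval
    from rekall.temporal_predicates import before

    right_shot = Interval(100, 148, [])  # shot with the face on the right
    left_shot = Interval(148, 196, [])   # the shot immediately following it

    print(before(max_dist=1)(right_shot, left_shot))  # expected True: gap of 0 frames
    print(before(max_dist=1)(left_shot, right_shot))  # expected False: wrong order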
def shot_reverse_shot():
    from django.db.models import F
    from query.models import Face
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array, bbox_payload_parser
    from rekall.merge_ops import payload_plus
    from rekall.payload_predicates import payload_satisfies
    from rekall.list_predicates import length_at_most
    from rekall.logical_predicates import and_pred, or_pred
    from rekall.spatial_predicates import scene_graph, make_region
    from rekall.temporal_predicates import before, after
    from rekall.bbox_predicates import height_at_least
    from esper.rekall import intrvllists_to_result, intrvllists_to_result_bbox, \
        intrvllists_to_result_with_objects, add_intrvllists_to_result

    # If True, visualize results in a timeline
    TIMELINE_OUTPUT = False

    RIGHT_HALF_MIN_X = 0.45
    LEFT_HALF_MAX_X = 0.55
    MIN_FACE_HEIGHT = 0.4
    MAX_FACES_ON_SCREEN = 2

    # faces are sampled every 12 frames
    SAMPLING_RATE = 12
    ONE_SECOND = 24
    FOUR_SECONDS = 96
    TEN_SECONDS = 240

    # Annotate face rows with start and end frames and the video ID
    faces = Face.objects.annotate(
        min_frame=F('frame__number'),
        max_frame=F('frame__number'),
        video_id=F('frame__video_id'))

    right_half = make_region(RIGHT_HALF_MIN_X, 0.0, 1.0, 1.0)
    left_half = make_region(0.0, 0.0, LEFT_HALF_MAX_X, 1.0)

    graph = {
        'nodes': [{
            'name': 'face',
            'predicates': [height_at_least(MIN_FACE_HEIGHT)]
        }],
        'edges': []
    }

    vids = VideoIntervalCollection.from_django_qs(
        faces,
        with_payload=in_array(
            bbox_payload_parser(VideoIntervalCollection.django_accessor))
    ).coalesce(payload_merge_op=payload_plus)

    # Get sequences where there's a face on the right half of the screen and
    # there are at most two faces
    faces_on_right = vids.filter(
        and_pred(payload_satisfies(length_at_most(MAX_FACES_ON_SCREEN)),
                 payload_satisfies(scene_graph(graph, region=right_half)))
    ).dilate(SAMPLING_RATE / 2).coalesce()

    # Get sequences where there's a face on the left half of the screen and
    # there are at most two faces
    faces_on_left = vids.filter(
        and_pred(payload_satisfies(length_at_most(MAX_FACES_ON_SCREEN)),
                 payload_satisfies(scene_graph(graph, region=left_half)))
    ).dilate(SAMPLING_RATE / 2).coalesce()

    # Sequences where faces on the right are up to one second before/after
    # faces on the left.
    # Four seconds of buffer time between left-then-right/right-then-left segments.
    # Only keep remaining sequences that last longer than ten seconds.
    shot_reverse_shot = faces_on_right.merge(
        faces_on_left,
        predicate=or_pred(before(max_dist=ONE_SECOND),
                          after(max_dist=ONE_SECOND),
                          arity=2)
    ).dilate(FOUR_SECONDS).coalesce().dilate(
        -1 * FOUR_SECONDS).filter_length(min_length=TEN_SECONDS)

    # Post-process to display in Esper widget
    if TIMELINE_OUTPUT:
        results = intrvllists_to_result(shot_reverse_shot.get_allintervals())
        add_intrvllists_to_result(results, faces_on_left.get_allintervals(),
                                  color='black')
        add_intrvllists_to_result(results, faces_on_right.get_allintervals(),
                                  color='green')
    else:
        results = intrvllists_to_result_with_objects(
            shot_reverse_shot.get_allintervals(), lambda payload, video: [])
    return results
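
# The dilate / coalesce / negative-dilate idiom above is what merges alternating
# left/right segments that sit close together. A sketch with the same four-second
# window, assuming the old IntervalList API (a constructor taking Interval objects,
# and dilate accepting negative values as used above):
def _dilate_coalesce_sketch():
    from rekall.interval_list import Interval, IntervalList

    # Two short segments separated by a 96-frame (four-second) gap.
    segments = IntervalList([Interval(200, 224, []), Interval(320, 344, [])])

    merged = segments.dilate(96).coalesce().dilate(-96)
    # The 96-frame gap gets bridged, leaving one interval spanning [200, 344].
    for intrvl in merged.get_intervals():
        print(intrvl.get_start(), intrvl.get_end())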
def harry_ron_hermione():
    from django.db.models import F
    from query.models import FaceCharacterActor
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array, bbox_payload_parser, merge_dict_parsers, dict_payload_parser
    from rekall.merge_ops import payload_plus
    from rekall.payload_predicates import payload_satisfies
    from rekall.spatial_predicates import scene_graph
    from rekall.bbox_predicates import height_at_least, left_of, same_value, same_height
    from esper.rekall import intrvllists_to_result_bbox

    MIN_FACE_HEIGHT = 0.25
    EPSILON = 0.15
    NAMES = ['ron weasley', 'harry potter', 'hermione granger']

    # Annotate face rows with start and end frames and the video ID
    faces_with_character_actor_qs = FaceCharacterActor.objects.annotate(
        min_frame=F('face__frame__number'),
        max_frame=F('face__frame__number'),
        video_id=F('face__frame__video_id'),
        bbox_x1=F('face__bbox_x1'),
        bbox_y1=F('face__bbox_y1'),
        bbox_x2=F('face__bbox_x2'),
        bbox_y2=F('face__bbox_y2'),
        character_name=F('characteractor__character__name')
    ).filter(face__frame__video__name__contains="harry potter")

    faces_with_identity = VideoIntervalCollection.from_django_qs(
        faces_with_character_actor_qs,
        with_payload=in_array(merge_dict_parsers([
            bbox_payload_parser(VideoIntervalCollection.django_accessor),
            dict_payload_parser(VideoIntervalCollection.django_accessor,
                                {'character': 'character_name'}),
        ]))
    ).coalesce(payload_merge_op=payload_plus)

    harry_ron_hermione_scene_graph = {
        'nodes': [
            {
                'name': 'face1',
                'predicates': [
                    height_at_least(MIN_FACE_HEIGHT),
                    lambda f: f['character'] == NAMES[0]
                ]
            },
            {
                'name': 'face2',
                'predicates': [
                    height_at_least(MIN_FACE_HEIGHT),
                    lambda f: f['character'] == NAMES[1]
                ]
            },
            {
                'name': 'face3',
                'predicates': [
                    height_at_least(MIN_FACE_HEIGHT),
                    lambda f: f['character'] == NAMES[2]
                ]
            }
        ],
        'edges': [
            {
                'start': 'face1', 'end': 'face2',
                'predicates': [
                    same_value('y1', epsilon=EPSILON),
                    same_height(epsilon=EPSILON)
                ]
            },
            {
                'start': 'face2', 'end': 'face3',
                'predicates': [
                    same_value('y1', epsilon=EPSILON),
                    same_height(epsilon=EPSILON)
                ]
            },
            {
                'start': 'face1', 'end': 'face3',
                'predicates': [
                    same_value('y1', epsilon=EPSILON),
                    same_height(epsilon=EPSILON)
                ]
            }
        ]
    }

    harry_ron_hermione = faces_with_identity.filter(payload_satisfies(scene_graph(
        harry_ron_hermione_scene_graph, exact=True
    )))

    return intrvllists_to_result_bbox(
        harry_ron_hermione.get_allintervals(), limit=100, stride=10)
def shot_reverse_shot_intensification():
    from django.db.models import F
    from query.models import Face
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array, bbox_payload_parser, merge_dict_parsers, named_payload
    from rekall.merge_ops import payload_plus, merge_named_payload, payload_first
    from rekall.payload_predicates import payload_satisfies, on_name
    from rekall.list_predicates import length_at_most
    from rekall.logical_predicates import and_pred, or_pred
    from rekall.spatial_predicates import scene_graph, make_region
    from rekall.temporal_predicates import before, after
    from rekall.bbox_predicates import height_at_least
    from esper.rekall import intrvllists_to_result, intrvllists_to_result_bbox, \
        intrvllists_to_result_with_objects, add_intrvllists_to_result

    # If True, visualize results in a timeline
    TIMELINE_OUTPUT = False

    RIGHT_HALF_MIN_X = 0.45
    LEFT_HALF_MAX_X = 0.55
    MIN_FACE_HEIGHT = 0.4
    MAX_FACES_ON_SCREEN = 2

    # faces are sampled every 12 frames
    SAMPLING_RATE = 12
    ONE_SECOND = 24
    FOUR_SECONDS = 96
    TEN_SECONDS = 240

    # Annotate face rows with start and end frames, the video ID, and the shot scale
    faces = Face.objects.annotate(
        min_frame=F('frame__number'),
        max_frame=F('frame__number'),
        video_id=F('frame__video_id'),
        shot_scale=F('frame__shot_scale'))

    right_half = make_region(RIGHT_HALF_MIN_X, 0.0, 1.0, 1.0)
    left_half = make_region(0.0, 0.0, LEFT_HALF_MAX_X, 1.0)

    graph = {
        'nodes': [{
            'name': 'face',
            'predicates': [height_at_least(MIN_FACE_HEIGHT)]
        }],
        'edges': []
    }

    vids = VideoIntervalCollection.from_django_qs(
        faces,
        with_payload=merge_dict_parsers([
            named_payload(
                'faces',
                in_array(
                    bbox_payload_parser(VideoIntervalCollection.django_accessor))),
            named_payload('shot_scale', in_array(lambda obj: obj.shot_scale))
        ])
    ).coalesce(payload_merge_op=merge_named_payload({
        'faces': payload_plus,
        'shot_scale': payload_first
    }))

    def shot_scales_decreasing(scales):
        if len(scales) <= 1:
            return True
        cur_scale = scales[0]
        for scale in scales:
            if cur_scale == 1:
                cur_scale = scale
                continue
            if scale == 1:
                continue
            if scale < cur_scale:
                # Shot scale has gotten farther here
                return False
        return True

    # Get sequences where there's a face on the right half of the screen and
    # there are at most two faces.
    # Payload is the faces in the first frame, and a list of the shot scales
    # throughout the sequence.
    # Filter out any sequences where the shot scale gets farther away over the sequence.
    faces_on_right = vids.filter(
        and_pred(
            payload_satisfies(
                on_name('faces', length_at_most(MAX_FACES_ON_SCREEN))),
            payload_satisfies(
                on_name('faces', scene_graph(graph, region=right_half))))
    ).dilate(SAMPLING_RATE / 2).coalesce(
        payload_merge_op=merge_named_payload({
            'faces': payload_first,
            'shot_scale': payload_plus
        })
    ).filter(lambda intrvl: shot_scales_decreasing(
        intrvl.get_payload()['shot_scale']))

    # Get sequences where there's a face on the left half of the screen and
    # there are at most two faces.
    # Payload is the faces in the first frame, and a list of the shot scales
    # throughout the sequence.
    faces_on_left = vids.filter(
        and_pred(
            payload_satisfies(
                on_name('faces', length_at_most(MAX_FACES_ON_SCREEN))),
            payload_satisfies(
                on_name('faces', scene_graph(graph, region=left_half))))
    ).dilate(SAMPLING_RATE / 2).coalesce(
        payload_merge_op=merge_named_payload({
            'faces': payload_first,
            'shot_scale': payload_plus
        })
    ).filter(lambda intrvl: shot_scales_decreasing(
        intrvl.get_payload()['shot_scale']))

    # Sequences where faces on the right are up to one second before/after
    # faces on the left.
    # Four seconds of buffer time between left-then-right/right-then-left segments.
    # Filter out sequences where the shot scale gets farther away over the sequence.
    # Only keep remaining sequences that last longer than ten seconds.
    shot_reverse_shot_intensification = faces_on_right.merge(
        faces_on_left,
        predicate=before(max_dist=ONE_SECOND)
    ).set_union(
        faces_on_left.merge(faces_on_right,
                            predicate=before(max_dist=ONE_SECOND))
    ).dilate(FOUR_SECONDS).coalesce(
        payload_merge_op=merge_named_payload({
            'faces': payload_first,
            'shot_scale': payload_plus
        })
    ).dilate(-1 * FOUR_SECONDS).filter(
        lambda intrvl: shot_scales_decreasing(
            intrvl.get_payload()['shot_scale'])
    ).filter_length(min_length=TEN_SECONDS)

    def non_uniform(shot_scales):
        return (len(set(shot_scales)) > 2
                if 1 in set(shot_scales)
                else len(set(shot_scales)) > 1)

    # Finally, filter out any shot sequences where the shot scales are uniform
    shot_reverse_shot_intensification = shot_reverse_shot_intensification.filter(
        lambda intrvl: non_uniform(intrvl.get_payload()['shot_scale']))

    # Post-process to display in Esper widget
    if TIMELINE_OUTPUT:
        results = intrvllists_to_result(
            shot_reverse_shot_intensification.get_allintervals())
        add_intrvllists_to_result(results, faces_on_left.get_allintervals(),
                                  color='black')
        add_intrvllists_to_result(results, faces_on_right.get_allintervals(),
                                  color='green')
    else:
        results = intrvllists_to_result_with_objects(
            shot_reverse_shot_intensification.get_allintervals(),
            lambda payload, video: [])
    return results
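
# Toy check of the uniformity filter above (this assumes shot scale 1 encodes
# "unknown", which is how both helpers in the query treat it):
def _non_uniform_sketch():
    def non_uniform(shot_scales):
        return (len(set(shot_scales)) > 2
                if 1 in set(shot_scales)
                else len(set(shot_scales)) > 1)

    print(non_uniform([4, 4, 4]))  # False: one scale throughout
    print(non_uniform([4, 1, 4]))  # False: one known scale plus unknowns
    print(non_uniform([3, 4, 5]))  # True: the framing actually changes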
def hero_shot():
    from django.db.models import F
    from query.models import Face
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import named_payload, in_array, bbox_payload_parser
    from rekall.parsers import merge_dict_parsers, dict_payload_parser
    from rekall.merge_ops import payload_plus, payload_first, merge_named_payload
    from rekall.payload_predicates import payload_satisfies, on_name
    from rekall.spatial_predicates import scene_graph
    from rekall.logical_predicates import and_pred
    from rekall.bbox_predicates import height_at_least, left_of, same_value
    from esper.rekall import intrvllists_to_result_with_objects, bbox_to_result_object

    # We're going to look for frames that would be good "hero shot" frames --
    # potentially good frames to show in a Netflix preview, for instance.
    # We're going to look for frames where there's exactly one face of a
    # certain height, and the frame has certain minimum brightness,
    # sharpness, and contrast properties.
    MIN_FACE_HEIGHT = 0.2
    MIN_BRIGHTNESS = 50
    MIN_SHARPNESS = 50
    MIN_CONTRAST = 30
    FILM_NAME = "star wars the force awakens"

    # Annotate face rows with start and end frames, video ID, and frame image
    # information
    faces_qs = Face.objects.annotate(
        min_frame=F('frame__number'),
        max_frame=F('frame__number'),
        video_id=F('frame__video_id'),
        brightness=F('frame__brightness'),
        contrast=F('frame__contrast'),
        sharpness=F('frame__sharpness')
    ).filter(
        frame__video__name=FILM_NAME,
        brightness__isnull=False,
        contrast__isnull=False,
        sharpness__isnull=False)

    # Load bounding boxes and faces into rekall, and put all faces in the same
    # frame into one interval
    faces = VideoIntervalCollection.from_django_qs(
        faces_qs,
        with_payload=merge_dict_parsers([
            named_payload(
                'faces',
                in_array(
                    bbox_payload_parser(VideoIntervalCollection.django_accessor))),
            dict_payload_parser(VideoIntervalCollection.django_accessor, {
                'brightness': 'brightness',
                'contrast': 'contrast',
                'sharpness': 'sharpness'
            })
        ])
    ).coalesce(merge_named_payload({
        'faces': payload_plus,
        'brightness': payload_first,
        'contrast': payload_first,
        'sharpness': payload_first
    }))

    # Hero shots are shots where there is exactly one face of at least a
    # certain height, and brightness, contrast, and sharpness are at least
    # some amount
    hero_shots = faces.filter(
        payload_satisfies(
            and_pred(
                on_name(
                    'faces',
                    scene_graph(
                        {
                            'nodes': [{
                                'name': 'face',
                                'predicates': [height_at_least(MIN_FACE_HEIGHT)]
                            }],
                            'edges': []
                        },
                        exact=True)),
                lambda payload: (payload['brightness'] > MIN_BRIGHTNESS and
                                 payload['contrast'] > MIN_CONTRAST and
                                 payload['sharpness'] > MIN_SHARPNESS))))

    return intrvllists_to_result_with_objects(
        hero_shots.get_allintervals(),
        lambda payload, video_id: [
            bbox_to_result_object(bbox, video_id) for bbox in payload['faces']
        ],
        limit=100,
        stride=10)