Ejemplo n.º 1
0
def extreme_close_up_frames():
    """Return a result set of the extreme-close-up frames of one video.

    The frames of the hard-coded video whose shot scale is
    ``ShotScale.EXTREME_CLOSE_UP`` are fetched and handed to
    ``intrvllists_to_result_with_objects`` (at most 1000 results, stride 1).
    Each payload is turned into drawable objects: entries under its
    ``'face'`` key go through ``bbox_to_result_object`` and entries under
    its ``'pose'`` key become pose dictionaries.
    """
    from esper.shot_scale import ShotScale, get_all_frames_with_shot_scale
    from esper.rekall import intrvllists_to_result_with_objects, bbox_to_result_object

    def payload_to_objects(payload, video_id):
        # Faces are listed first, then poses; a missing key adds nothing.
        face_objects = [
            bbox_to_result_object(face, video_id)
            for face in payload.get('face', [])
        ]
        pose_objects = [{
            'id': video_id,
            'type': 'pose',
            'keypoints': pose
        } for pose in payload.get('pose', [])]
        return face_objects + pose_objects

    video_id = 123
    close_up_frames = get_all_frames_with_shot_scale(
        video_id, ShotScale.EXTREME_CLOSE_UP)
    return intrvllists_to_result_with_objects(
        close_up_frames.get_allintervals(),
        payload_to_objects,
        limit=1000,
        stride=1)
Ejemplo n.º 2
0
def consecutive_short_shots():
    """Find runs of ``NUM_SHOTS`` back-to-back short shots.

    A shot is "short" when ``(max_frame - min_frame) / video fps`` is
    strictly between 0 and ``MAX_SHOT_DURATION``, and it was produced by the
    ``'shot-hsvhist-face'`` labeler. Starting from the short shots, the
    collection is repeatedly merged with the short shots that meet it
    (``meets_before`` with ``epsilon=1``), so after the loop every interval
    spans ``NUM_SHOTS`` consecutive short shots.

    Returns:
        The value of ``intrvllists_to_result_with_objects`` for those runs
        (at most 100 results, stride 1, no drawable objects).
    """
    from query.models import Shot
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.temporal_predicates import meets_before
    from esper.rekall import intrvllists_to_result_with_objects
    # F is needed by the duration annotation below; it was previously used
    # without being imported.
    from django.db.models import ExpressionWrapper, F, FloatField

    NUM_SHOTS = 3
    MAX_SHOT_DURATION = 0.5

    # Shot length: a frame count divided by the video's fps.
    duration = ExpressionWrapper(
        (F('max_frame') - F('min_frame')) / F('video__fps'),
        output_field=FloatField())

    short_shots = VideoIntervalCollection.from_django_qs(
        Shot.objects.annotate(duration=duration).filter(
            duration__lt=MAX_SHOT_DURATION,
            duration__gt=0.,
            labeler__name='shot-hsvhist-face').all())

    n_shots = short_shots
    for n in range(2, NUM_SHOTS + 1):
        print('Constructing {} consecutive short shots'.format(n))

        # Extend every (n-1)-shot run by one more adjacent short shot.
        n_shots = n_shots.merge(
            short_shots, predicate=meets_before(epsilon=1),
            working_window=1).coalesce().filter_length(min_length=1)

        print('There are {} videos with {} consecutive short shots'.format(
            len(n_shots.get_allintervals().keys()), n))

    return intrvllists_to_result_with_objects(n_shots,
                                              lambda a, b: [],
                                              limit=100,
                                              stride=1)
Ejemplo n.º 3
0
def cinematic_shots_rekall():
    """Return a result set of the shots flagged ``cinematic`` in video 1.

    The matching ``Shot`` rows are loaded into a ``VideoIntervalCollection``
    and converted with ``intrvllists_to_result_with_objects``; no drawable
    objects are attached to the intervals.
    """
    from query.models import Shot, Labeler
    from rekall.video_interval_collection import VideoIntervalCollection
    from esper.rekall import intrvllists_to_result_with_objects
    from esper.stdlib import qs_to_result

    def no_objects(payload, video):
        # Nothing is drawn on top of the shots.
        return []

    video_ids = [1]
    cinematic_shots = VideoIntervalCollection.from_django_qs(
        Shot.objects.filter(video_id__in=video_ids, cinematic=True))

    return intrvllists_to_result_with_objects(
        cinematic_shots.get_allintervals(), no_objects)
Ejemplo n.º 4
0
def all_captions():
    """Return a result set with one interval per caption word of video 1.

    ``get_all_segments`` yields ``(video_id, words)`` pairs; each word is
    indexed as ``(start, end, text)``. Start and end are multiplied by the
    video's fps to obtain frame numbers (so they are presumably expressed in
    seconds - confirm against ``esper.captions``).

    The fps is looked up once per video rather than twice per word, which
    avoids issuing two database queries for every caption word.
    """
    from esper.captions import get_all_segments
    from query.models import Video
    from rekall.video_interval_collection import VideoIntervalCollection
    from esper.rekall import intrvllists_to_result_with_objects

    video_ids = [1]

    # Only aligned captions are in the caption index
    results = get_all_segments(video_ids)

    intervals_by_video = {}
    for video_id, words in results:
        fps = Video.objects.get(id=video_id).fps
        intervals_by_video[video_id] = [
            (
                word[0] * fps,  # start frame
                word[1] * fps,  # end frame
                word[2])  # payload is the word (string)
            for word in words
        ]
    caption_results = VideoIntervalCollection(intervals_by_video)

    return intrvllists_to_result_with_objects(caption_results, lambda a, b: [])
Ejemplo n.º 5
0
def all_poses():
    """Return a result set of sampled frames together with their poses.

    Every ``STRIDE``-th ``PoseMeta`` row (up to ``LIMIT`` rows) selects a
    frame; all poses of each selected frame are then loaded through
    ``esper.pose_wrapper`` and drawn via ``pose_to_dict``.
    """
    # F is needed by the annotations below; it was previously used without
    # being imported.
    from django.db.models import F
    from query.models import PoseMeta
    from esper.stdlib import pose_to_dict
    import esper.pose_wrapper as pw
    from rekall.video_interval_collection import VideoIntervalCollection
    from esper.rekall import intrvllists_to_result_with_objects

    STRIDE = 1000
    LIMIT = 100

    # PoseMeta is a table that contains pose ID, labeler, and a pointer to
    #   a Frame.
    # NOTE that PoseMeta ID's have NO RELATION to Pose ID's.
    pose_meta_qs = PoseMeta.objects.annotate(min_frame=F('frame__number'),
                                             max_frame=F('frame__number'),
                                             video_id=F('frame__video_id'))

    # Use coalesce to get a list of frames we want
    # We store Video ID and frame number in the payload
    frames = VideoIntervalCollection.from_django_qs(
        pose_meta_qs[:LIMIT * STRIDE:STRIDE],
        with_payload=lambda pose_meta_obj:
        (pose_meta_obj.video_id, pose_meta_obj.min_frame)).coalesce()

    # pose_wrapper.get takes in a PoseMeta queryset or list of PoseMeta objects
    #   and returns a list of PoseWrapper objects.
    def load_poses(interval):
        video_id, frame_number = interval.payload[0], interval.payload[1]
        frame_poses = pose_meta_qs.filter(video_id=video_id,
                                          min_frame=frame_number).all()
        return (interval.start, interval.end, pw.get(frame_poses))

    poses = frames.map(load_poses)

    # We use pose_to_dict to draw PoseWrapper objects.
    return intrvllists_to_result_with_objects(
        poses, lambda pose_wrappers, video_id:
        [pose_to_dict(wrapper) for wrapper in pose_wrappers])
Ejemplo n.º 6
0
def all_face_landmarks():
    """Return a result set of sampled frames with their face landmarks.

    Every ``STRIDE``-th ``Face`` row (up to ``LIMIT`` rows) selects a frame;
    the landmarks of all faces in each selected frame are then loaded through
    ``esper.face_landmarks_wrapper`` and drawn via ``face_landmarks_to_dict``.
    """
    # F is needed by the annotations below; it was previously used without
    # being imported.
    from django.db.models import F
    from query.models import Face
    from esper.stdlib import face_landmarks_to_dict
    import esper.face_landmarks_wrapper as flw
    from rekall.video_interval_collection import VideoIntervalCollection
    from esper.rekall import intrvllists_to_result_with_objects

    STRIDE = 1000
    LIMIT = 100

    # Face landmarks are keyed by Face ID's.
    faces_qs = Face.objects.annotate(min_frame=F('frame__number'),
                                     max_frame=F('frame__number'),
                                     video_id=F('frame__video_id'))

    # Use coalesce to get a list of frames we want
    # We store Video ID and frame number in the payload
    frames = VideoIntervalCollection.from_django_qs(
        faces_qs[:LIMIT * STRIDE:STRIDE],
        with_payload=lambda face_obj:
        (face_obj.video_id, face_obj.min_frame)).coalesce()

    # face_landmarks_wrapper.get takes in a Face queryset or list of Face
    #   objects and returns a list of LandmarksWrapper objects.
    def load_landmarks(interval):
        video_id, frame_number = interval.payload[0], interval.payload[1]
        frame_faces = faces_qs.filter(video_id=video_id,
                                      min_frame=frame_number).all()
        return (interval.start, interval.end, flw.get(frame_faces))

    landmarks = frames.map(load_landmarks)

    # We use face_landmarks_to_dict to draw LandmarksWrapper objects.
    return intrvllists_to_result_with_objects(
        landmarks, lambda landmarks_wrappers, video_id:
        [face_landmarks_to_dict(wrapper) for wrapper in landmarks_wrappers])
Ejemplo n.º 7
0
def caption_search():
    """Find where "may the Force be with you" is spoken in Star Wars videos.

    ``topic_search`` is run (with ``window_size=0``) over every video whose
    name contains ``"star wars"``, for four capitalisation variants of the
    phrase. Each posting's start and end are multiplied by the video's fps to
    obtain frame numbers (so they are presumably expressed in seconds -
    confirm against ``esper.captions``); the interval payload is ``0``.

    The fps is looked up once per search result rather than twice per
    posting, which avoids two database queries for every match.
    """
    from esper.captions import topic_search
    from query.models import Video
    from rekall.video_interval_collection import VideoIntervalCollection
    from esper.rekall import intrvllists_to_result_with_objects

    phrases = [
        'may the Force be with you', 'may the force be with you',
        'May the Force be with you', 'May the force be with you'
    ]
    star_wars_ids = [
        vid.id
        for vid in Video.objects.filter(name__contains="star wars").all()
    ]
    results = topic_search(phrases, window_size=0, video_ids=star_wars_ids)

    intervals_by_video = {}
    for r in results:
        fps = Video.objects.get(id=r.id).fps
        intervals_by_video[r.id] = [
            (p.start * fps, p.end * fps, 0) for p in r.postings
        ]
    caption_results = VideoIntervalCollection(intervals_by_video)

    return intrvllists_to_result_with_objects(caption_results, lambda a, b: [])
Ejemplo n.º 8
0
def shot_reverse_shot_complex():
    """Find shot/reverse-shot sequences in videos matching ``VIDEO_NAME``.

    Steps:
      1. Load the shots of labeler ``SHOTS_LABELER_ID`` and the faces with
         detection probability >= 0.99, grouped per frame.
      2. Keep frames with at most ``MAX_FACES_ON_SCREEN`` faces that contain
         a face at least ``MIN_FACE_HEIGHT`` tall inside the right region
         (x >= ``RIGHT_HALF_MIN_X``) or the left region
         (x <= ``LEFT_HALF_MAX_X``).
      3. Attach those frames to the shots they overlap, keep the tallest face
         of every frame, and keep only shots in which that face's centre
         moves at most ``MAX_FACE_MOVEMENT`` between consecutive frames.
      4. Chain such shots as right-left-right or left-right-left (each shot
         ``before(max_dist=1)`` the next) and take the union of both.

    Returns:
        The value of ``intrvllists_to_result_with_objects`` for the coalesced
        sequences, without drawable objects.
    """
    # numpy and F are needed below; they were previously used without being
    # imported.
    import numpy as np
    from django.db.models import F
    from query.models import Face, Shot
    from rekall.temporal_predicates import overlaps, before
    from rekall.merge_ops import payload_second, payload_plus
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array, bbox_payload_parser
    from rekall.payload_predicates import payload_satisfies
    from rekall.list_predicates import length_at_most
    from rekall.logical_predicates import and_pred
    from rekall.spatial_predicates import scene_graph, make_region
    from rekall.bbox_predicates import height_at_least
    from esper.rekall import intrvllists_to_result_with_objects

    VIDEO_NAME = 'godfather part iii'

    MAX_FACE_MOVEMENT = 0.15
    MIN_FACE_HEIGHT = 0.2
    MAX_FACES_ON_SCREEN = 4
    RIGHT_HALF_MIN_X = 0.33
    LEFT_HALF_MAX_X = 0.66
    SHOTS_LABELER_ID = 64
    # NOTE: faces are sampled every 12 frames.

    # Annotate face rows with start and end frames and the video ID
    faces = Face.objects.annotate(min_frame=F('frame__number'),
                                  max_frame=F('frame__number'),
                                  video_id=F('frame__video_id')).filter(
                                      frame__video__name__contains=VIDEO_NAME)

    shots = VideoIntervalCollection.from_django_qs(Shot.objects.filter(
        video__name__contains=VIDEO_NAME, labeler_id=SHOTS_LABELER_ID),
                                                   with_payload=lambda obj: [])
    # vids are all faces for each frame
    vids = VideoIntervalCollection.from_django_qs(
        faces.filter(probability__gte=0.99),
        with_payload=in_array(
            bbox_payload_parser(
                VideoIntervalCollection.django_accessor))).coalesce(
                    payload_merge_op=payload_plus)

    right_half = make_region(RIGHT_HALF_MIN_X, 0.0, 1.0, 1.0)
    left_half = make_region(0.0, 0.0, LEFT_HALF_MAX_X, 1.0)
    graph = {
        'nodes': [{
            'name': 'face',
            'predicates': [height_at_least(MIN_FACE_HEIGHT)]
        }],
        'edges': []
    }

    def frames_with_face_in(region):
        # Frames with few enough faces and a tall-enough face in `region`.
        return vids.filter(
            and_pred(payload_satisfies(length_at_most(MAX_FACES_ON_SCREEN)),
                     payload_satisfies(scene_graph(graph, region=region))))

    faces_on_right = frames_with_face_in(right_half)
    faces_on_left = frames_with_face_in(left_half)

    def wrap_list(intvl):
        intvl.payload = [intvl.payload]
        return intvl

    def get_height(box):
        return box['y2'] - box['y1']

    def get_center(box):
        return ((box['x1'] + box['x2']) / 2, (box['y1'] + box['y2']) / 2)

    def get_distance(pt1, pt2):
        return np.sqrt((pt1[0] - pt2[0])**2 + (pt1[1] - pt2[1])**2)

    def find_highest_box(boxes):
        # max() returns the first of several equally tall boxes.
        if not boxes:
            return None
        return max(boxes, key=get_height)

    def take_highest_in_frame(intvl):
        tallest_per_frame = []
        for faces_in_frame in intvl.payload:
            largest = find_highest_box(faces_in_frame)
            if largest is not None:
                tallest_per_frame.append(largest)
        intvl.payload = tallest_per_frame
        return intvl

    # Check if displacement of box center between frames are within `dist`
    def inter_frame_movement_less_than(dist):
        def check(boxes):
            return all(
                get_distance(get_center(b1), get_center(b2)) <= dist
                for b1, b2 in zip(boxes, boxes[1:]))

        return check

    def shots_with_steady_face(face_frames):
        # Payload is a list, each element is a list of faces for a frame
        return shots.merge(
            face_frames, predicate=overlaps(),
            payload_merge_op=payload_second).map(wrap_list).coalesce(
                payload_merge_op=payload_plus).map(take_highest_in_frame).filter(
                    payload_satisfies(
                        inter_frame_movement_less_than(MAX_FACE_MOVEMENT)))

    shots_with_face_on_right = shots_with_steady_face(faces_on_right)
    shots_with_face_on_left = shots_with_steady_face(faces_on_left)

    # Right-Left-Right sequences
    shot_reverse_shot_1 = shots_with_face_on_right.merge(
        shots_with_face_on_left,
        predicate=before(max_dist=1)).merge(shots_with_face_on_right,
                                            predicate=before(max_dist=1))

    # Left-Right-Left sequences
    shot_reverse_shot_2 = shots_with_face_on_left.merge(
        shots_with_face_on_right,
        predicate=before(max_dist=1)).merge(shots_with_face_on_left,
                                            predicate=before(max_dist=1))

    shot_reverse_shot = shot_reverse_shot_1.set_union(
        shot_reverse_shot_2).coalesce()
    return intrvllists_to_result_with_objects(
        shot_reverse_shot.get_allintervals(), payload_to_objs=lambda p, v: [])
Ejemplo n.º 9
0
def shot_reverse_shot():
    """Find shot/reverse-shot conversations using face positions only.

    Frames with at most ``MAX_FACES_ON_SCREEN`` faces and a face at least
    ``MIN_FACE_HEIGHT`` tall in the right region (x >= ``RIGHT_HALF_MIN_X``)
    or the left region (x <= ``LEFT_HALF_MAX_X``) are dilated by half the
    sampling rate and coalesced into sequences. Right-side sequences are then
    merged with left-side sequences that lie up to ``ONE_SECOND`` before or
    after them, gaps of up to ``FOUR_SECONDS`` are closed, and only results
    lasting at least ``TEN_SECONDS`` are kept.

    The ``*_SECOND(S)`` constants are frame counts (24 frames per second is
    assumed by their values).

    Returns:
        A timeline result when ``TIMELINE_OUTPUT`` is true, otherwise the
        value of ``intrvllists_to_result_with_objects`` without drawable
        objects.
    """
    # F is needed by the annotations below; it was previously used without
    # being imported.
    from django.db.models import F
    from query.models import Face
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array, bbox_payload_parser
    from rekall.merge_ops import payload_plus
    from rekall.payload_predicates import payload_satisfies
    from rekall.list_predicates import length_at_most
    from rekall.logical_predicates import and_pred, or_pred
    from rekall.spatial_predicates import scene_graph, make_region
    from rekall.temporal_predicates import before, after
    from rekall.bbox_predicates import height_at_least
    from esper.rekall import intrvllists_to_result, intrvllists_to_result_with_objects, add_intrvllists_to_result

    # If True, visualize results in a timeline
    TIMELINE_OUTPUT = False

    RIGHT_HALF_MIN_X = 0.45
    LEFT_HALF_MAX_X = 0.55
    MIN_FACE_HEIGHT = 0.4
    MAX_FACES_ON_SCREEN = 2
    # faces are sampled every 12 frames
    SAMPLING_RATE = 12
    ONE_SECOND = 24
    FOUR_SECONDS = 96
    TEN_SECONDS = 240

    # Annotate face rows with start and end frames and the video ID
    faces = Face.objects.annotate(min_frame=F('frame__number'),
                                  max_frame=F('frame__number'),
                                  video_id=F('frame__video_id'))

    right_half = make_region(RIGHT_HALF_MIN_X, 0.0, 1.0, 1.0)
    left_half = make_region(0.0, 0.0, LEFT_HALF_MAX_X, 1.0)

    graph = {
        'nodes': [{
            'name': 'face',
            'predicates': [height_at_least(MIN_FACE_HEIGHT)]
        }],
        'edges': []
    }

    vids = VideoIntervalCollection.from_django_qs(
        faces,
        with_payload=in_array(
            bbox_payload_parser(
                VideoIntervalCollection.django_accessor))).coalesce(
                    payload_merge_op=payload_plus)

    def sequences_with_face_in(region):
        # Get sequences where there's a face in `region` and there are at
        #   most MAX_FACES_ON_SCREEN faces. Dilating by half the sampling
        #   rate lets neighbouring sampled frames touch before coalescing.
        return vids.filter(
            and_pred(payload_satisfies(length_at_most(MAX_FACES_ON_SCREEN)),
                     payload_satisfies(scene_graph(
                         graph, region=region)))).dilate(SAMPLING_RATE /
                                                         2).coalesce()

    faces_on_right = sequences_with_face_in(right_half)
    faces_on_left = sequences_with_face_in(left_half)

    # Sequences where faces on right up to one second before/after faces on
    #   left
    # Four seconds of buffer time between left-then-right/right-then-left
    #   segments
    # Only keep remaining sequences that last longer than ten seconds
    shot_reverse_shot = faces_on_right.merge(
        faces_on_left,
        predicate=or_pred(
            before(max_dist=ONE_SECOND), after(max_dist=ONE_SECOND),
            arity=2)).dilate(FOUR_SECONDS).coalesce().dilate(
                -1 * FOUR_SECONDS).filter_length(min_length=TEN_SECONDS)

    # Post-process to display in Esper widget
    if TIMELINE_OUTPUT:
        results = intrvllists_to_result(shot_reverse_shot.get_allintervals())
        add_intrvllists_to_result(results,
                                  faces_on_left.get_allintervals(),
                                  color='black')
        add_intrvllists_to_result(results,
                                  faces_on_right.get_allintervals(),
                                  color='green')
    else:
        results = intrvllists_to_result_with_objects(
            shot_reverse_shot.get_allintervals(), lambda payload, video: [])
    return results
Ejemplo n.º 10
0
def kissing():
    """Find kissing scenes from face boxes, face landmarks and captions.

    Steps:
      1. Load confident, tall faces and keep frames with exactly two faces
         that sit side by side, overlap horizontally and vertically, and have
         similar heights (``graph``).
      2. Load landmarks for those faces and keep frames where the left face
         is turned right, the right face is turned left, and the two mouths
         are within ``MAX_MOUTH_DIFF`` of each other (``graph2``).
      3. Extend each hit to the end of the shot it overlaps, then clip it to
         the last frame of that span that still shows two tall faces.
      4. Subtract caption intervals and keep what lasts at least 12 frames.

    The original author notes that this query takes about 7 minutes to run.

    Returns:
        The value of ``intrvllists_to_result_with_objects`` for the final
        intervals, drawing the landmarks and the bounding box of every face.
    """
    # Takes 7min to run!
    # numpy, F, Labeler and Video are needed below; they were previously used
    # without being imported.
    import numpy as np
    from django.db.models import F
    from query.models import Face, Labeler, Shot, Video
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array, bbox_payload_parser, merge_dict_parsers, dict_payload_parser
    from rekall.merge_ops import payload_plus
    from rekall.payload_predicates import payload_satisfies
    from rekall.spatial_predicates import scene_graph
    from rekall.temporal_predicates import overlaps
    from rekall.bbox_predicates import same_height
    import esper.face_landmarks_wrapper as flw
    from esper.captions import get_all_segments
    from esper.rekall import intrvllists_to_result_with_objects, bbox_to_result_object
    from esper.stdlib import face_landmarks_to_dict

    MAX_MOUTH_DIFF = 0.12
    MIN_FACE_CONFIDENCE = 0.8
    MIN_FACE_HEIGHT = 0.4
    MAX_FACE_HEIGHT_DIFF = 0.1
    MIN_FACE_OVERLAP_X = 0.05
    MIN_FACE_OVERLAP_Y = 0.2
    MAX_FACE_OVERLAP_X_FRACTION = 0.7
    MIN_FACE_ANGLE = 0.1

    def map_payload(func):
        def map_fn(intvl):
            intvl.payload = func(intvl.payload)
            return intvl

        return map_fn

    def get_landmarks(faces):
        ids = [face['id'] for face in faces]
        # NOTE(review): pairing by position assumes flw.get returns the
        # landmarks in the same order as `faces` - confirm.
        landmarks = flw.get(Face.objects.filter(id__in=ids))
        for face, landmark in zip(faces, landmarks):
            face['landmarks'] = landmark
        return faces

    # Annotate face rows with start and end frames and the video ID
    faces_qs = Face.objects.filter(
        probability__gte=MIN_FACE_CONFIDENCE).annotate(
            min_frame=F('frame__number'),
            max_frame=F('frame__number'),
            height=F('bbox_y2') - F('bbox_y1'),
            video_id=F('frame__video_id')).filter(height__gte=MIN_FACE_HEIGHT)

    faces = VideoIntervalCollection.from_django_qs(
        faces_qs,
        with_payload=in_array(
            merge_dict_parsers([
                bbox_payload_parser(VideoIntervalCollection.django_accessor),
                dict_payload_parser(VideoIntervalCollection.django_accessor,
                                    {'id': 'id'})
            ]))).coalesce(payload_merge_op=payload_plus)

    graph = {
        'nodes': [
            {
                'name': 'face_left',
                'predicates': []
            },
            {
                'name': 'face_right',
                'predicates': []
            },
        ],
        'edges': [
            {
                'start':
                'face_left',
                'end':
                'face_right',
                'predicates': [
                    # Left face on the left
                    lambda f1, f2: f1['x2'] < f2['x2'] and f1['x1'] < f2['x1'],
                    # Faces overlap horizontally
                    lambda f1, f2: f1['x2'] - f2['x1'] > MIN_FACE_OVERLAP_X,
                    # Faces overlap vertically: lowest bottom edge minus
                    # highest top edge. The top edge must be taken over BOTH
                    # faces (this used to compare f1['y1'] with itself).
                    lambda f1, f2: min(f1['y2'], f2['y2']) - max(
                        f1['y1'], f2['y1']) > MIN_FACE_OVERLAP_Y,
                    # No face is entirely above another
                    lambda f1, f2: f1['y2'] > f2['y1'] and f1['y1'] < f2['y2'],
                    same_height(MAX_FACE_HEIGHT_DIFF),
                    # Horizontal overlap is a limited fraction of the wider
                    # face
                    lambda f1, f2:
                    (f1['x2'] - f2['x1']) / max(f1['x2'] - f1['x1'], f2[
                        'x2'] - f2['x1']) < MAX_FACE_OVERLAP_X_FRACTION
                ]
            },
        ]
    }

    def mouths_are_close(lm1, lm2):
        select_outer = [2, 3, 4, 8, 9, 10]
        select_inner = [1, 2, 3, 5, 6, 7]
        mouth1 = np.concatenate(
            (lm1.outer_lips()[select_outer], lm1.inner_lips()[select_inner]))
        mouth2 = np.concatenate(
            (lm2.outer_lips()[select_outer], lm2.inner_lips()[select_inner]))
        mean1 = np.mean(mouth1, axis=0)
        mean2 = np.mean(mouth2, axis=0)
        return np.linalg.norm(mean1 - mean2) <= MAX_MOUTH_DIFF

    # Line is ax+by+c=0
    def project_point_to_line(pt, a, b, c):
        x0, y0 = pt[0], pt[1]
        d = a * a + b * b
        x = (b * (b * x0 - a * y0) - a * c) / d
        y = (a * (-b * x0 + a * y0) - b * c) / d
        return np.array([x, y])

    # Positive if facing right
    def signed_face_angle(lm):
        center_line_indices = [27, 28, 32, 33, 34, 51, 62, 66, 57]
        data = lm.landmarks[center_line_indices]
        fit = np.polyfit(data[:, 0], data[:, 1], 1)
        # y = ax+b
        a, b = fit[0], fit[1]
        A = project_point_to_line(lm.landmarks[center_line_indices[0]], a, -1,
                                  b)
        B = project_point_to_line(lm.landmarks[center_line_indices[-1]], a, -1,
                                  b)
        AB = B - A
        AB = AB / np.linalg.norm(AB)
        C = np.mean(lm.nose_bridge()[2:4], axis=0)
        AC = C - A
        AC = AC / np.linalg.norm(AC)
        # z-component of the cross product of two 2-D vectors, written out
        # because np.cross on 2-D vectors is deprecated in NumPy 2.0.
        return AB[0] * AC[1] - AB[1] * AC[0]

    graph2 = {
        'nodes': [
            {
                'name':
                'left',
                'predicates': [
                    lambda f: signed_face_angle(f['landmarks']) >
                    MIN_FACE_ANGLE
                ]
            },
            {
                'name':
                'right',
                'predicates': [
                    lambda f: signed_face_angle(f['landmarks']) <
                    -MIN_FACE_ANGLE
                ]
            },
        ],
        'edges': [{
            'start':
            'left',
            'end':
            'right',
            'predicates': [
                lambda l, r: mouths_are_close(l['landmarks'], r['landmarks']),
            ]
        }]
    }

    mf_up_close = faces.filter(
        payload_satisfies(scene_graph(graph, exact=True))).map(
            map_payload(get_landmarks)).filter(
                payload_satisfies(scene_graph(graph2, exact=True)))
    # Materialise the ids: they are reused by several queries below.
    vids = list(mf_up_close.get_allintervals().keys())
    # Merge with shots
    shots_qs = Shot.objects.filter(
        video_id__in=vids,
        labeler=Labeler.objects.get(name='shot-hsvhist-face')).all()
    total = shots_qs.count()
    print("Total shots:", total)
    # use empty list as payload
    shots = VideoIntervalCollection.from_django_qs(shots_qs,
                                                   with_payload=lambda row: [],
                                                   progress=True,
                                                   total=total)
    # Each hit runs from the kiss frame to the end of the overlapping shot.
    kissing_shots = mf_up_close.join(shots,
                                     lambda kiss, shot: [(kiss.get_start(
                                     ), shot.get_end(), kiss.get_payload())],
                                     predicate=overlaps(),
                                     working_window=1).coalesce()

    # Getting faces in the shot
    def wrap_in_list(intvl):
        intvl.payload = [intvl.payload]
        return intvl

    print("Getting faces...")
    faces_qs2 = Face.objects.filter(frame__video_id__in=vids,
                                    probability__gte=MIN_FACE_CONFIDENCE)
    total = faces_qs2.count()
    faces2 = VideoIntervalCollection.from_django_qs(
        faces_qs2.annotate(min_frame=F('frame__number'),
                           max_frame=F('frame__number'),
                           video_id=F('frame__video_id')),
        with_payload=in_array(
            merge_dict_parsers([
                bbox_payload_parser(VideoIntervalCollection.django_accessor),
                dict_payload_parser(VideoIntervalCollection.django_accessor,
                                    {'frame': 'min_frame'})
            ])),
        progress=True,
        total=total).coalesce(payload_merge_op=payload_plus).map(wrap_in_list)

    def clip_to_last_frame_with_two_faces(intvl):
        # payload[1] holds one list of faces per frame of the span.
        faces_per_frame = intvl.get_payload()[1]
        two_faces = [(f[0], f[1]) for f in faces_per_frame if len(f) == 2]
        two_high_faces = [
            (a, b) for a, b in two_faces
            if min(a['y2'] - a['y1'], b['y2'] - b['y1']) >= MIN_FACE_HEIGHT
        ]
        frame = [a['frame'] for a, b in two_high_faces]

        if len(frame) > 0:
            intvl.end = frame[-1]
        return intvl

    clipped_kissing_shots = kissing_shots.merge(
        faces2,
        payload_merge_op=lambda p1, p2: (p1, p2),
        predicate=overlaps(),
        working_window=1).coalesce(
            payload_merge_op=lambda p1, p2: (p1[0], p1[1] + p2[1])).map(
                clip_to_last_frame_with_two_faces).filter_length(min_length=12)

    results = get_all_segments(vids)
    fps_map = {i: Video.objects.get(id=i).fps for i in vids}
    caption_results = VideoIntervalCollection({
        video_id: [
            (
                word[0] * fps_map[video_id],  # start frame
                word[1] * fps_map[video_id],  # end frame
                word[2])  # payload is the word
            for word in words
        ]
        for video_id, words in results
    })
    kissing_without_words = clipped_kissing_shots.minus(caption_results)
    kissing_final = kissing_without_words.map(lambda intvl: (int(
        intvl.start), int(intvl.end), intvl.payload)).coalesce().filter_length(
            min_length=12)

    def payload_to_objects(p, video_id):
        return [face_landmarks_to_dict(face['landmarks']) for face in p[0]
                ] + [bbox_to_result_object(face, video_id) for face in p[0]]

    return intrvllists_to_result_with_objects(
        kissing_final.get_allintervals(),
        payload_to_objects,
        stride=1)
Ejemplo n.º 11
0
def reaction_shots_apollo_13():
    """Find reaction shots in videos whose name contains "apollo 13".

    A reaction shot is a close shot that overlaps a spoken caption while the
    speaker is NOT among the characters identified in the shot.

    Steps:
      1. Load script metadata, dropping entries without a speaker, entries
         whose speaker contains "man's voice", and the speaker
         "gene krantz"; tag every caption segment overlapping an aligned
         entry with that entry's speaker.
      2. Label each cinematic shot with the most common shot scale of its
         regularly sampled frames and with the first names of the characters
         identified in it; shot bounds are divided by fps so they can be
         compared with the caption intervals.
      3. Keep caption/shot overlaps where the shot scale is
         medium-close-up or closer and the speaker's first name is not among
         the shot's characters; convert back to frames and close gaps of up
         to 12 frames on each side.

    Returns:
        The value of ``intrvllists_to_result_with_objects`` for the reaction
        shots, without drawable objects.
    """
    from collections import Counter
    # F, Video and Frame are needed below; they were previously used without
    # being imported.
    from django.db.models import F
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.merge_ops import payload_plus
    from rekall.payload_predicates import payload_satisfies
    from rekall.temporal_predicates import overlaps
    from rekall.parsers import in_array, dict_payload_parser
    from esper.caption_metadata import caption_metadata_for_video
    from esper.captions import get_all_segments
    from esper.rekall import intrvllists_to_result_with_objects
    from query.models import FaceCharacterActor, Frame, Shot, Video

    CLOSE_SHOT_SCALES = ['medium_close_up', 'close_up', 'extreme_close_up']

    videos = Video.objects.filter(name__contains="apollo 13").all()
    video_ids = [video.id for video in videos]

    def has_usable_speaker(meta_interval):
        speaker = meta_interval.payload['speaker']
        return (speaker is not None and "man's voice" not in speaker
                and speaker.strip() != "gene krantz")

    # Load script data
    metadata = VideoIntervalCollection({
        video_id: caption_metadata_for_video(video_id)
        for video_id in video_ids
    }).filter(has_usable_speaker)

    all_segments = get_all_segments(video_ids)

    captions_interval_collection = VideoIntervalCollection(
        {video: intervals
         for video, intervals in all_segments})

    captions_with_speaker_id = captions_interval_collection.overlaps(
        metadata.filter(payload_satisfies(lambda p: p['aligned'])),
        payload_merge_op=lambda word, script_meta:
        (word[0], script_meta['speaker']))

    # Annotate face rows with start and end frames and the video ID
    faces_with_character_actor_qs = FaceCharacterActor.objects.annotate(
        min_frame=F('face__frame__number'),
        max_frame=F('face__frame__number'),
        video_id=F('face__frame__video_id'),
        character_name=F('characteractor__character__name')).filter(
            video_id__in=video_ids)

    frames_with_identity = VideoIntervalCollection.from_django_qs(
        faces_with_character_actor_qs,
        with_payload=in_array(
            dict_payload_parser(VideoIntervalCollection.django_accessor,
                                {'character': 'character_name'}), )).coalesce(
                                    payload_merge_op=payload_plus)

    # Annotate shots with all the people in them
    shots_qs = Shot.objects.filter(
        cinematic=True,
        video_id__in=video_ids).annotate(fps=F('video__fps'))
    shots = VideoIntervalCollection.from_django_qs(
        shots_qs, with_payload=lambda shot: shot.fps)

    # Annotate shots with mode shot scale
    frames_with_shot_scale_qs = Frame.objects.filter(
        regularly_sampled=True,
        video_id__in=video_ids).annotate(
            min_frame=F('number'),
            max_frame=F('number'),
            shot_scale_name=F('shot_scale__name')).all()
    frames_with_shot_scale = VideoIntervalCollection.from_django_qs(
        frames_with_shot_scale_qs, with_payload=lambda f: f.shot_scale_name)

    def get_mode(items):
        # Most frequent item; ties go to the item seen first, so the result
        # no longer depends on set iteration order.
        return Counter(items).most_common(1)[0][0]

    shots_with_scale = shots.merge(
        frames_with_shot_scale,
        predicate=overlaps(),
        payload_merge_op=lambda shot_fps, shot_scale: [(shot_fps, shot_scale)]
    ).coalesce(payload_merge_op=payload_plus).map(
        lambda intrvl: (intrvl.start, intrvl.end, {
            'fps': intrvl.payload[0][0],
            'shot_scale': get_mode([p[1] for p in intrvl.payload])
        }))

    def to_seconds_with_characters(intrvl):
        # payload is (shot info dict, list of {'character': name} dicts).
        shot_info = intrvl.payload[0]
        identities = intrvl.payload[1]
        fps = shot_info['fps']
        # A character field may list several names separated by '/'; only
        # the first word of each non-empty name is kept.
        characters = {
            name.strip().split(' ')[0].strip()
            for identity in identities
            for name in identity['character'].split('/')
            if len(name.strip()) > 0
        }
        return (intrvl.start / fps, intrvl.end / fps, {
            'fps': fps,
            'shot_scale': shot_info['shot_scale'],
            'characters': characters
        })

    shots_with_people_in_them = shots_with_scale.overlaps(
        frames_with_identity,
        payload_merge_op=lambda shot_payload, identities:
        (shot_payload, identities),
        working_window=1).coalesce(
            payload_merge_op=lambda p1, p2: (p1[0], p1[1] + p2[1])).map(
                to_seconds_with_characters)

    close_shots = shots_with_people_in_them.filter(
        payload_satisfies(lambda p: p['shot_scale'] in CLOSE_SHOT_SCALES))

    def speaker_not_in_shot(captions, shots):
        speaker_first_name = captions.payload[1].strip().split(' ')[0]
        return speaker_first_name not in shots.payload['characters']

    reaction_shots = captions_with_speaker_id.overlaps(
        close_shots,
        predicate=speaker_not_in_shot,
        payload_merge_op=lambda word_and_speaker, fps_and_characters:
        (fps_and_characters['fps'], word_and_speaker)).map(lambda intrvl: (
            int(intrvl.start * intrvl.payload[0]),
            int(intrvl.end * intrvl.payload[0]), [intrvl.payload[1]])).dilate(
                12).coalesce(
                    payload_merge_op=payload_plus).dilate(-12).filter_length(
                        min_length=12)

    return intrvllists_to_result_with_objects(reaction_shots, lambda a, b: [])
def shot_reverse_shot_intensification():
    """Find long left/right face alternations whose shot scale varies.

    Face rows are grouped per frame, then turned into sequences where a
    sufficiently tall face sits in the right (or left) part of the screen
    with at most ``MAX_FACES_ON_SCREEN`` faces visible. Right and left
    sequences that follow each other closely are merged, stitched together
    across gaps, filtered on their shot scales and on a minimum length.

    Relies on ``F`` being available in the enclosing module scope.

    Returns:
        The Esper result structure built by ``intrvllists_to_result`` (when
        ``TIMELINE_OUTPUT`` is true) or by
        ``intrvllists_to_result_with_objects`` (otherwise).
    """
    from query.models import Face
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array, bbox_payload_parser, merge_dict_parsers, named_payload
    from rekall.merge_ops import payload_plus, merge_named_payload, payload_first
    from esper.rekall import intrvllists_to_result_bbox
    from rekall.payload_predicates import payload_satisfies, on_name
    from rekall.list_predicates import length_at_most
    from rekall.logical_predicates import and_pred, or_pred
    from rekall.spatial_predicates import scene_graph, make_region
    from rekall.temporal_predicates import before, after
    from rekall.bbox_predicates import height_at_least
    from esper.rekall import intrvllists_to_result, intrvllists_to_result_with_objects, add_intrvllists_to_result

    # Set to True to render the result as a timeline instead of a clip list.
    TIMELINE_OUTPUT = False

    # The two regions overlap around the centre of the frame.
    RIGHT_HALF_MIN_X = 0.45
    LEFT_HALF_MAX_X = 0.55
    MIN_FACE_HEIGHT = 0.4
    MAX_FACES_ON_SCREEN = 2
    # Faces are sampled every 12 frames.
    SAMPLING_RATE = 12
    # Durations below are frame counts (presumably at 24 fps -- confirm).
    ONE_SECOND = 24
    FOUR_SECONDS = 96
    TEN_SECONDS = 240

    # Each face row becomes a single-frame interval carrying its video ID
    # and the shot scale of its frame.
    faces = Face.objects.annotate(min_frame=F('frame__number'),
                                  max_frame=F('frame__number'),
                                  video_id=F('frame__video_id'),
                                  shot_scale=F('frame__shot_scale'))

    right_half = make_region(RIGHT_HALF_MIN_X, 0.0, 1.0, 1.0)
    left_half = make_region(0.0, 0.0, LEFT_HALF_MAX_X, 1.0)

    # Scene graph with a single node: one face of at least the minimum height.
    graph = {
        'nodes': [{
            'name': 'face',
            'predicates': [height_at_least(MIN_FACE_HEIGHT)]
        }],
        'edges': []
    }

    # Payload per face: a one-element list of bounding boxes and a
    # one-element list of shot scales.
    face_payload_parser = merge_dict_parsers([
        named_payload(
            'faces',
            in_array(
                bbox_payload_parser(VideoIntervalCollection.django_accessor))),
        named_payload('shot_scale', in_array(lambda obj: obj.shot_scale))
    ])

    # Coalescing gathers all faces of a frame into one interval while
    # keeping only the first shot-scale entry.
    vids = VideoIntervalCollection.from_django_qs(
        faces, with_payload=face_payload_parser).coalesce(
            payload_merge_op=merge_named_payload({
                'faces': payload_plus,
                'shot_scale': payload_first
            }))

    def shot_scales_decreasing(scales):
        """Return False once a scale falls below the baseline scale.

        The baseline is the first entry that is not 1; entries equal to 1
        are skipped (presumably 1 encodes an unknown scale -- confirm).
        Lists with fewer than two entries always pass.

        NOTE(review): the baseline is never advanced, so a later drop that
        stays at or above the first non-1 scale is still accepted. Confirm
        that this is the intended check.
        """
        if len(scales) < 2:
            return True
        baseline = 1
        for scale in scales:
            if baseline == 1:
                # Still looking for the first non-1 scale.
                baseline = scale
            elif scale != 1 and scale < baseline:
                # Shot scale has gotten farther away than the baseline.
                return False
        return True

    def faces_in_region(region):
        """Build the face sequences for one screen region.

        Keeps frames with at most MAX_FACES_ON_SCREEN faces that match the
        scene graph inside ``region``, joins neighbouring samples, and drops
        sequences rejected by ``shot_scales_decreasing``. The payload holds
        the faces of the first frame and the list of shot scales seen
        throughout the sequence.
        """
        few_faces = payload_satisfies(
            on_name('faces', length_at_most(MAX_FACES_ON_SCREEN)))
        tall_face_in_region = payload_satisfies(
            on_name('faces', scene_graph(graph, region=region)))
        matching_frames = vids.filter(and_pred(few_faces, tall_face_in_region))

        # Dilating by half the sampling rate lets consecutive samples touch
        # so that coalesce can join them.
        joined = matching_frames.dilate(SAMPLING_RATE / 2).coalesce(
            payload_merge_op=merge_named_payload({
                'faces': payload_first,
                'shot_scale': payload_plus
            }))
        return joined.filter(lambda intrvl: shot_scales_decreasing(
            intrvl.get_payload()['shot_scale']))

    faces_on_right = faces_in_region(right_half)
    faces_on_left = faces_in_region(left_half)

    # Pair up right and left sequences related by ``before`` with a maximum
    # distance of ONE_SECOND, in either order.
    right_then_left = faces_on_right.merge(
        faces_on_left, predicate=before(max_dist=ONE_SECOND))
    left_then_right = faces_on_left.merge(
        faces_on_right, predicate=before(max_dist=ONE_SECOND))
    alternations = right_then_left.set_union(left_then_right)

    # Dilate, coalesce, then shrink back: alternations separated by a gap
    # get stitched into one sequence with all their shot scales collected.
    stitched = alternations.dilate(FOUR_SECONDS).coalesce(
        payload_merge_op=merge_named_payload({
            'faces': payload_first,
            'shot_scale': payload_plus
        })).dilate(-FOUR_SECONDS)

    # Re-check the shot scales on the stitched sequences and keep only
    # those of at least TEN_SECONDS.
    sequences = stitched.filter(lambda intrvl: shot_scales_decreasing(
        intrvl.get_payload()['shot_scale'])).filter_length(
            min_length=TEN_SECONDS)

    def non_uniform(shot_scales):
        """Return True when more than one distinct scale other than 1 occurs."""
        distinct = set(shot_scales)
        distinct.discard(1)
        return len(distinct) > 1

    # Finally, drop sequences whose shot scales are uniform.
    sequences = sequences.filter(
        lambda intrvl: non_uniform(intrvl.get_payload()['shot_scale']))

    # Post-process to display in the Esper widget.
    if not TIMELINE_OUTPUT:
        return intrvllists_to_result_with_objects(
            sequences.get_allintervals(), lambda payload, video: [])

    results = intrvllists_to_result(sequences.get_allintervals())
    add_intrvllists_to_result(results,
                              faces_on_left.get_allintervals(),
                              color='black')
    add_intrvllists_to_result(results,
                              faces_on_right.get_allintervals(),
                              color='green')
    return results
Ejemplo n.º 13
0
def hero_shot():
    """Find candidate "hero shot" frames in one film.

    A candidate is a frame of ``FILM_NAME`` whose faces match a one-node
    scene graph (a face of at least ``MIN_FACE_HEIGHT``, matched with
    ``exact=True``) and whose brightness, contrast and sharpness each
    exceed their minimum. Such frames could, for instance, serve as
    preview images.

    Relies on ``F`` being available in the enclosing module scope.

    Returns:
        The result of ``intrvllists_to_result_with_objects`` for the
        matching frames, with one bounding-box object per face, called
        with ``limit=100`` and ``stride=10``.
    """
    from query.models import Face
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import named_payload, in_array, bbox_payload_parser
    from rekall.parsers import merge_dict_parsers, dict_payload_parser
    from rekall.merge_ops import payload_plus, payload_first, merge_named_payload
    from rekall.payload_predicates import payload_satisfies, on_name
    from rekall.spatial_predicates import scene_graph
    from rekall.logical_predicates import and_pred
    from rekall.bbox_predicates import height_at_least, left_of, same_value
    from esper.rekall import intrvllists_to_result_with_objects, bbox_to_result_object

    # Thresholds a frame has to exceed to qualify.
    MIN_FACE_HEIGHT = 0.2
    MIN_BRIGHTNESS = 50
    MIN_SHARPNESS = 50
    MIN_CONTRAST = 30
    FILM_NAME = "star wars the force awakens"

    # Each face row becomes a single-frame interval annotated with its
    # video ID and the image statistics of its frame.
    annotated_faces = Face.objects.annotate(
        min_frame=F('frame__number'),
        max_frame=F('frame__number'),
        video_id=F('frame__video_id'),
        brightness=F('frame__brightness'),
        contrast=F('frame__contrast'),
        sharpness=F('frame__sharpness'))

    # Restrict to the chosen film and to frames that have all three
    # statistics available.
    faces_qs = annotated_faces.filter(frame__video__name=FILM_NAME,
                                      brightness__isnull=False,
                                      contrast__isnull=False,
                                      sharpness__isnull=False)

    # Payload per face: a one-element list of bounding boxes plus the
    # frame statistics copied from the row.
    accessor = VideoIntervalCollection.django_accessor
    payload_parser = merge_dict_parsers([
        named_payload('faces', in_array(bbox_payload_parser(accessor))),
        dict_payload_parser(
            accessor, {
                'brightness': 'brightness',
                'contrast': 'contrast',
                'sharpness': 'sharpness'
            })
    ])
    single_faces = VideoIntervalCollection.from_django_qs(
        faces_qs, with_payload=payload_parser)

    # Coalescing puts all faces of a frame into one interval; the frame
    # statistics are taken from the first face.
    faces = single_faces.coalesce(
        merge_named_payload({
            'faces': payload_plus,
            'brightness': payload_first,
            'contrast': payload_first,
            'sharpness': payload_first
        }))

    # One-node scene graph: a face of at least the minimum height,
    # matched exactly against the faces of the frame.
    tall_face_graph = {
        'nodes': [{
            'name': 'face',
            'predicates': [height_at_least(MIN_FACE_HEIGHT)]
        }],
        'edges': []
    }
    one_tall_face = on_name('faces', scene_graph(tall_face_graph, exact=True))

    def good_image_quality(payload):
        """Check that all three frame statistics exceed their minimum."""
        return (payload['brightness'] > MIN_BRIGHTNESS
                and payload['contrast'] > MIN_CONTRAST
                and payload['sharpness'] > MIN_SHARPNESS)

    hero_shots = faces.filter(
        payload_satisfies(and_pred(one_tall_face, good_image_quality)))

    def faces_to_objects(payload, video_id):
        """Convert the face bounding boxes of a payload to result objects."""
        return [
            bbox_to_result_object(bbox, video_id) for bbox in payload['faces']
        ]

    return intrvllists_to_result_with_objects(hero_shots.get_allintervals(),
                                              faces_to_objects,
                                              limit=100,
                                              stride=10)