Beispiel #1
0
def interview_with_person_x():
    """Find long stretches where a named guest and a host alternate on screen.

    Loads face identities from the sandbox videos (videos that have labeled
    commercials), builds shot-level intervals for hosts and for the guest,
    chains nearby guest/host shots together, and keeps only sequences that
    survive the dilate/coalesce/length filter below.

    Returns:
        The value of ``intrvllists_to_result`` for the interview intervals.
    """
    from query.models import LabeledCommercial, FaceIdentity
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.temporal_predicates import before, after, overlaps
    from rekall.logical_predicates import or_pred
    from esper.rekall import intrvllists_to_result

    guest_name = "bernie sanders"

    # Sandbox videos are the ones that have at least one labeled commercial
    sandbox_videos = []
    for row in LabeledCommercial.objects.distinct('video_id'):
        sandbox_videos.append(row.video_id)

    # Face identities restricted to the sandbox, split into hosts and guest
    identities = FaceIdentity.objects.filter(
        face__shot__video_id__in=sandbox_videos)
    hosts_qs = identities.filter(face__is_host=True)
    guest_qs = identities.filter(identity__name=guest_name)
    guest_qs = guest_qs.filter(probability__gt=0.7)

    def with_shot_bounds(identity_qs):
        # Each identity is represented by the full extent of its shot
        return identity_qs.annotate(
            video_id=F("face__shot__video_id"),
            min_frame=F("face__shot__min_frame"),
            max_frame=F("face__shot__max_frame"))

    # Load host and guest shots into rekall interval collections
    hosts = VideoIntervalCollection.from_django_qs(with_shot_bounds(hosts_qs))
    guest = VideoIntervalCollection.from_django_qs(with_shot_bounds(guest_qs))

    # Shots where the guest and a host are on screen together
    guest_with_host = guest.overlaps(hosts).coalesce()

    # A overlaps B, or A is before B by at most 10, or A is after B by at
    #   most 10 (or_pred is binary, so the three cases are nested)
    overlaps_or_before = or_pred(overlaps(), before(max_dist=10), arity=2)
    nearby_pred = or_pred(overlaps_or_before, after(max_dist=10), arity=2)

    # Chain a joint shot with a neighboring host shot, or with a neighboring
    #   guest shot, and fuse whatever touches
    joint_then_host = guest_with_host.merge(hosts, predicate=nearby_pred)
    joint_then_guest = guest_with_host.merge(guest, predicate=nearby_pred)
    interview_candidates = joint_then_host.set_union(
        joint_then_guest).coalesce()

    # Sequences may be interrupted by shots where neither person appears, so
    #   grow every candidate by 600, fuse neighbors, shrink back, and finally
    #   drop anything shorter than 1350
    grown = interview_candidates.dilate(600).coalesce()
    interviews = grown.dilate(-600).filter_length(min_length=1350)

    return intrvllists_to_result(interviews.get_allintervals())
Beispiel #2
0
def all_faces_rekall():
    """Show the faces of one video, grouped per frame, in the Esper widget.

    Every Face row of the video named ``video_name`` becomes a one-frame
    interval whose payload is a single-element list holding its bounding box;
    coalescing with ``payload_plus`` then concatenates the lists of faces that
    share a frame.

    Returns:
        The value of ``intrvllists_to_result_bbox`` with limit=100, stride=100.
    """
    from query.models import Face
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array, bbox_payload_parser
    from rekall.merge_ops import payload_plus
    from esper.rekall import intrvllists_to_result_bbox
    from esper.stdlib import qs_to_result

    video_name = "inception"

    # A face lives on a single frame, so start and end are the same frame
    faces_qs = Face.objects.filter(frame__video__name=video_name)
    faces_qs = faces_qs.annotate(
        min_frame=F('frame__number'),
        max_frame=F('frame__number'),
        video_id=F('frame__video_id'))

    # Wrap each bounding box in a list so payloads can be concatenated later
    bbox_in_list = in_array(
        bbox_payload_parser(VideoIntervalCollection.django_accessor))

    # NOTE: slow, because every face of the video is materialized
    one_interval_per_face = VideoIntervalCollection.from_django_qs(
        faces_qs, with_payload=bbox_in_list)
    faces_per_frame = one_interval_per_face.coalesce(
        payload_merge_op=payload_plus)

    # Post-process to display in Esper widget
    return intrvllists_to_result_bbox(
        faces_per_frame.get_allintervals(), limit=100, stride=100)
Beispiel #3
0
def consecutive_short_shots():
    """Find runs of NUM_SHOTS back-to-back shots that are each very short.

    A shot qualifies when its duration, computed as
    (max_frame - min_frame) / video fps, lies strictly between 0 and
    MAX_SHOT_DURATION and it was produced by the 'shot-hsvhist-face' labeler.
    Runs are grown one shot at a time by merging with the short-shot set.

    Returns:
        The value of ``intrvllists_to_result_with_objects`` for the runs, with
        no objects drawn, limit=100, stride=1.
    """
    from query.models import Shot
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.temporal_predicates import meets_before
    from esper.rekall import intrvllists_to_result_with_objects
    from django.db.models import ExpressionWrapper, FloatField

    NUM_SHOTS = 3
    MAX_SHOT_DURATION = 0.5

    # Shot duration, computed in the database
    duration_expr = ExpressionWrapper(
        (F('max_frame') - F('min_frame')) / F('video__fps'),
        output_field=FloatField())
    short_shots_qs = Shot.objects.annotate(duration=duration_expr).filter(
        duration__lt=MAX_SHOT_DURATION,
        duration__gt=0.,
        labeler__name='shot-hsvhist-face').all()
    short_shots = VideoIntervalCollection.from_django_qs(short_shots_qs)

    # Start with runs of length one and extend by one short shot per pass
    runs = short_shots
    for run_length in range(2, NUM_SHOTS + 1):
        print('Constructing {} consecutive short shots'.format(run_length))

        extended = runs.merge(
            short_shots,
            predicate=meets_before(epsilon=1),
            working_window=1)
        runs = extended.coalesce().filter_length(min_length=1)

        video_count = len(runs.get_allintervals().keys())
        print('There are {} videos with {} consecutive short shots'.format(
            video_count, run_length))

    def no_objects(a, b):
        # Nothing is drawn on top of the frames
        return []

    return intrvllists_to_result_with_objects(
        runs, no_objects, limit=100, stride=1)
Beispiel #4
0
def get_caption_intrvlcol(phrase, video_ids=None):
    """Search captions for a phrase and return the hits as frame intervals.

    Args:
        phrase: the phrase handed to ``phrase_search``.
        video_ids: optional collection of video IDs to search; ``None`` means
            every video.

    Returns:
        A VideoIntervalCollection keyed by video ID. Each hit is converted to
        frame numbers by multiplying the posting's start/end by the video's
        fps and truncating to int; every interval carries the payload 0.

    Raises:
        KeyError: if a search result refers to a video that was not loaded.
    """
    results = phrase_search(phrase, video_ids)

    # Compare against None by identity so array-like video_ids behave
    if video_ids is None:
        video_qs = Video.objects.all()
    else:
        video_qs = Video.objects.filter(id__in=video_ids).all()
    videos = {v.id: v for v in video_qs}

    # Group the converted postings by video
    tuples_by_video = {}
    for doc in results:
        for p in doc.postings:
            fps = videos[doc.id].fps
            tuples_by_video.setdefault(doc.id, []).append(
                (int(p.start * fps), int(p.end * fps), 0))

    phrase_intrvllists = {
        video_id: IntervalList(tuples)
        for video_id, tuples in tuples_by_video.items()
    }
    phrase_intrvlcol = VideoIntervalCollection(phrase_intrvllists)
    print('Get {} intervals for phrase "{}"'.format(
        count_intervals(phrase_intrvlcol), phrase))
    return phrase_intrvlcol
Beispiel #5
0
def faces_with_gender():
    """Show faces with their gender label for videos matching a name.

    Every FaceGender row whose video name contains VIDEO_NAME_CONTAINS becomes
    a one-frame interval. Its payload is a single-element list holding a dict
    that merges the bounding box with a 'gender' entry; coalescing with
    ``payload_plus`` concatenates the lists of faces that share a frame.

    Returns:
        The value of ``intrvllists_to_result_bbox`` with limit=100,
        stride=1000.
    """
    from query.models import FaceGender
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array, bbox_payload_parser, merge_dict_parsers, dict_payload_parser
    from rekall.merge_ops import payload_plus
    from esper.rekall import intrvllists_to_result_bbox

    VIDEO_NAME_CONTAINS = "harry potter"

    # Pull frame number, video ID, bounding box and gender name up onto the
    # FaceGender row so the rekall parsers can read them as plain attributes
    gender_rows = FaceGender.objects.annotate(
        min_frame=F('face__frame__number'),
        max_frame=F('face__frame__number'),
        video_id=F('face__frame__video_id'),
        bbox_x1=F('face__bbox_x1'),
        bbox_y1=F('face__bbox_y1'),
        bbox_x2=F('face__bbox_x2'),
        bbox_y2=F('face__bbox_y2'),
        gender_name=F('gender__name'))
    gender_rows = gender_rows.filter(
        face__frame__video__name__contains=VIDEO_NAME_CONTAINS)

    accessor = VideoIntervalCollection.django_accessor
    bbox_and_gender = merge_dict_parsers([
        bbox_payload_parser(accessor),
        dict_payload_parser(accessor, {'gender': 'gender_name'})
    ])

    per_face = VideoIntervalCollection.from_django_qs(
        gender_rows, with_payload=in_array(bbox_and_gender))
    faces = per_face.coalesce(payload_merge_op=payload_plus)

    return intrvllists_to_result_bbox(
        faces.get_allintervals(), limit=100, stride=1000)
Beispiel #6
0
def three_people():
    """Find frames showing exactly three large faces side by side.

    Faces are grouped per frame, then each frame's list of bounding boxes is
    matched (with ``exact=True``) against a scene graph of three faces that
    are each at least MIN_FACE_HEIGHT tall, ordered left to right, with
    neighboring faces sharing the same 'y1' value up to EPSILON.

    Returns:
        The value of ``intrvllists_to_result_bbox`` with limit=100, stride=100.
    """
    from query.models import Face
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array, bbox_payload_parser
    from rekall.merge_ops import payload_plus
    from esper.rekall import intrvllists_to_result_bbox
    from rekall.payload_predicates import payload_satisfies
    from rekall.spatial_predicates import scene_graph
    from rekall.bbox_predicates import height_at_least, left_of, same_value

    MIN_FACE_HEIGHT = 0.3
    EPSILON = 0.05

    # A face lives on a single frame, so start and end are the same frame
    faces_qs = Face.objects.annotate(
        min_frame=F('frame__number'),
        max_frame=F('frame__number'),
        video_id=F('frame__video_id'))

    # One interval per face with a [bbox] payload, then one interval per
    # frame whose payload lists every face in that frame.
    # NOTE: slow, because every face in the database is materialized
    bbox_in_list = in_array(
        bbox_payload_parser(VideoIntervalCollection.django_accessor))
    face_lists = VideoIntervalCollection.from_django_qs(
        faces_qs, with_payload=bbox_in_list).coalesce(
            payload_merge_op=payload_plus)

    # Three tall faces; each neighboring pair is left-to-right and level
    face_names = ['face1', 'face2', 'face3']
    neighbor_pairs = [('face1', 'face2'), ('face2', 'face3')]
    three_people_scene_graph = {
        'nodes': [{
            'name': face_name,
            'predicates': [height_at_least(MIN_FACE_HEIGHT)]
        } for face_name in face_names],
        'edges': [{
            'start': left_face,
            'end': right_face,
            'predicates': [left_of(),
                           same_value('y1', epsilon=EPSILON)]
        } for left_face, right_face in neighbor_pairs]
    }

    matches_graph = payload_satisfies(
        scene_graph(three_people_scene_graph, exact=True))
    matching_frames = face_lists.filter(matches_graph)

    # Post-process to display in Esper widget
    return intrvllists_to_result_bbox(
        matching_frames.get_allintervals(), limit=100, stride=100)
Beispiel #7
0
def intrvlcol_second2frame(intrvlcol):
    """Rescale an interval collection by each video's frame rate.

    For every video in ``intrvlcol`` the interval start and end are multiplied
    by that video's ``fps`` and truncated to int; payloads are carried over
    unchanged.

    Returns:
        A new VideoIntervalCollection with the rescaled intervals.
    """
    frame_lists = {}
    for video_id, second_list in intrvlcol.get_allintervals().items():
        fps = Video.objects.filter(id=video_id)[0].fps
        frame_tuples = []
        for interval in second_list.get_intervals():
            frame_tuples.append((int(interval.start * fps),
                                 int(interval.end * fps),
                                 interval.payload))
        frame_lists[video_id] = IntervalList(frame_tuples)
    return VideoIntervalCollection(frame_lists)
Beispiel #8
0
def man_woman_up_close():
    """Find frames with exactly one large male face and one large female face.

    Faces are grouped per frame with a payload holding the bounding box, the
    gender name, the gender probability and the face probability. A frame
    matches when its faces fit (with ``exact=True``) a scene graph of one 'M'
    node and one 'F' node, each at least MIN_FACE_HEIGHT tall and above both
    confidence thresholds.

    Returns:
        The value of ``intrvllists_to_result_bbox`` with limit=100, stride=100.
    """
    from query.models import FaceGender
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array, bbox_payload_parser, merge_dict_parsers, dict_payload_parser
    from rekall.merge_ops import payload_plus
    from esper.rekall import intrvllists_to_result_bbox
    from rekall.payload_predicates import payload_satisfies
    from rekall.spatial_predicates import scene_graph
    from rekall.bbox_predicates import height_at_least

    MIN_FACE_CONFIDENCE = 0.95
    MIN_GENDER_CONFIDENCE = 0.95
    MIN_FACE_HEIGHT = 0.6

    # NOTE(review): video_name is not defined in this function; it must come
    # from module scope, otherwise this line raises NameError -- confirm.
    # Annotate face rows with start and end frames and the video ID
    faces_with_gender = FaceGender.objects.filter(face__frame__video__name=video_name).annotate(
        min_frame=F('face__frame__number'),
        max_frame=F('face__frame__number'),
        video_id=F('face__frame__video_id'),
        bbox_x1=F('face__bbox_x1'),
        bbox_y1=F('face__bbox_y1'),
        bbox_x2=F('face__bbox_x2'),
        bbox_y2=F('face__bbox_y2'),
        gender_name=F('gender__name'),
        face_probability=F('face__probability'))

    faces = VideoIntervalCollection.from_django_qs(
        faces_with_gender,
        with_payload=in_array(merge_dict_parsers([
            bbox_payload_parser(VideoIntervalCollection.django_accessor),
            dict_payload_parser(VideoIntervalCollection.django_accessor, { 'gender': 'gender_name' }),
            dict_payload_parser(VideoIntervalCollection.django_accessor, { 'gender_probability': 'probability' }),
            dict_payload_parser(VideoIntervalCollection.django_accessor, { 'face_probability': 'face_probability' })
        ]))
    ).coalesce(payload_merge_op=payload_plus)

    def confident_large_face(gender):
        # Predicates for one face node. The gender is compared with ==, not
        # `is`: strings loaded from the database are not guaranteed to be the
        # same object as the literal, so an identity test can never be relied
        # on to match.
        return [
            height_at_least(MIN_FACE_HEIGHT),
            lambda payload: payload['gender'] == gender,
            lambda payload: payload['face_probability'] > MIN_FACE_CONFIDENCE,
            lambda payload: payload['gender_probability'] > MIN_GENDER_CONFIDENCE
        ]

    graph = {
        'nodes': [
            { 'name': 'face_male', 'predicates': confident_large_face('M') },
            { 'name': 'face_female', 'predicates': confident_large_face('F') },
        ],
        'edges': []
    }

    mf_up_close = faces.filter(payload_satisfies(
        scene_graph(graph, exact=True)))

    return intrvllists_to_result_bbox(mf_up_close.get_allintervals(), limit=100, stride=100)
Beispiel #9
0
def label_videos_with_shot_scale(video_ids):
    """Attach a shot-scale label to every frame that has a face or a pose.

    Face rows and PoseMeta rows of the given videos are loaded as one-frame
    intervals with payload dicts holding 'face', 'pose' and 'frame_id'. The
    two collections are unioned and coalesced so each frame carries both
    lists, and ``payload_to_shot_scale`` is applied to the merged payload.

    Args:
        video_ids: IDs of the videos to label.

    Returns:
        A collection whose payloads are dicts with the keys 'pose', 'face',
        'frame_id' and 'shot_scale'.
    """
    def one_frame_rows(model):
        # Both tables hang off a frame, so they are annotated identically
        return model.objects.annotate(
            min_frame=F('frame__number'),
            max_frame=F('frame__number'),
            video_id=F('frame__video__id')).filter(video_id__in=video_ids)

    def frame_id_parser():
        return rk.parsers.dict_payload_parser(
            VideoIntervalCollection.django_accessor,
            {'frame_id': 'frame_id'})

    faces = one_frame_rows(Face)
    poses = one_frame_rows(PoseMeta)

    # Face intervals get an empty 'pose' list and vice versa, so both
    # collections share the same payload keys before they are merged
    face_parser = rk.parsers.merge_dict_parsers([
        with_face(),
        with_named_empty_list('pose'),
        frame_id_parser()
    ])
    face_frames = VideoIntervalCollection.from_django_qs(
        faces, with_payload=face_parser)

    pose_parser = rk.parsers.merge_dict_parsers([
        with_pose(),
        with_named_empty_list('face'),
        frame_id_parser()
    ])
    pose_frames = VideoIntervalCollection.from_django_qs(
        poses, with_payload=pose_parser)

    # Per frame: concatenate the lists, keep the first frame_id
    merge_payloads = merge_named_payload({
        'pose': rk.merge_ops.payload_plus,
        'face': rk.merge_ops.payload_plus,
        'frame_id': rk.merge_ops.payload_first
    })
    faces_with_pose = face_frames.set_union(pose_frames).coalesce(
        merge_payloads)

    def add_shot_scale(intrvl):
        merged = intrvl.payload
        return (intrvl.start, intrvl.end, {
            'pose': merged['pose'],
            'face': merged['face'],
            'frame_id': merged['frame_id'],
            'shot_scale': payload_to_shot_scale(merged)
        })

    return faces_with_pose.map(add_shot_scale)
Beispiel #10
0
def get_commercial_intrvlcol(video_ids=None, granularity='frame'):
    """Load Commercial rows as a VideoIntervalCollection.

    Args:
        video_ids: optional collection of video IDs to restrict to; ``None``
            loads the commercials of every video.
        granularity: 'second' passes the result through
            ``intrvlcol_frame2second``; any other value returns the
            collection as loaded.

    Returns:
        The commercial intervals as a VideoIntervalCollection.
    """
    commercial_qs = (Commercial.objects.all() if video_ids is None
                     else Commercial.objects.filter(video_id__in=video_ids))

    annotated_qs = commercial_qs.annotate(video_id=F("video_id"))
    commercial = VideoIntervalCollection(qs_to_intrvllists(annotated_qs))

    if granularity != 'second':
        return commercial
    return intrvlcol_frame2second(commercial)
Beispiel #11
0
def manual_shots_rekall():
    """Show every shot whose labeler name contains 'manual'.

    Returns:
        The value of ``intrvllists_to_result`` for the shot intervals.
    """
    from query.models import Shot, Labeler
    from rekall.video_interval_collection import VideoIntervalCollection
    from esper.rekall import intrvllists_to_result
    from esper.stdlib import qs_to_result

    shots_qs = Shot.objects.filter(
        labeler__name__contains='manual')

    shots = VideoIntervalCollection.from_django_qs(shots_qs)

    # Call the helper this function imports. The previous code called
    # intrvllists_to_result_with_objects, which is not imported here and was
    # given no payload-to-objects callback.
    return intrvllists_to_result(shots.get_allintervals())
Beispiel #12
0
def panels_rekall():
    """Find shots in the sandbox videos that look like three-person panels.

    Every face is represented by the full extent of its shot with a [bbox]
    payload; coalescing concatenates the faces of overlapping intervals. A
    merged interval matches when its faces fit (with ``exact=True``) a scene
    graph of three faces at least 0.3 tall, ordered left to right, with
    neighboring faces sharing the same 'y1' value up to 0.05.

    Returns:
        The value of ``intrvllists_to_result_bbox`` for the matches.
    """
    from query.models import LabeledCommercial, Face
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array, bbox_payload_parser
    from rekall.merge_ops import payload_plus
    from rekall.bbox_predicates import height_at_least, same_value, left_of
    from rekall.spatial_predicates import scene_graph
    from rekall.payload_predicates import payload_satisfies
    from esper.rekall import intrvllists_to_result_bbox

    # Sandbox videos are the ones that have at least one labeled commercial
    sandbox_videos = []
    for row in LabeledCommercial.objects.distinct('video_id'):
        sandbox_videos.append(row.video_id)

    faces_qs = Face.objects.filter(shot__video_id__in=sandbox_videos)
    faces_qs = faces_qs.annotate(
        video_id=F("shot__video_id"),
        min_frame=F("shot__min_frame"),
        max_frame=F("shot__max_frame"))

    # One interval for each face, then merge so that each interval carries
    # the list of all its faces
    bbox_in_list = in_array(
        bbox_payload_parser(VideoIntervalCollection.django_accessor))
    faces = VideoIntervalCollection.from_django_qs(
        faces_qs, with_payload=bbox_in_list)
    faces = faces.coalesce(payload_merge_op=payload_plus)

    # Scene graph for things that look like panels: three tall faces, each
    # neighboring pair level with each other and ordered left to right
    face_names = ['face1', 'face2', 'face3']
    neighbor_pairs = [('face1', 'face2'), ('face2', 'face3')]
    three_faces_scene_graph = {
        'nodes': [
            {'name': face_name, 'predicates': [height_at_least(0.3)]}
            for face_name in face_names
        ],
        'edges': [
            {'start': left_face, 'end': right_face,
             'predicates': [same_value('y1', epsilon=0.05), left_of()]}
            for left_face, right_face in neighbor_pairs
        ]
    }

    looks_like_panel = payload_satisfies(
        scene_graph(three_faces_scene_graph, exact=True))
    panels = faces.filter(looks_like_panel)

    return intrvllists_to_result_bbox(panels.get_allintervals())
Beispiel #13
0
def cinematic_shots_rekall():
    """Show the shots of video 1 that are flagged ``cinematic``.

    Returns:
        The value of ``intrvllists_to_result_with_objects`` for the shot
        intervals, with no objects drawn on the frames.
    """
    from query.models import Shot, Labeler
    from rekall.video_interval_collection import VideoIntervalCollection
    from esper.rekall import intrvllists_to_result_with_objects
    from esper.stdlib import qs_to_result

    video_ids = [1]

    cinematic_qs = Shot.objects.filter(video_id__in=video_ids, cinematic=True)
    shot_intervals = VideoIntervalCollection.from_django_qs(
        cinematic_qs).get_allintervals()

    def no_objects(payload, video):
        # Nothing is drawn on top of the frames
        return []

    return intrvllists_to_result_with_objects(shot_intervals, no_objects)
Beispiel #14
0
def split_intrvlcol(intrvlcol, seg_length):
    """Cut every interval of a collection into pieces of at most seg_length.

    Each interval is split from its start into consecutive segments of
    exactly ``seg_length``; the final segment holds whatever remains and may
    be shorter. Every piece carries the payload of the interval it came from.
    Intervals whose end is not greater than their start produce no pieces.

    Args:
        intrvlcol: the collection to split.
        seg_length: maximum length of a piece; must be positive.

    Returns:
        A new VideoIntervalCollection holding the pieces.

    Raises:
        ValueError: if ``seg_length`` is not positive (the splitting loop
            would otherwise never terminate).
    """
    if seg_length <= 0:
        raise ValueError(
            'seg_length must be positive, got {!r}'.format(seg_length))

    intrvllists_split = {}
    for video_id, intrvllist in intrvlcol.get_allintervals().items():
        intervals_split = []
        for i in intrvllist.get_intervals():
            duration = i.end - i.start
            start = i.start
            while duration > 0:
                if duration > seg_length:
                    # Emit one full-length segment and keep going
                    intervals_split.append((start, start + seg_length, i.payload))
                    duration -= seg_length
                    start += seg_length
                else:
                    # Whatever is left fits in one last segment
                    intervals_split.append((start, start + duration, i.payload))
                    duration = 0
        intrvllists_split[video_id] = IntervalList(intervals_split)
    return VideoIntervalCollection(intrvllists_split)
Beispiel #15
0
def frames_with_character_x():
    """Show frames in which a given character appears.

    Every FaceCharacterActor row becomes a one-frame interval whose payload is
    a single-element list holding the bounding box merged with a 'character'
    entry. After grouping per frame, a frame is kept when its faces satisfy a
    one-node scene graph requiring a face whose character equals
    ``character_name``.

    Returns:
        The value of ``intrvllists_to_result_bbox`` with limit=100,
        stride=1000.
    """
    from query.models import FaceCharacterActor
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array, bbox_payload_parser, merge_dict_parsers, dict_payload_parser
    from rekall.merge_ops import payload_plus
    from rekall.payload_predicates import payload_satisfies
    from rekall.spatial_predicates import scene_graph
    from esper.rekall import intrvllists_to_result_bbox

    character_name = "harry potter"

    # Pull frame number, video ID, bounding box and character name up onto
    # the row so the rekall parsers can read them as plain attributes
    character_rows = FaceCharacterActor.objects.annotate(
        min_frame=F('face__frame__number'),
        max_frame=F('face__frame__number'),
        video_id=F('face__frame__video_id'),
        bbox_x1=F('face__bbox_x1'),
        bbox_y1=F('face__bbox_y1'),
        bbox_x2=F('face__bbox_x2'),
        bbox_y2=F('face__bbox_y2'),
        character_name=F('characteractor__character__name'))

    accessor = VideoIntervalCollection.django_accessor
    bbox_and_character = merge_dict_parsers([
        bbox_payload_parser(accessor),
        dict_payload_parser(accessor, {'character': 'character_name'}),
    ])
    faces_with_identity = VideoIntervalCollection.from_django_qs(
        character_rows,
        with_payload=in_array(bbox_and_character)).coalesce(
            payload_merge_op=payload_plus)

    def is_wanted_character(f):
        return f['character'] == character_name

    character_graph = {
        'nodes': [{
            'name': 'face1',
            'predicates': [is_wanted_character]
        }],
        'edges': []
    }
    faces_with_actor = faces_with_identity.filter(
        payload_satisfies(scene_graph(character_graph)))

    return intrvllists_to_result_bbox(
        faces_with_actor.get_allintervals(), limit=100, stride=1000)
Beispiel #16
0
def faces_from_poses_rekall():
    """Show face bounding boxes derived from pose keypoints.

    Every Pose row becomes a one-frame interval whose payload is a
    single-element list holding a box around the pose's nose, neck, eye and
    ear keypoints; coalescing with ``payload_plus`` concatenates the lists of
    poses that share a frame.

    Returns:
        The value of ``intrvllists_to_result_bbox`` with limit=100, stride=100.
    """
    from query.models import Pose
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array, bbox_payload_parser
    from rekall.merge_ops import payload_plus
    from esper.rekall import intrvllists_to_result_bbox
    from esper.stdlib import qs_to_result

    # Annotate pose rows with start and end frames and the video ID
    poses = Pose.objects.annotate(min_frame=F('frame__number'),
                                  max_frame=F('frame__number'),
                                  video_id=F('frame__video_id'))

    # Parse the pose keypoints and get a bounding box around the face
    def get_face_bbox(pose):
        pose_keypoints = pose.pose_keypoints()
        face_indices = [
            Pose.Nose, Pose.Neck, Pose.REye, Pose.LEye, Pose.REar, Pose.LEar
        ]
        face_points = [pose_keypoints[index] for index in face_indices]

        # Skip keypoints whose third component is zero (presumably the
        # detection confidence -- TODO confirm). This must be a value
        # comparison: `is not 0.0` tests object identity and never filtered
        # anything out.
        detected = [point for point in face_points if point[2] != 0.0]
        if not detected:
            # No usable keypoint: fall back to all of them so min()/max()
            # still have input, which is what the unfiltered code produced
            detected = face_points

        x_vals = [point[0] for point in detected]
        y_vals = [point[1] for point in detected]
        return {
            'x1': min(x_vals),
            'y1': min(y_vals),
            'x2': max(x_vals),
            'y2': max(y_vals)
        }

    # Materialize all the poses and load them into rekall with bounding box payloads
    # Then coalesce them so that all faces in the same frame are in the same interval
    # NOTE that this is slow right now since we're loading all poses!
    vids = VideoIntervalCollection.from_django_qs(poses,
        with_payload=in_array(get_face_bbox)) \
        .coalesce(payload_merge_op=payload_plus)

    # Post-process to display in Esper widget
    return intrvllists_to_result_bbox(vids.get_allintervals(),
                                      limit=100,
                                      stride=100)
Beispiel #17
0
def all_captions():
    """Show every caption word of video 1 as an interval.

    Each entry returned by ``get_all_segments`` is read as
    (start, end, word); start and end are multiplied by the video's fps and
    the word becomes the payload.

    Returns:
        The value of ``intrvllists_to_result_with_objects`` for the caption
        intervals, with no objects drawn on the frames.
    """
    from esper.captions import get_all_segments
    from rekall.video_interval_collection import VideoIntervalCollection
    from esper.rekall import intrvllists_to_result_with_objects

    video_ids = [1]

    # Only aligned captions are in the caption index
    results = get_all_segments(video_ids)

    intervals_by_video = {}
    for video_id, words in results:
        # Fetch the frame rate once per video; the previous code issued two
        # database lookups for every single word
        fps = Video.objects.get(id=video_id).fps
        intervals_by_video[video_id] = [
            (word[0] * fps,  # start frame
             word[1] * fps,  # end frame
             word[2])  # payload is the word (string)
            for word in words
        ]
    caption_results = VideoIntervalCollection(intervals_by_video)

    return intrvllists_to_result_with_objects(caption_results, lambda a, b: [])
Beispiel #18
0
def multi_person_one_phrase(phrase, filters=None):
    '''
    Get all intervals in which the phrase is being said

    The phrase is upper-cased and searched in the videos flagged
    threeyears_dataset. The remaining filters are only applied when
    'with_face' is present.

    @phrase: input phrase to be searched
    @filters: optional dict; defaults to no filtering
        'with_face': if the key is present, keep only intervals lying inside
                     a shot that contains exactly one face
        'gender': required gender name of that face
        'limit': stop once this many videos have matching intervals
    @return: the result of intrvlcol2list for the remaining intervals
    '''
    # Avoid a shared mutable default argument
    if filters is None:
        filters = {}

    videos = Video.objects.filter(threeyears_dataset=True)
    video_ids = [video.id for video in videos]
    phrase_intrvlcol = get_caption_intrvlcol(phrase.upper(), video_ids)

    def make_face_filter(video_id):
        # Bind video_id explicitly instead of reading the loop variable of
        # the enclosing function at call time
        def fn(i):
            faces = Face.objects.filter(shot__video__id=video_id,
                                        shot__min_frame__lte=i.start,
                                        shot__max_frame__gte=i.end)
            if len(faces) != 1:
                return False
            if 'gender' in filters:
                faceGender = FaceGender.objects.filter(face__id=faces[0].id)[0]
                if faceGender.gender.name != filters['gender']:
                    return False
            return True
        return fn

    if 'with_face' in filters:
        print('Filtering with face...')
        intrvlcol_withface = {}
        for video_id, intrvllist in phrase_intrvlcol.intervals.items():
            intrvllist_withface = intrvllist.filter(make_face_filter(video_id))
            if intrvllist_withface.size() > 0:
                intrvlcol_withface[video_id] = intrvllist_withface
            if 'limit' in filters and len(
                    intrvlcol_withface) == filters['limit']:
                break
        phrase_intrvlcol = VideoIntervalCollection(intrvlcol_withface)

    return intrvlcol2list(phrase_intrvlcol)
Beispiel #19
0
def get_numface_intrvlcol(relevant_shots, num_face=1):
    """Return the shots among ``relevant_shots`` showing exactly num_face faces.

    Every face of the given shots is represented by the full extent of its
    shot with a [bbox] payload; coalescing with ``payload_plus`` concatenates
    the faces of overlapping intervals, and only intervals whose payload has
    exactly ``num_face`` entries are kept. The number of kept intervals is
    printed.

    Args:
        relevant_shots: shots to look at (materialized with ``list``).
        num_face: required number of faces per interval.

    Returns:
        The filtered VideoIntervalCollection.
    """
    face_rows = Face.objects.filter(shot__in=list(relevant_shots))
    face_rows = face_rows.annotate(video_id=F('shot__video_id'))
    face_rows = face_rows.annotate(min_frame=F('shot__min_frame'))
    face_rows = face_rows.annotate(max_frame=F('shot__max_frame'))

    # NOTE: slow, because every face of the shots is materialized
    bbox_in_list = in_array(
        bbox_payload_parser(VideoIntervalCollection.django_accessor))
    faces_per_interval = VideoIntervalCollection.from_django_qs(
        face_rows, with_payload=bbox_in_list).coalesce(
            payload_merge_op=payload_plus)
    numface_intrvlcol = faces_per_interval.filter(
        payload_satisfies(length_exactly(num_face)))

    num_intrvl = sum(
        intrvllist.size()
        for intrvllist in numface_intrvlcol.get_allintervals().values())
    print("Get {} relevant {} face intervals".format(num_intrvl, num_face))
    return numface_intrvlcol
Beispiel #20
0
def all_poses():
    """Show the poses found on a strided sample of frames.

    Takes every STRIDE-th PoseMeta row out of the first LIMIT * STRIDE rows to
    pick frames, then loads all poses on each picked frame through
    ``pose_wrapper.get`` and draws them with ``pose_to_dict``.

    Returns:
        The value of ``intrvllists_to_result_with_objects`` for the sampled
        frames.
    """
    from query.models import PoseMeta
    from esper.stdlib import pose_to_dict, simple_result
    import esper.pose_wrapper as pw
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array
    from rekall.merge_ops import payload_plus
    from esper.rekall import intrvllists_to_result_with_objects

    STRIDE = 1000
    LIMIT = 100

    # PoseMeta is a table that contains pose ID, labeler, and a pointer to
    #   a Frame.
    # NOTE that PoseMeta ID's have NO RELATION to Pose ID's.
    pose_meta_qs = PoseMeta.objects.annotate(
        min_frame=F('frame__number'),
        max_frame=F('frame__number'),
        video_id=F('frame__video_id'))

    def frame_key(pose_meta_obj):
        # Remember which video and frame the sampled row belongs to
        return (pose_meta_obj.video_id, pose_meta_obj.min_frame)

    # Coalesce so that each sampled frame appears only once
    sampled_rows = pose_meta_qs[:LIMIT * STRIDE:STRIDE]
    frames = VideoIntervalCollection.from_django_qs(
        sampled_rows, with_payload=frame_key).coalesce()

    def load_poses(interval):
        # pose_wrapper.get takes a PoseMeta queryset or list of PoseMeta
        #   objects and returns a list of PoseWrapper objects.
        rows_on_frame = pose_meta_qs.filter(
            video_id=interval.payload[0],
            min_frame=interval.payload[1]).all()
        return (interval.start, interval.end, pw.get(rows_on_frame))

    poses = frames.map(load_poses)

    def draw_poses(pose_wrappers, video_id):
        # pose_to_dict turns a PoseWrapper into something drawable
        return [pose_to_dict(wrapper) for wrapper in pose_wrappers]

    return intrvllists_to_result_with_objects(poses, draw_poses)
0
def all_face_landmarks():
    """Show the face landmarks found on a strided sample of frames.

    Takes every STRIDE-th Face row out of the first LIMIT * STRIDE rows to
    pick frames, then loads the landmarks of all faces on each picked frame
    through ``face_landmarks_wrapper.get`` and draws them with
    ``face_landmarks_to_dict``.

    Returns:
        The value of ``intrvllists_to_result_with_objects`` for the sampled
        frames.
    """
    from query.models import Face
    from esper.stdlib import face_landmarks_to_dict, simple_result
    import esper.face_landmarks_wrapper as flw
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array
    from rekall.merge_ops import payload_plus
    from esper.rekall import intrvllists_to_result_with_objects

    STRIDE = 1000
    LIMIT = 100

    # Face landmarks are keyed by Face ID's.
    faces_qs = Face.objects.annotate(
        min_frame=F('frame__number'),
        max_frame=F('frame__number'),
        video_id=F('frame__video_id'))

    def frame_key(face_obj):
        # Remember which video and frame the sampled row belongs to
        return (face_obj.video_id, face_obj.min_frame)

    # Coalesce so that each sampled frame appears only once
    sampled_rows = faces_qs[:LIMIT * STRIDE:STRIDE]
    frames = VideoIntervalCollection.from_django_qs(
        sampled_rows, with_payload=frame_key).coalesce()

    def load_landmarks(interval):
        # face_landmarks_wrapper.get takes a Face queryset or list of Face
        #   objects and returns a list of LandmarksWrapper objects.
        faces_on_frame = faces_qs.filter(
            video_id=interval.payload[0],
            min_frame=interval.payload[1]).all()
        return (interval.start, interval.end, flw.get(faces_on_frame))

    landmarks = frames.map(load_landmarks)

    def draw_landmarks(landmarks_wrappers, video_id):
        # face_landmarks_to_dict turns a LandmarksWrapper into something
        #   drawable
        return [face_landmarks_to_dict(wrapper)
                for wrapper in landmarks_wrappers]

    return intrvllists_to_result_with_objects(landmarks, draw_landmarks)
Beispiel #22
0
def caption_search():
    """Show where a fixed set of phrases is said in the "star wars" videos.

    Runs ``topic_search`` with ``window_size=0`` over the videos whose name
    contains "star wars" and multiplies every posting's start and end by the
    video's fps. Every interval carries the payload 0.

    Returns:
        The value of ``intrvllists_to_result_with_objects`` for the hits, with
        no objects drawn on the frames.
    """
    from esper.captions import topic_search
    from rekall.video_interval_collection import VideoIntervalCollection
    from esper.rekall import intrvllists_to_result_with_objects

    phrases = [
        'may the Force be with you', 'may the force be with you',
        'May the Force be with you', 'May the force be with you'
    ]
    results = topic_search(
        phrases,
        window_size=0,
        video_ids=[
            vid.id
            for vid in Video.objects.filter(name__contains="star wars").all()
        ])

    intervals_by_video = {}
    for r in results:
        # Fetch the frame rate once per video; the previous code issued two
        # database lookups for every single posting
        fps = Video.objects.get(id=r.id).fps
        intervals_by_video[r.id] = [
            (p.start * fps, p.end * fps, 0) for p in r.postings
        ]
    caption_results = VideoIntervalCollection(intervals_by_video)

    return intrvllists_to_result_with_objects(caption_results, lambda a, b: [])
Beispiel #23
0
def get_person_intrvlcol(person_name, **kwargs):
    """Build a VideoIntervalCollection of intervals showing ``person_name``.

    Face identities are selected by labeler name
    (``face-identity-converted:<name>`` or ``face-identity:<name>``, with the
    name lower-cased) and a probability above 0.9.

    Keyword Args:
        labeler: ``'old'`` selects identities attached to a shot and emits
            one interval per shot (payload: shot id). Any other value, or
            omitting the argument, selects identities on frames with
            ``shot_boundary=False``, emits one ``[frame, frame + 1]``
            interval per face (payload: frame number), then dilates each by
            ``int(fps * 1.6)`` and coalesces.
        large_face: when this key is present (regardless of its value) only
            faces with a bounding-box height of at least 0.3 are kept.

    Returns:
        The resulting VideoIntervalCollection. The interval count is also
        printed.
    """
    # A missing 'labeler' used to raise KeyError; fall back to the new
    # labeler model instead.
    use_old_labeler = kwargs.get('labeler') == 'old'
    name = person_name.lower()

    if use_old_labeler:
        faceIDs = FaceIdentity.objects.exclude(face__shot__isnull=True)
    else:
        faceIDs = FaceIdentity.objects.filter(
            face__frame__shot_boundary=False)

    # Identity, confidence and face-height handling is shared by both models.
    faceIDs = faceIDs \
        .filter(Q(labeler__name='face-identity-converted:' + name) |
                Q(labeler__name='face-identity:' + name)) \
        .filter(probability__gt=0.9) \
        .annotate(height=F("face__bbox_y2") - F("face__bbox_y1"))
    if 'large_face' in kwargs:
        faceIDs = faceIDs.filter(height__gte=0.3)

    if use_old_labeler:
        person_intrvllists = qs_to_intrvllists(
            faceIDs.annotate(video_id=F("face__shot__video_id"))
                   .annotate(shot_id=F("face__shot_id"))
                   .annotate(min_frame=F("face__shot__min_frame"))
                   .annotate(max_frame=F("face__shot__max_frame")),
            schema={
                'start': 'min_frame',
                'end': 'max_frame',
                'payload': 'shot_id'
            })
    else:
        person_intrvllists_raw = qs_to_intrvllists(
            faceIDs.annotate(video_id=F("face__frame__video_id"))
                   .annotate(frame_id=F("face__frame__number"))
                   .annotate(min_frame=F("face__frame__number"))
                   .annotate(max_frame=F("face__frame__number") + 1),
            schema={
                'start': 'min_frame',
                'end': 'max_frame',
                'payload': 'frame_id'
            })
        # Dilate the per-frame intervals and coalesce them into segments.
        person_intrvllists = {}
        for video_id, intrvllist in person_intrvllists_raw.items():
            video = Video.objects.filter(id=video_id)[0]
            person_intrvllists[video_id] = intrvllist.dilate(
                int(video.fps * 1.6)).coalesce()

    person_intrvlcol = VideoIntervalCollection(person_intrvllists)

    print("Get {} intervals for person {}".format(
        count_intervals(person_intrvlcol), person_name))
    return person_intrvlcol
Beispiel #24
0
def shot_reverse_shot_complex():
    """Find alternating right/left face shot sequences in one film.

    Works on videos whose name contains ``VIDEO_NAME``. For each shot (from
    labeler ``SHOTS_LABELER_ID``) that overlaps frames with a sufficiently
    tall face in the right (or left) region and at most
    ``MAX_FACES_ON_SCREEN`` faces, the tallest face per frame is tracked and
    the shot is kept only if that face's center never moves more than
    ``MAX_FACE_MOVEMENT`` between consecutive frames. Right-left-right and
    left-right-left runs of such shots are then merged, unioned and coalesced.

    Returns:
        The result of ``intrvllists_to_result_with_objects`` for the found
        sequences, with no objects attached.
    """
    from query.models import Face, Shot
    from rekall.temporal_predicates import overlaps
    from rekall.merge_ops import payload_second, payload_plus
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.interval_list import Interval, IntervalList
    from rekall.parsers import in_array, bbox_payload_parser
    from rekall.payload_predicates import payload_satisfies
    from rekall.list_predicates import length_at_most
    from rekall.logical_predicates import and_pred
    from rekall.spatial_predicates import scene_graph, make_region
    from rekall.temporal_predicates import before, after
    from rekall.bbox_predicates import height_at_least
    from esper.rekall import intrvllists_to_result_with_objects

    VIDEO_NAME = 'godfather part iii'

    MAX_FACE_MOVEMENT = 0.15
    MIN_FACE_HEIGHT = 0.2
    MAX_FACES_ON_SCREEN = 4
    RIGHT_HALF_MIN_X = 0.33
    LEFT_HALF_MAX_X = 0.66
    SHOTS_LABELER_ID = 64
    # Faces are sampled every 12 frames.
    SAMPLING_RATE = 12

    # --- helpers -----------------------------------------------------------

    def wrap_list(intvl):
        # Nest the payload one level deeper so coalescing with payload_plus
        # concatenates per-frame entries.
        intvl.payload = [intvl.payload]
        return intvl

    def get_height(box):
        return box['y2'] - box['y1']

    def get_center(box):
        return ((box['x1'] + box['x2']) / 2, (box['y1'] + box['y2']) / 2)

    def get_distance(pt1, pt2):
        return np.sqrt((pt1[0] - pt2[0])**2 + (pt1[1] - pt2[1])**2)

    def find_highest_box(boxes):
        # max() returns the first maximal element, None when there are none.
        return max(boxes, key=get_height, default=None)

    def take_highest_in_frame(intvl):
        tallest_per_frame = (find_highest_box(frame_faces)
                             for frame_faces in intvl.payload)
        intvl.payload = [box for box in tallest_per_frame if box is not None]
        return intvl

    def inter_frame_movement_less_than(dist):
        # True when no pair of consecutive box centers is more than `dist`
        # apart.
        def check(boxes):
            return not any(
                get_distance(get_center(prev), get_center(cur)) > dist
                for prev, cur in zip(boxes, boxes[1:]))

        return check

    # --- inputs ------------------------------------------------------------

    # Face rows annotated with start/end frame and video ID.
    faces = Face.objects.annotate(
        min_frame=F('frame__number'),
        max_frame=F('frame__number'),
        video_id=F('frame__video_id'),
    ).filter(frame__video__name__contains=VIDEO_NAME)

    shot_rows = Shot.objects.filter(video__name__contains=VIDEO_NAME,
                                    labeler_id=SHOTS_LABELER_ID)
    shots = VideoIntervalCollection.from_django_qs(
        shot_rows, with_payload=lambda obj: [])

    # vids holds all (high-probability) faces for each frame.
    confident_faces = faces.filter(probability__gte=0.99)
    bbox_list_parser = in_array(
        bbox_payload_parser(VideoIntervalCollection.django_accessor))
    vids = VideoIntervalCollection.from_django_qs(
        confident_faces,
        with_payload=bbox_list_parser).coalesce(payload_merge_op=payload_plus)

    right_half = make_region(RIGHT_HALF_MIN_X, 0.0, 1.0, 1.0)
    left_half = make_region(0.0, 0.0, LEFT_HALF_MAX_X, 1.0)
    graph = {
        'nodes': [{
            'name': 'face',
            'predicates': [height_at_least(MIN_FACE_HEIGHT)]
        }],
        'edges': []
    }

    def frames_with_face_in(region):
        not_crowded = payload_satisfies(length_at_most(MAX_FACES_ON_SCREEN))
        face_in_region = payload_satisfies(scene_graph(graph, region=region))
        return vids.filter(and_pred(not_crowded, face_in_region))

    faces_on_right = frames_with_face_in(right_half)
    faces_on_left = frames_with_face_in(left_half)

    # --- shots with a steady face on one side ------------------------------

    def shots_with_steady_face(side_frames):
        # Payload is a list, each element is a list of faces for a frame.
        per_shot = shots.merge(
            side_frames, predicate=overlaps(),
            payload_merge_op=payload_second).map(wrap_list).coalesce(
                payload_merge_op=payload_plus)
        tallest = per_shot.map(take_highest_in_frame)
        return tallest.filter(
            payload_satisfies(
                inter_frame_movement_less_than(MAX_FACE_MOVEMENT)))

    shots_with_face_on_right = shots_with_steady_face(faces_on_right)
    shots_with_face_on_left = shots_with_steady_face(faces_on_left)

    # --- A-B-A sequences ---------------------------------------------------

    def a_b_a(first, second):
        return first.merge(second, predicate=before(max_dist=1)).merge(
            first, predicate=before(max_dist=1))

    # Right-Left-Right sequences
    shot_reverse_shot_1 = a_b_a(shots_with_face_on_right,
                                shots_with_face_on_left)
    # Left-Right-Left sequences
    shot_reverse_shot_2 = a_b_a(shots_with_face_on_left,
                                shots_with_face_on_right)

    shot_reverse_shot = shot_reverse_shot_1.set_union(
        shot_reverse_shot_2).coalesce()
    return intrvllists_to_result_with_objects(
        shot_reverse_shot.get_allintervals(), payload_to_objs=lambda p, v: [])
Beispiel #25
0
# Script fragment: pick a torch device, load the fold definitions and build
# one DeepSBD test dataset per fold, then optionally load weak labels.
# FOLDS_PATH, LOCAL_PATH, TRAINING_SET and WEAK_LABELS_PATH are not defined in
# this fragment and must already exist at module level.

# CONTINUE_PATH = '/app/notebooks/learning/models/deepsbd_resnet_train_on_40000_min_weak/fold1_270000_iteration.pth'

# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print('Initialized constants')

# load folds from disk
# NOTE(review): pickle.load can execute arbitrary code; FOLDS_PATH must point
# to a trusted file.
with open(FOLDS_PATH, 'rb') as f:
    folds = pickle.load(f)

# Load DeepSBD datasets for each fold. This is used for testing.
deepsbd_datasets_weak_testing = []
for fold in folds:
    # Each fold is used as a collection of video IDs (video_id__in=fold);
    # only shots whose labeler name contains 'manual' are selected.
    shots_in_fold_qs = Shot.objects.filter(labeler__name__contains='manual',
                                           video_id__in=fold)
    shots_in_fold = VideoIntervalCollection.from_django_qs(shots_in_fold_qs)

    data = movies_deepsbd_data.DeepSBDDataset(shots_in_fold,
                                              verbose=True,
                                              preload=False,
                                              logits=True,
                                              local_path=LOCAL_PATH)
    deepsbd_datasets_weak_testing.append(data)

print('Loaded test data')

# load weak labels
# Skipped when training directly on ground truth.
if TRAINING_SET != 'ground_truth':
    with open(WEAK_LABELS_PATH, 'rb') as f:
        weak_labels_windows = np.load(f)
Beispiel #26
0
    def __init__(self,
                 shots,
                 window_size=16,
                 stride=8,
                 size=128,
                 verbose=False,
                 preload=True,
                 logits=False,
                 local_path=None):
        """Constructor for the shot detection dataset.

        Builds fixed-length frame windows over the given intervals and labels
        each window depending on whether it contains a shot boundary.

        Args:
            shots: VideoIntervalCollection of all the intervals to get frames
                from. If the payload is -1, then the interval is not an
                actual shot and just needs to be included in the dataset.
            window_size: Number of frames spanned by each window.
            stride: Step between consecutive window starts; clip bounds are
                also aligned to multiples of this value.
            size: Not read in the visible part of this method (the transform
                below hard-codes 128).
            verbose: Not read in the visible part of this method.
            preload: Stored on the instance as ``self.preload``.
            logits: Stored on the instance. When true, labels are 3-tuples
                ((1, 0, 0) for no boundary, (0, 0, 1) for a boundary) instead
                of the integers 0 and 2.
            local_path: Stored on the instance as ``self.local_path``.
        """
        self.window_size = window_size
        self.preload = preload
        self.logits = logits
        self.local_path = local_path
        self.storehouse_backend = storehouse.StorageBackend.make_from_config(
            storehouse.StorageConfig.make_posix_config())
        # items: unique (video_id, start, end, label, path) tuples.
        # frame_nums: per video, every frame number covered by some window.
        items = set()
        frame_nums = {}

        # Zero-length intervals at each shot's start and at end + 1, merged
        # and coalesced; intervals with payload -1 are dropped afterwards.
        shot_boundaries = shots.map(lambda intrvl: (
            intrvl.start, intrvl.start, intrvl.payload)).set_union(
                shots.map(lambda intrvl:
                          (intrvl.end + 1, intrvl.end + 1, intrvl.payload))
            ).coalesce().filter(lambda intrvl: intrvl.payload != -1)

        # dilate(1).coalesce().dilate(-1) presumably joins intervals that are
        # at most a frame or two apart -- TODO confirm against rekall.
        # The map widens each clip: start becomes (start - stride) rounded
        # down to a multiple of stride, end becomes (end + stride) rounded
        # down to a multiple of stride. The join step is then repeated.
        clips = shots.dilate(1).coalesce().dilate(-1).map(lambda intrvl: (
            intrvl.start - stride - ((intrvl.start - stride) % stride), intrvl.
            end + stride - ((intrvl.end + stride) % stride
                            ), intrvl.payload)).dilate(1).coalesce().dilate(-1)

        # Slice every clip into windows of window_size frames, one every
        # `stride` frames, initially labelled "no boundary".
        items_intrvls = {}
        for video_id in clips.get_allintervals():
            items_intrvls[video_id] = []
            for intrvl in clips.get_intervallist(video_id).get_intervals():
                items_intrvls[video_id] += [
                    (f, f + window_size, 0 if not logits else (1, 0, 0))
                    for f in range(intrvl.start, intrvl.end - stride, stride)
                ]
        items_col = VideoIntervalCollection(items_intrvls)

        # Windows matched against a shot boundary via during_inv() (presumably
        # "window contains the boundary" -- verify) get the boundary label.
        items_w_boundaries = items_col.filter_against(
            shot_boundaries, predicate=during_inv()).map(lambda intrvl: (
                intrvl.start, intrvl.end, 2 if not logits else (0, 0, 1)))

        # Replace the default-labelled copies of those windows with the
        # boundary-labelled ones.
        items_w_labels = items_col.minus(
            items_w_boundaries,
            predicate=equal()).set_union(items_w_boundaries)

        for video_id in items_w_labels.get_allintervals():
            # One database lookup per video for its path.
            path = Video.objects.get(id=video_id).path
            frame_nums[video_id] = set()
            for intrvl in items_w_labels.get_intervallist(
                    video_id).get_intervals():
                items.add(
                    (video_id, intrvl.start, intrvl.end, intrvl.payload, path))
                for f in range(intrvl.start, intrvl.end):
                    frame_nums[video_id].add(f)

        # Deterministic order: by video ID, then window start, then end.
        self.items = sorted(list(items),
                            key=lambda item: (item[0], item[1], item[2]))

        # NOTE(review): the scale is fixed at 128x128 and ignores the `size`
        # argument -- confirm whether that is intended.
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            Scale((128, 128)),
            ToTensor(1),
            Normalize(get_mean(1), (1, 1, 1))
        ])
                window) > POSITIVE_OUTLIER_THRESHOLD_FLOW_MAGNITUDE * np.std(
                    window):
            positive_boundaries.append(i)
        if avg_magnitudes[i] - np.mean(
                window) < NEGATIVE_OUTLIER_THRESHOLD_FLOW_MAGNITUDE * np.std(
                    window):
            negative_boundaries.append(i)

    return positive_boundaries, negative_boundaries


# Script fragment: compute RGB histograms for every film with scannertools
# and derive weak shot-boundary labels from them.
db = scannerpy.Database()

# Load up all manually annotated shots
shots_qs = Shot.objects.filter(labeler__name__contains='manual')
shots = VideoIntervalCollection.from_django_qs(shots_qs)
# Sorted IDs of the videos that have manual shot annotations.
shot_video_ids = sorted(list(shots.get_allintervals().keys()))

# Alternative selection (disabled) that leaves out the manually annotated
# videos:
#videos = list(Video.objects.filter(ignore_film=False).exclude(id__in=shot_video_ids).order_by('id').all())
videos = list(Video.objects.filter(ignore_film=False).order_by('id').all())

# Request every frame of every video.
frames = [range(0, video.num_frames) for video in videos]

print("Generating weak labels from RGB histograms")
output_directory = '/app/data/shot_detection_weak_labels/rgb_hists_high_pre'
rgb_hists = st.histograms.compute_histograms(
    db, videos=[video.for_scannertools() for video in videos], frames=frames)
# WINDOW_SIZE and the *_OUTLIER_THRESHOLD_COLOR_HIST constants, as well as
# color_histogram_shot_labels, are defined outside this fragment.
for video, rgb_hist in tqdm(zip(videos, rgb_hists), total=len(videos)):
    pos_bounds, neg_bounds = color_histogram_shot_labels(
        rgb_hist.load(), WINDOW_SIZE, POSITIVE_OUTLIER_THRESHOLD_COLOR_HIST,
        NEGATIVE_OUTLIER_THRESHOLD_COLOR_HIST)
Beispiel #28
0
def shot_reverse_shot():
    """Find long stretches that alternate between a right and a left face.

    Frames are kept when they contain at most ``MAX_FACES_ON_SCREEN`` faces
    and a face of at least ``MIN_FACE_HEIGHT`` in the right (or left) region.
    Those frames are dilated by half the sampling rate and coalesced into
    segments. Right segments are merged with left segments lying up to one
    second before or after them, gaps of up to four seconds are bridged, and
    only sequences of at least ten seconds are kept.

    Returns:
        An Esper result: a timeline result when ``TIMELINE_OUTPUT`` is true,
        otherwise the output of ``intrvllists_to_result_with_objects`` with
        no objects attached.
    """
    from query.models import Face
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array, bbox_payload_parser
    from rekall.merge_ops import payload_plus
    from rekall.payload_predicates import payload_satisfies
    from rekall.list_predicates import length_at_most
    from rekall.logical_predicates import and_pred, or_pred
    from rekall.spatial_predicates import scene_graph, make_region
    from rekall.temporal_predicates import before, after
    from rekall.bbox_predicates import height_at_least
    from esper.rekall import (intrvllists_to_result,
                              intrvllists_to_result_with_objects,
                              add_intrvllists_to_result)

    # If True, visualize results in a timeline
    TIMELINE_OUTPUT = False

    RIGHT_HALF_MIN_X = 0.45
    LEFT_HALF_MAX_X = 0.55
    MIN_FACE_HEIGHT = 0.4
    MAX_FACES_ON_SCREEN = 2
    # faces are sampled every 12 frames
    SAMPLING_RATE = 12
    # Floor division keeps the dilation (and therefore all frame bounds) an
    # integer; `/` would turn every interval endpoint into a float.
    HALF_SAMPLING_RATE = SAMPLING_RATE // 2
    ONE_SECOND = 24
    FOUR_SECONDS = 96
    TEN_SECONDS = 240

    # Annotate face rows with start and end frames and the video ID
    faces = Face.objects.annotate(min_frame=F('frame__number'),
                                  max_frame=F('frame__number'),
                                  video_id=F('frame__video_id'))

    right_half = make_region(RIGHT_HALF_MIN_X, 0.0, 1.0, 1.0)
    left_half = make_region(0.0, 0.0, LEFT_HALF_MAX_X, 1.0)

    graph = {
        'nodes': [{
            'name': 'face',
            'predicates': [height_at_least(MIN_FACE_HEIGHT)]
        }],
        'edges': []
    }

    vids = VideoIntervalCollection.from_django_qs(
        faces,
        with_payload=in_array(
            bbox_payload_parser(
                VideoIntervalCollection.django_accessor))).coalesce(
                    payload_merge_op=payload_plus)

    def segments_with_face_in(region):
        # Sequences where there's a tall enough face in `region` and at most
        # MAX_FACES_ON_SCREEN faces overall.
        return vids.filter(
            and_pred(payload_satisfies(length_at_most(MAX_FACES_ON_SCREEN)),
                     payload_satisfies(scene_graph(graph, region=region)))
        ).dilate(HALF_SAMPLING_RATE).coalesce()

    faces_on_right = segments_with_face_in(right_half)
    faces_on_left = segments_with_face_in(left_half)

    # Sequences where faces on right are up to one second before/after faces
    #   on left
    # Four seconds of buffer time between left-then-right/right-then-left
    #   segments
    # Only keep remaining sequences that last longer than ten seconds
    shot_reverse_shot = faces_on_right.merge(
        faces_on_left,
        predicate=or_pred(
            before(max_dist=ONE_SECOND), after(max_dist=ONE_SECOND),
            arity=2)).dilate(FOUR_SECONDS).coalesce().dilate(
                -1 * FOUR_SECONDS).filter_length(min_length=TEN_SECONDS)

    # Post-process to display in Esper widget
    if TIMELINE_OUTPUT:
        results = intrvllists_to_result(shot_reverse_shot.get_allintervals())
        add_intrvllists_to_result(results,
                                  faces_on_left.get_allintervals(),
                                  color='black')
        add_intrvllists_to_result(results,
                                  faces_on_right.get_allintervals(),
                                  color='green')
    else:
        results = intrvllists_to_result_with_objects(
            shot_reverse_shot.get_allintervals(), lambda payload, video: [])
    return results
Beispiel #29
0
# resnet deepSBD pre-trained on Kinetics
deepsbd_resnet_model_no_clipshots = deepsbd_resnet.resnet18(num_classes=3,
                                                            sample_size=128,
                                                            sample_duration=16)
deepsbd_resnet_model_no_clipshots = deepsbd_resnet_model_no_clipshots.to(
    device).train()

if TRAINING_SET in ["kfolds", "ground_truth"]:
    # Load DeepSBD datasets for each fold. This is used for testing.
    deepsbd_datasets_weak_testing = []
    if not SAME_VAL_TEST:
        for fold in folds:
            shots_in_fold_qs = Shot.objects.filter(
                labeler__name__contains='manual', video_id__in=fold)
            shots_in_fold = VideoIntervalCollection.from_django_qs(
                shots_in_fold_qs)
            shots_per_fold.append(shots_in_fold)

            data = movies_deepsbd_data.DeepSBDDataset(shots_in_fold,
                                                      verbose=True,
                                                      preload=False,
                                                      logits=True,
                                                      local_path=LOCAL_PATH)
            deepsbd_datasets_weak_testing.append(data)
    else:
        with open(VAL_WINDOWS, 'rb') as f:
            val_windows_by_video_id = pickle.load(f)
        with open(Y_VAL, 'rb') as f:
            Y_val = np.load(f)
        paths = {
            video_id: Video.objects.get(id=video_id).path
Beispiel #30
0
def get_person_intrvlcol(person_list=None,
                         video_ids=None,
                         probability=0.9,
                         face_size=None,
                         stride_face=False,
                         labeler='new',
                         exclude_person=False,
                         granularity='frame',
                         payload_type='shot_id'):
    """Build a VideoIntervalCollection of intervals for the given person(s).

    Args:
        person_list: A single name, an iterable of names, or None. Names are
            lower-cased. None applies no identity restriction (this used to
            crash even though it is the default).
        video_ids: Optional collection of video IDs to restrict the query to.
        probability: Identities must have a probability strictly above this.
        face_size: Optional minimum bounding-box height (y2 - y1).
        stride_face: False yields one interval per shot containing a face;
            True yields per-frame intervals on frames with
            ``shot_boundary=False`` that are dilated, coalesced and shrunk
            back. True also forces ``labeler`` to ``'new'``.
        labeler: ``'new'`` matches ``face-identity-converted:<name>`` or
            ``face-identity:<name>``; anything else matches
            ``face-identity-old:<name>``.
        exclude_person: When true, the listed persons are excluded instead of
            selected.
        granularity: ``'second'`` converts the result with
            ``intrvlcol_frame2second``.
        payload_type: Name of the annotated column used as interval payload.
            With ``stride_face`` the default ``'shot_id'`` is replaced by
            ``'frame_id'``.

    Returns:
        The resulting VideoIntervalCollection. The interval count is also
        printed.

    Raises:
        ValueError: If ``person_list`` is an empty iterable.
    """
    def identity_filter(names):
        # OR together one labeler-name condition per person.
        combined = None
        for p in names:
            if labeler == 'new':
                condition = Q(labeler__name='face-identity-converted:' +
                              p) | Q(labeler__name='face-identity:' + p)
            else:
                condition = Q(labeler__name='face-identity-old:' + p)
            combined = condition if combined is None else combined | condition
        return combined

    if stride_face:
        labeler = 'new'

    if person_list is not None:
        if isinstance(person_list, str):
            person_list = [person_list.lower()]
        else:
            person_list = [p.lower() for p in person_list]
        if not person_list:
            # An empty list would otherwise produce filter(None) and fail
            # with an obscure error deep inside the ORM.
            raise ValueError(
                "person_list must contain at least one name (or be None)")

    faceIDs = FaceIdentity.objects \
              .filter(probability__gt=probability) \
              .annotate(face_size=F("face__bbox_y2") - F("face__bbox_y1")) \
              .annotate(video_id=F("face__frame__video_id"))

    if not stride_face:
        faceIDs = faceIDs.exclude(face__shot__isnull=True)
    else:
        faceIDs = faceIDs.filter(face__frame__shot_boundary=False)

    if person_list is not None:
        if not exclude_person:
            faceIDs = faceIDs.filter(identity_filter(person_list))
        else:
            faceIDs = faceIDs.exclude(identity_filter(person_list))

    if face_size is not None:
        faceIDs = faceIDs.filter(face_size__gte=face_size)

    if video_ids is not None:
        faceIDs = faceIDs.filter(video_id__in=video_ids)

    if not stride_face:
        person_intrvllists = qs_to_intrvllists(
            faceIDs.annotate(video_id=F("face__shot__video_id"))
                   .annotate(shot_id=F("face__shot_id"))
                   .annotate(min_frame=F("face__shot__min_frame"))
                   .annotate(max_frame=F("face__shot__max_frame"))
                   .annotate(faceID_id=F("identity_id")),
            schema={
                'start': 'min_frame',
                'end': 'max_frame',
                'payload': payload_type
            })
        person_intrvlcol = VideoIntervalCollection(
            person_intrvllists).coalesce()
    else:
        if payload_type == 'shot_id':
            payload_type = 'frame_id'
        person_intrvllists_raw = qs_to_intrvllists(
            faceIDs.annotate(video_id=F("face__frame__video_id"))
                   .annotate(frame_id=F("face__frame__number"))
                   .annotate(min_frame=F("face__frame__number"))
                   .annotate(max_frame=F("face__frame__number") + 1)
                   .annotate(faceID_id=F("identity_id")),
            schema={
                'start': 'min_frame',
                'end': 'max_frame',
                'payload': payload_type
            })
        # Dilate by half of SAMPLE_RATE * fps frames, coalesce, then shrink
        # back by the same amount.
        SAMPLE_RATE = 3
        person_intrvllists = {}
        for video_id, intrvllist in person_intrvllists_raw.items():
            video = Video.objects.filter(id=video_id)[0]
            dilation = int(video.fps * SAMPLE_RATE / 2)
            person_intrvllists[video_id] = intrvllist.dilate(
                dilation).coalesce().dilate(-dilation)
        person_intrvlcol = VideoIntervalCollection(person_intrvllists)

    if granularity == 'second':
        person_intrvlcol = intrvlcol_frame2second(person_intrvlcol)

    if person_list is None:
        person_label = 'anyone'
    elif len(person_list) > 1:
        person_label = person_list[0] + ' ...'
    else:
        person_label = person_list[0]
    print("Get {} intervals for person {}".format(
        count_intervals(person_intrvlcol), person_label))
    return person_intrvlcol