Ejemplo n.º 1
0
def interview_with_person_x():
    from query.models import LabeledCommercial, FaceIdentity
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.temporal_predicates import before, after, overlaps
    from rekall.logical_predicates import or_pred
    from esper.rekall import intrvllists_to_result

    # Get list of sandbox video IDs
    sandbox_videos = [
        row.video_id
        for row in LabeledCommercial.objects.distinct('video_id')
    ]

    guest_name = "bernie sanders"

    # Load hosts and instances of guest from SQL
    identities = FaceIdentity.objects.filter(face__shot__video_id__in=sandbox_videos)
    hosts_qs = identities.filter(face__is_host=True)
    guest_qs = identities.filter(identity__name=guest_name).filter(probability__gt=0.7)

    # Put bounding boxes in SQL
    hosts = VideoIntervalCollection.from_django_qs(
        hosts_qs.annotate(video_id=F("face__shot__video_id"),
            min_frame=F("face__shot__min_frame"),
            max_frame=F("face__shot__max_frame"))
        )
    guest = VideoIntervalCollection.from_django_qs(
        guest_qs.annotate(video_id=F("face__shot__video_id"),
        min_frame=F("face__shot__min_frame"),
        max_frame=F("face__shot__max_frame"))
    )

    # Get all shots where the guest and a host are on screen together
    guest_with_host = guest.overlaps(hosts).coalesce()

    # This temporal predicate defines A overlaps with B, or A before by less than 10 frames,
    #   or A after B by less than 10 frames
    overlaps_before_or_after_pred = or_pred(
            or_pred(overlaps(), before(max_dist=10), arity=2),
            after(max_dist=10), arity=2)

    # This code finds sequences of:
    #   guest with host overlaps/before/after host OR
    #   guest with host overlaps/before/after guest
    interview_candidates = guest_with_host \
            .merge(hosts, predicate=overlaps_before_or_after_pred) \
            .set_union(guest_with_host.merge(
                guest, predicate=overlaps_before_or_after_pred)) \
            .coalesce()

    # Sequences may be interrupted by shots where the guest or host don't
    #   appear, so dilate and coalesce to merge neighboring segments
    interviews = interview_candidates \
            .dilate(600) \
            .coalesce() \
            .dilate(-600) \
            .filter_length(min_length=1350)

    # Return intervals
    return intrvllists_to_result(interviews.get_allintervals())
Ejemplo n.º 2
0
 def fold_fn(stack, interval):
     if interval.length() > MAX_COMMERCIAL_TIME:
         interval = Interval(interval.start, interval.start + MAX_COMMERCIAL_TIME, interval.payload)
     if len(stack) == 0:
         stack.append(interval)
     else:
         last = stack.pop()
         if or_pred(overlaps(), after(max_dist=5), arity=2)(interval, last):
             if last.merge(interval).length() > MAX_COMMERCIAL_TIME:
                 stack.append(Interval(
                     last.start, 
                     last.start + MAX_COMMERCIAL_TIME, 
                     last.payload))
             else:
                 stack.append(last.merge(interval))
         else:
             stack.append(last)
             stack.append(interval)
     return stack
Ejemplo n.º 3
0
def detect_commercial_rekall(video,
                             transcript_path,
                             blackframe_list=None,
                             histogram=None,
                             verbose=True):
    """
    API for detecting commercial blocks from TV news video using rekall
    
    @video: django query set
    @transcript_path: transcript_path
    @blackframe_list: list of black frames index
    @histogram: list of histogram 16x3 bin for each frame, not used if blackframe_list is provided  
    
    Return: commercial_list (list of tuple((start_fid, start_sec), (end_fid, end_sec)), None if failed)
    """

    transcript = load_transcript(transcript_path)
    if blackframe_list is None:
        blackframe_intervallist = get_blackframe_list(histogram)
    else:
        blackframe_intervallist = IntervalList([
            (fid2second(fid, video.fps), fid2second(fid + 1, video.fps), 0)
            for fid in blackframe_list
        ])

    black_windows = blackframe_intervallist \
            .dilate(1. / video.fps) \
            .coalesce() \
            .dilate(-1. / video.fps) \
            .filter_length(min_length=MIN_BLACKWINDOW * 1. / video.fps)
    #     if verbose:
    #         print("black window: ({})\n".format(black_windows.size()))
    #         for idx, win in enumerate(black_windows.get_intervals()):
    #             print(idx, win)

    # get all instances of >>, Announcer:, and  >> Announcer: in transcript
    arrow_text = get_text_intervals(">>", transcript)
    announcer_text = get_text_intervals("Announcer:", transcript)
    arrow_announcer_text = get_text_intervals(">> Announcer:", transcript)
    #     if verbose:
    #         print('arrow_text', arrow_text)
    #         print('announcer_text', announcer_text)
    #         print('arrow_announcer_text', arrow_announcer_text)

    # get an interval for the whole video
    whole_video = IntervalList([(0., video.num_frames / video.fps, 0)])

    # whole video minus black windows to get segments in between black windows
    # then filter out anything that overlaps with ">>" as long as it's not
    #   ">> Announcer:"
    # then coalesce, as long as it doesn't get too long
    def fold_fn(stack, interval):
        if len(stack) == 0:
            stack.append(interval)
        else:
            last = stack.pop()
            if or_pred(overlaps(), after(max_dist=1), arity=2)(interval, last):
                if last.merge(interval).length() > MAX_COMMERCIAL_TIME:
                    if last.length() > MAX_COMMERCIAL_TIME:
                        stack.append(
                            Interval(last.start,
                                     last.start + MAX_COMMERCIAL_TIME,
                                     last.payload))
                    else:
                        stack.append(last)
                    stack.append(interval)
                else:
                    stack.append(last.merge(interval))
            else:
                stack.append(last)
                stack.append(interval)
        return stack

    all_blocks = whole_video.minus(black_windows)
    non_commercial_blocks = all_blocks.filter_against(
        arrow_text.minus(arrow_announcer_text), predicate=overlaps())
    commercial_blocks = whole_video.minus(non_commercial_blocks)
    if verbose:
        print("commercial blocks candidates: ({})\n".format(
            commercial_blocks.size()))
        for idx, win in enumerate(commercial_blocks.get_intervals()):
            print(idx, win)

    commercials = commercial_blocks \
        .fold_list(fold_fn, []) \
        .filter_length(min_length = MIN_COMMERCIAL_TIME)
    #     commercials = whole_video \
    #             .minus(black_windows) \
    #             .filter_against(
    #                 arrow_text.filter_against(arrow_announcer_text,
    #                     predicate=not_pred(overlaps(), arity=2)),
    #                 predicate=not_pred(overlaps(), arity=2)
    #             ) \
    #             .set_union(black_windows) \
    #             .fold_list(fold_fn, []) \
    #             .filter_length(min_length = MIN_COMMERCIAL_TIME)

    if verbose:
        print("commercials from blackwindow:\n", commercials)

    # add in lowercase intervals
    lowercase_intervals = get_lowercase_intervals(transcript)
    if verbose:
        print("lowercase intervals:\n", lowercase_intervals)
    commercials = commercials \
            .set_union(lowercase_intervals) \
            .dilate(MIN_COMMERCIAL_GAP / 2) \
            .coalesce() \
            .dilate(MIN_COMMERCIAL_GAP / 2)
    if verbose:
        print("commercials merge with lowercase:\n", commercials)

#     if verbose:
#         print(whole_video)
#         print(IntervalList([
#             (start_sec - TRANSCRIPT_DELAY, end_sec - TRANSCRIPT_DELAY, 0)
#             for text, start_sec, end_sec in transcript
#         ]).coalesce().size())

# get blank intervals
    blank_intervals = whole_video.minus(
        IntervalList([
            (start_sec - TRANSCRIPT_DELAY, end_sec - TRANSCRIPT_DELAY, 0)
            for text, start_sec, end_sec in transcript
        ]).coalesce()).coalesce().filter_length(min_length=MIN_BLANKWINDOW,
                                                max_length=MAX_BLANKWINDOW)

    if verbose:
        print("blank intervals:\n", blank_intervals)

    # add in blank intervals, but only if adding in the new intervals doesn't
    #   get too long
    commercials = commercials.merge(blank_intervals,
            predicate=or_pred(before(max_dist=MAX_MERGE_GAP),
                after(max_dist=MAX_MERGE_GAP), arity=2),
            working_window=MAX_MERGE_GAP
            ) \
            .filter_length(max_length=MAX_MERGE_DURATION) \
            .set_union(commercials) \
            .dilate(MIN_COMMERCIAL_GAP / 2) \
            .coalesce() \
            .dilate(MIN_COMMERCIAL_GAP / 2)
    if verbose:
        print("commercials merge with lowercase:\n", commercials)

    # post-process commercials to get rid of gaps, small commercials, and
    #   islated blocks
    small_gaps = whole_video \
            .minus(commercials) \
            .filter_length(max_length = MAX_COMMERCIAL_GAP) \
            .filter_against(
                    arrow_text.filter_against(
                        announcer_text,
                        predicate=not_pred(overlaps()),
                        working_window=1.0
                    ), predicate=not_pred(overlaps()),
                    working_window=1.0)

    # merge with small gaps, but only if that doesn't make things too long
    commercials = commercials \
            .set_union(small_gaps.dilate(0.1)) \
            .coalesce() \
            .filter_length(max_length=MAX_COMMERCIAL_TIME) \
            .set_union(commercials) \
            .coalesce()

    # get isolated commercials
    not_isolated_commercials = commercials.filter_against(
        commercials,
        predicate=or_pred(before(max_dist=MAX_COMMERCIAL_TIME),
                          after(max_dist=MAX_COMMERCIAL_TIME),
                          arity=2),
        working_window=MAX_COMMERCIAL_TIME)
    isolated_commercials = commercials.minus(not_isolated_commercials)
    commercials_to_delete = isolated_commercials \
            .filter_length(max_length=MIN_COMMERCIAL_TIME_FINAL) \
            .set_union(isolated_commercials \
                .filter_against(blank_intervals, predicate=equal()) \
                .filter_length(max_length=MAX_ISOLATED_BLANK_TIME))

    commercials = commercials.minus(commercials_to_delete)

    return commercials
Ejemplo n.º 4
0
def shot_reverse_shot():
    from query.models import Face
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array, bbox_payload_parser
    from rekall.merge_ops import payload_plus
    from esper.rekall import intrvllists_to_result_bbox
    from rekall.payload_predicates import payload_satisfies
    from rekall.list_predicates import length_at_most
    from rekall.logical_predicates import and_pred, or_pred
    from rekall.spatial_predicates import scene_graph, make_region
    from rekall.temporal_predicates import before, after
    from rekall.bbox_predicates import height_at_least
    from esper.rekall import intrvllists_to_result, intrvllists_to_result_with_objects, add_intrvllists_to_result

    # If True, visualize results in a timeline
    TIMELINE_OUTPUT = False

    RIGHT_HALF_MIN_X = 0.45
    LEFT_HALF_MAX_X = 0.55
    MIN_FACE_HEIGHT = 0.4
    MAX_FACES_ON_SCREEN = 2
    # faces are sampled every 12 frames
    SAMPLING_RATE = 12
    ONE_SECOND = 24
    FOUR_SECONDS = 96
    TEN_SECONDS = 240

    # Annotate face rows with start and end frames and the video ID
    faces = Face.objects.annotate(min_frame=F('frame__number'),
                                  max_frame=F('frame__number'),
                                  video_id=F('frame__video_id'))

    right_half = make_region(RIGHT_HALF_MIN_X, 0.0, 1.0, 1.0)
    left_half = make_region(0.0, 0.0, LEFT_HALF_MAX_X, 1.0)

    graph = {
        'nodes': [{
            'name': 'face',
            'predicates': [height_at_least(MIN_FACE_HEIGHT)]
        }],
        'edges': []
    }

    vids = VideoIntervalCollection.from_django_qs(
        faces,
        with_payload=in_array(
            bbox_payload_parser(
                VideoIntervalCollection.django_accessor))).coalesce(
                    payload_merge_op=payload_plus)

    # Get sequences where there's a face on the right half of the screen and
    #   there are at most two faces
    faces_on_right = vids.filter(
        and_pred(payload_satisfies(length_at_most(MAX_FACES_ON_SCREEN)),
                 payload_satisfies(scene_graph(
                     graph, region=right_half)))).dilate(SAMPLING_RATE /
                                                         2).coalesce()

    # Get sequences where there's a face on the left half of the screen and
    #   there are at most two faces
    faces_on_left = vids.filter(
        and_pred(payload_satisfies(length_at_most(MAX_FACES_ON_SCREEN)),
                 payload_satisfies(scene_graph(
                     graph,
                     region=left_half)))).dilate(SAMPLING_RATE / 2).coalesce()

    # Sequences where faces on left up to one second before/after faces on left
    # Four seconds of buffer time between left-then-right/right-then-left
    #   segments
    # Only keep remaining sequences that last longer than ten seconds
    shot_reverse_shot = faces_on_right.merge(
        faces_on_left,
        predicate=or_pred(
            before(max_dist=ONE_SECOND), after(max_dist=ONE_SECOND),
            arity=2)).dilate(FOUR_SECONDS).coalesce().dilate(
                -1 * FOUR_SECONDS).filter_length(min_length=TEN_SECONDS)

    # Post-process to display in Esper widget
    if TIMELINE_OUTPUT:
        results = intrvllists_to_result(shot_reverse_shot.get_allintervals())
        add_intrvllists_to_result(results,
                                  faces_on_left.get_allintervals(),
                                  color='black')
        add_intrvllists_to_result(results,
                                  faces_on_right.get_allintervals(),
                                  color='green')
    else:
        results = intrvllists_to_result_with_objects(
            shot_reverse_shot.get_allintervals(), lambda payload, video: [])
    return results