def test_minus_against_nothing(self):
    """Subtracting a list that overlaps nothing keeps both long intervals."""
    first_long = Interval(1., 10., 1)
    second_long = Interval(3., 15., 2)
    minuend = IntervalList([second_long, first_long])
    subtrahend = IntervalList([Interval(20., 20.5, 3)])

    difference = minuend.minus(subtrahend)

    # The result is expected ordered by start, not by insertion order.
    expected_reprs = [
        "<Interval start:1.0 end:10.0 payload:1>",
        "<Interval start:3.0 end:15.0 payload:2>",
    ]
    self.assertEqual(len(difference.intrvls), 2)
    for idx, expected in enumerate(expected_reprs):
        self.assertEqual(repr(difference.intrvls[idx]), expected)
def test_minus(self):
    """
    Check IntervalList.minus in four configurations: defaults,
    recursive_diff=False, a custom payload_merge_op and a custom predicate.
    """
    def check(result, expected_reprs):
        # Compare the length first, then every interval's repr in order.
        self.assertEqual(len(result.intrvls), len(expected_reprs))
        for idx, expected in enumerate(expected_reprs):
            self.assertEqual(repr(result.intrvls[idx]), expected)

    # Both lists are deliberately built out of start order.
    long_intervals = IntervalList([
        Interval(3., 15., 2),
        Interval(1., 10., 1),
    ])
    short_intervals = IntervalList([
        Interval(2., 2.7, 4),
        Interval(9., 12., 7),
        Interval(2.9, 3.5, 5),
        Interval(2., 2.5, 3),
        Interval(5., 7., 6),
        Interval(14., 16., 8),
    ])

    # Default arguments: payloads come from the long intervals.
    check(long_intervals.minus(short_intervals), [
        "<Interval start:1.0 end:2.0 payload:1>",
        "<Interval start:2.7 end:2.9 payload:1>",
        "<Interval start:3.5 end:5.0 payload:1>",
        "<Interval start:3.5 end:5.0 payload:2>",
        "<Interval start:7.0 end:9.0 payload:1>",
        "<Interval start:7.0 end:9.0 payload:2>",
        "<Interval start:12.0 end:14.0 payload:2>",
    ])

    # Non-recursive difference: only the number of pieces is pinned.
    non_recursive = long_intervals.minus(short_intervals, recursive_diff=False)
    self.assertEqual(len(non_recursive.intrvls), 15)

    # payload_second: same pieces, payloads taken from the short intervals.
    check(long_intervals.minus(short_intervals, payload_merge_op=payload_second), [
        "<Interval start:1.0 end:2.0 payload:3>",
        "<Interval start:2.7 end:2.9 payload:4>",
        "<Interval start:3.5 end:5.0 payload:5>",
        "<Interval start:3.5 end:5.0 payload:5>",
        "<Interval start:7.0 end:9.0 payload:6>",
        "<Interval start:7.0 end:9.0 payload:6>",
        "<Interval start:12.0 end:14.0 payload:7>",
    ])

    # Restricting the subtraction with the overlaps_before() predicate.
    check(long_intervals.minus(short_intervals, predicate=overlaps_before()), [
        "<Interval start:1.0 end:9.0 payload:1>",
        "<Interval start:3.0 end:14.0 payload:2>",
    ])
def compute_shots(microshot_boundaries, faces_scanner, frames, video):
    """
    Build shots from microshot boundaries, dropping the boundaries across
    which the detected faces stay the same.

    microshot_boundaries: list of boundary frame numbers (it is concatenated
        with another list below).
    faces_scanner: per-frame face lists, zipped with ``frames``.
    frames: frame numbers paired with ``faces_scanner``.
    video: object providing ``num_frames`` and ``fps``.

    Returns the IntervalList produced by folding the surviving boundaries
    into shots.
    """
    print('Number of microshots: ', len(microshot_boundaries))

    # One (frame, frame) interval per frame, carrying that frame's faces.
    per_frame_faces = IntervalList([
        (frame_no, frame_no, face_list)
        for frame_no, face_list in zip(frames, faces_scanner)
    ])
    # Every boundary b becomes the transition interval (b - 1, b).
    transitions = IntervalList([
        (boundary - 1, boundary, 0) for boundary in microshot_boundaries
    ])

    # Frames selected against the transitions that hold at least one face.
    frames_on_transitions = per_frame_faces.filter_against(
        transitions, predicate=overlaps())
    faces_at_boundaries = frames_on_transitions.filter(
        payload_satisfies(length_at_least(1)))

    # Transitions that have faces both before and after the cut.
    # The payload of this IntervalList is still 0.
    with_face_at_start = transitions.filter_against(
        faces_at_boundaries, predicate=starts_inv())
    with_face_at_end = transitions.filter_against(
        faces_at_boundaries, predicate=finishes_inv())
    transitions_with_faces = with_face_at_start.filter_against(
        with_face_at_end, predicate=equal())

    # Attach the face lists found at the start and at the end of every
    # transition, then pair them up in a single payload.
    faces_on_entry = transitions_with_faces.merge(
        faces_at_boundaries,
        predicate=starts_inv(),
        payload_merge_op=payload_second)
    faces_on_exit = transitions_with_faces.merge(
        faces_at_boundaries,
        predicate=finishes_inv(),
        payload_merge_op=payload_second)

    def pair_payloads(starting_faces, ending_faces):
        return {'starts': starting_faces, 'finishes': ending_faces}

    transitions_with_faces = faces_on_entry.merge(
        faces_on_exit, predicate=equal(), payload_merge_op=pair_payloads)

    def face_list_stays_the_same(start_finishes_payload):
        """
        Describe the confident faces at the start of the transition as a
        scene graph and test the faces at the end against it.
        """
        def is_confident(candidate):
            return candidate['score'] > MINIMUM_FACE_PROBABILITY

        nodes = []
        # idx is the position in the unfiltered 'starts' list.
        for idx, face in enumerate(start_finishes_payload['starts']):
            if face.score > MINIMUM_FACE_PROBABILITY:
                nodes.append({
                    'name': 'face{}'.format(idx),
                    'predicates': [
                        position(face.x1, face.y1, face.x2, face.y2,
                                 epsilon=POSITION_EPSILON),
                        is_confident,
                    ],
                })
        matcher = scene_graph({'nodes': nodes, 'edges': []}, exact=True)

        closing_faces = [
            {
                'x1': face.x1,
                'y1': face.y1,
                'x2': face.x2,
                'y2': face.y2,
                'score': face.score,
            }
            for face in start_finishes_payload['finishes']
        ]
        return matcher(closing_faces)

    # Transitions where the faces at the start and the end are the same.
    bad_transitions = transitions_with_faces.filter(
        payload_satisfies(face_list_stays_the_same))
    print(bad_transitions.size())

    def convert_shot_boundaries_to_shots(shot_boundary_list):
        """
        Fold an IntervalList of shot boundaries into an IntervalList of
        shots. The boundary list should include the start and the end of
        the movie.
        """
        def fold_boundaries_to_shots(shots_so_far, boundary):
            if not shots_so_far:
                return [boundary.copy()]
            current = shots_so_far[-1]
            # Close the open shot one frame before this boundary.
            current.end = boundary.start - 1
            if current.length() > 0:
                shots_so_far.append(boundary.copy())
            else:
                # Empty shot: extend it to this boundary and keep it open.
                current.end = boundary.start
            return shots_so_far

        return shot_boundary_list.fold_list(fold_boundaries_to_shots, [])

    # Microshot boundaries plus the first and last frame, de-duplicated.
    boundary_frames = list(set([0, video.num_frames] + microshot_boundaries))
    shot_boundaries = IntervalList([
        (frame_no, frame_no, 0) for frame_no in boundary_frames
    ])
    microshots = convert_shot_boundaries_to_shots(shot_boundaries)

    # Absorb short microshots: extend each by one frame, coalesce, and
    # union the result back into the full microshot list.
    max_short_length = math.floor(MINIMUM_SHOT_DURATION * video.fps)
    short_microshots = microshots.filter_length(max_length=max_short_length)
    stretched_short = short_microshots.map(
        lambda i: (i.start, i.end + 1, i.payload)).coalesce()
    shots = microshots.set_union(stretched_short).coalesce()

    # Drop the shots that start on one of the bad transitions; a transition
    # (b - 1, b) is mapped to (b, b) before matching.
    shifted_bad_transitions = bad_transitions.map(
        lambda i: (i.start + 1, i.end, i.payload))
    bad_shots = shots.filter_against(
        shifted_bad_transitions, predicate=starts_inv())
    shot_starts = shots.map(lambda i: (i.start, i.start, i.payload))
    remaining_boundaries = shot_starts.minus(bad_shots)
    return convert_shot_boundaries_to_shots(remaining_boundaries)
def detect_commercial_rekall(video, transcript_path, blackframe_list=None,
                             histogram=None, verbose=True):
    """
    API for detecting commercial blocks from TV news video using rekall

    @video: video object (documented as a django query set); ``fps`` and
        ``num_frames`` are read from it
    @transcript_path: path passed to load_transcript
    @blackframe_list: list of black frames index; when None, black frames
        are obtained from get_blackframe_list(histogram)
    @histogram: list of histogram 16x3 bin for each frame, not used if
        blackframe_list is provided
    @verbose: print the intermediate interval lists

    Return: the IntervalList of detected commercials. Black-frame bounds come
    from fid2second and the whole-video interval ends at num_frames / fps.
    """
    transcript = load_transcript(transcript_path)

    if blackframe_list is None:
        blackframe_intervallist = get_blackframe_list(histogram)
    else:
        blackframe_intervallist = IntervalList([
            (fid2second(fid, video.fps), fid2second(fid + 1, video.fps), 0)
            for fid in blackframe_list
        ])

    # Dilate by one frame, coalesce and dilate back by one frame to join
    # neighbouring black frames, then keep windows of at least
    # MIN_BLACKWINDOW / fps.
    black_windows = blackframe_intervallist \
        .dilate(1. / video.fps) \
        .coalesce() \
        .dilate(-1. / video.fps) \
        .filter_length(min_length=MIN_BLACKWINDOW * 1. / video.fps)

    # get all instances of >>, Announcer:, and >> Announcer: in transcript
    arrow_text = get_text_intervals(">>", transcript)
    announcer_text = get_text_intervals("Announcer:", transcript)
    arrow_announcer_text = get_text_intervals(">> Announcer:", transcript)

    # get an interval for the whole video
    whole_video = IntervalList([(0., video.num_frames / video.fps, 0)])

    def fold_fn(stack, interval):
        """
        Fold step: merge ``interval`` into the previous block when the two
        satisfy overlaps() or after(max_dist=1), unless the merged block
        would be longer than MAX_COMMERCIAL_TIME.
        """
        if not stack:
            stack.append(interval)
            return stack

        last = stack.pop()
        if or_pred(overlaps(), after(max_dist=1), arity=2)(interval, last):
            if last.merge(interval).length() > MAX_COMMERCIAL_TIME:
                # Merging would be too long: keep the two blocks separate,
                # truncating the previous one if it is itself too long.
                if last.length() > MAX_COMMERCIAL_TIME:
                    stack.append(
                        Interval(last.start,
                                 last.start + MAX_COMMERCIAL_TIME,
                                 last.payload))
                else:
                    stack.append(last)
                stack.append(interval)
            else:
                stack.append(last.merge(interval))
        else:
            stack.append(last)
            stack.append(interval)
        return stack

    # whole video minus black windows to get segments in between black windows
    # then filter out anything that overlaps with ">>" as long as it's not
    # ">> Announcer:"
    # then coalesce, as long as it doesn't get too long
    all_blocks = whole_video.minus(black_windows)
    non_commercial_blocks = all_blocks.filter_against(
        arrow_text.minus(arrow_announcer_text), predicate=overlaps())
    commercial_blocks = whole_video.minus(non_commercial_blocks)
    if verbose:
        print("commercial blocks candidates: ({})\n".format(
            commercial_blocks.size()))
        for idx, win in enumerate(commercial_blocks.get_intervals()):
            print(idx, win)

    commercials = commercial_blocks \
        .fold_list(fold_fn, []) \
        .filter_length(min_length=MIN_COMMERCIAL_TIME)
    if verbose:
        print("commercials from blackwindow:\n", commercials)

    # add in lowercase intervals
    lowercase_intervals = get_lowercase_intervals(transcript)
    if verbose:
        print("lowercase intervals:\n", lowercase_intervals)
    # NOTE(review): both dilate calls use a positive amount, so the result is
    # not shrunk back after coalescing (unlike black_windows above). This
    # looks like a missing minus sign; left unchanged pending confirmation.
    commercials = commercials \
        .set_union(lowercase_intervals) \
        .dilate(MIN_COMMERCIAL_GAP / 2) \
        .coalesce() \
        .dilate(MIN_COMMERCIAL_GAP / 2)
    if verbose:
        print("commercials merge with lowercase:\n", commercials)

    # get blank intervals: parts of the video not covered by the transcript
    # (shifted by TRANSCRIPT_DELAY) whose length is within
    # [MIN_BLANKWINDOW, MAX_BLANKWINDOW]
    transcript_coverage = IntervalList([
        (start_sec - TRANSCRIPT_DELAY, end_sec - TRANSCRIPT_DELAY, 0)
        for _text, start_sec, end_sec in transcript
    ]).coalesce()
    blank_intervals = whole_video \
        .minus(transcript_coverage) \
        .coalesce() \
        .filter_length(min_length=MIN_BLANKWINDOW,
                       max_length=MAX_BLANKWINDOW)
    if verbose:
        print("blank intervals:\n", blank_intervals)

    # add in blank intervals, but only if adding in the new intervals doesn't
    # get too long
    # NOTE(review): same positive/positive dilate pattern as above.
    commercials = commercials.merge(
            blank_intervals,
            predicate=or_pred(before(max_dist=MAX_MERGE_GAP),
                              after(max_dist=MAX_MERGE_GAP), arity=2),
            working_window=MAX_MERGE_GAP) \
        .filter_length(max_length=MAX_MERGE_DURATION) \
        .set_union(commercials) \
        .dilate(MIN_COMMERCIAL_GAP / 2) \
        .coalesce() \
        .dilate(MIN_COMMERCIAL_GAP / 2)
    if verbose:
        # This message used to repeat the "lowercase" label from the
        # previous step, which made the two debug dumps indistinguishable.
        print("commercials merge with blank intervals:\n", commercials)

    # post-process commercials to get rid of gaps, small commercials, and
    # isolated blocks
    # not_pred is given arity=2 like every other combinator in this function,
    # because filter_against calls its predicate with two intervals.
    small_gaps = whole_video \
        .minus(commercials) \
        .filter_length(max_length=MAX_COMMERCIAL_GAP) \
        .filter_against(
            arrow_text.filter_against(
                announcer_text,
                predicate=not_pred(overlaps(), arity=2),
                working_window=1.0),
            predicate=not_pred(overlaps(), arity=2),
            working_window=1.0)

    # merge with small gaps, but only if that doesn't make things too long
    commercials = commercials \
        .set_union(small_gaps.dilate(0.1)) \
        .coalesce() \
        .filter_length(max_length=MAX_COMMERCIAL_TIME) \
        .set_union(commercials) \
        .coalesce()

    # get isolated commercials: those with no other commercial before or
    # after within MAX_COMMERCIAL_TIME
    not_isolated_commercials = commercials.filter_against(
        commercials,
        predicate=or_pred(before(max_dist=MAX_COMMERCIAL_TIME),
                          after(max_dist=MAX_COMMERCIAL_TIME), arity=2),
        working_window=MAX_COMMERCIAL_TIME)
    isolated_commercials = commercials.minus(not_isolated_commercials)

    # Delete isolated commercials that are short, or that equal a blank
    # interval and are at most MAX_ISOLATED_BLANK_TIME long.
    short_isolated = isolated_commercials \
        .filter_length(max_length=MIN_COMMERCIAL_TIME_FINAL)
    blank_isolated = isolated_commercials \
        .filter_against(blank_intervals, predicate=equal()) \
        .filter_length(max_length=MAX_ISOLATED_BLANK_TIME)
    commercials_to_delete = short_isolated.set_union(blank_isolated)

    commercials = commercials.minus(commercials_to_delete)
    return commercials
def detect_commercial_rekall(video, transcript_path, blackframe_list=None,
                             histogram=None, debug=True, verbose=False):
    """
    API for detecting commercial blocks from TV news video using rekall

    @video: either a dict with 'fps' and 'num_frames' keys, or an object
        with ``fps`` and ``num_frames`` attributes
    @transcript_path: path passed to load_transcript
    @blackframe_list: list of black frames index; when None, black frames
        are obtained from get_blackframe_list(histogram)
    @histogram: list of histogram 16x3 bin for each frame, not used if
        blackframe_list is provided
    @debug: when true, return the intermediate interval lists as well
    @verbose: print the intermediate interval lists

    Return: if debug, a dict with keys 'black', 'arrow', 'commercials_raw',
    'lowercase', 'blank' and 'commercials' mapping to IntervalLists ('black'
    and 'arrow' are dilated by 2); otherwise a list of (start, end) tuples,
    one per detected commercial.
    """
    # isinstance also accepts dict subclasses, which the previous exact type
    # comparison sent down the attribute-access branch.
    if isinstance(video, dict):
        fps = video['fps']
        video_length = video['num_frames'] / fps
    else:
        fps = video.fps
        video_length = video.num_frames / video.fps

    transcript = load_transcript(transcript_path)

    if blackframe_list is None:
        blackframe_intervallist = get_blackframe_list(histogram)
    else:
        blackframe_intervallist = IntervalList([
            (fid2second(fid, fps), fid2second(fid + 1, fps), 0)
            for fid in blackframe_list
        ])

    # get black windows: dilate by one frame, coalesce, dilate back.
    # No minimum-length filter is applied to the black windows here.
    black_windows = blackframe_intervallist \
        .dilate(1. / fps) \
        .coalesce() \
        .dilate(-1. / fps)
    if verbose:
        print("black window: ({})\n".format(black_windows.size()))
        for idx, win in enumerate(black_windows.get_intervals()):
            print(idx, win)

    # get all instances of >>
    arrow_intervals = get_text_intervals(">>", transcript)
    arrow_announcer_intervals = get_text_intervals(">> Announcer:", transcript)
    arrow_having_intervals = get_text_intervals(">> HAVING", transcript)
    if verbose:
        print("arrow_text: ({})\n".format(arrow_intervals.size()),
              arrow_intervals)
        print("arrow_announcer_text: ({})\n".format(
            arrow_announcer_intervals.size()), arrow_announcer_intervals)

    # get intervals for the whole transcript: skip entries whose text
    # contains '{', then dilate by 1, coalesce and dilate back by 1
    transcript_intervals = IntervalList([
        (start_sec, end_sec, 0)
        for text, start_sec, end_sec in transcript
        if '{' not in text
    ]).dilate(1) \
        .coalesce() \
        .dilate(-1)

    # get an interval for the whole video
    whole_video = IntervalList([(0., video_length, 0)])

    def fold_fn(stack, interval):
        """
        Fold step: cap ``interval`` at MAX_COMMERCIAL_TIME, then merge it
        into the previous block when the two satisfy overlaps() or
        after(max_dist=5); a merged block is also capped at
        MAX_COMMERCIAL_TIME from the previous block's start.
        """
        if interval.length() > MAX_COMMERCIAL_TIME:
            interval = Interval(interval.start,
                                interval.start + MAX_COMMERCIAL_TIME,
                                interval.payload)
        if not stack:
            stack.append(interval)
            return stack

        last = stack.pop()
        if or_pred(overlaps(), after(max_dist=5), arity=2)(interval, last):
            if last.merge(interval).length() > MAX_COMMERCIAL_TIME:
                stack.append(Interval(last.start,
                                      last.start + MAX_COMMERCIAL_TIME,
                                      last.payload))
            else:
                stack.append(last.merge(interval))
        else:
            stack.append(last)
            stack.append(interval)
        return stack

    # get reliable double arrow intervals: drop ">> Announcer:" and
    # ">> HAVING", keep those overlapping a transcript run of at least
    # RELIABLE_TEXT_DURATION
    reliable_transcripts = transcript_intervals.filter_length(
        min_length=RELIABLE_TEXT_DURATION)
    arrow_intervals = arrow_intervals \
        .minus(arrow_announcer_intervals) \
        .minus(arrow_having_intervals) \
        .filter_against(reliable_transcripts, predicate=overlaps())

    # whole video minus black windows to get segments in between black
    # windows; get non-commercial blocks by keeping the segments that
    # overlap with >>, and treat everything else (minus the black windows)
    # as commercial candidates
    all_blocks = whole_video.minus(black_windows)
    non_commercial_blocks = all_blocks.filter_against(
        arrow_intervals, predicate=overlaps())
    commercial_blocks = whole_video.minus(
        non_commercial_blocks.set_union(black_windows))
    if verbose:
        print("commercial blocks candidates: ({})\n".format(
            commercial_blocks.size()))
        for idx, win in enumerate(commercial_blocks.get_intervals()):
            print(idx, win)

    # then coalesce, as long as it doesn't get too long
    commercials = commercial_blocks \
        .fold_list(fold_fn, []) \
        .filter_length(min_length=MIN_COMMERCIAL_TIME)
    # Keep an independent copy of the black-window based result; it is
    # unioned back in after the gap-merging step and reported in debug mode.
    commercials_raw = copy.deepcopy(commercials)
    if verbose:
        print("commercials from blackwindow:\n", commercials)

    # add in lowercase intervals
    lowercase_intervals = get_lowercase_intervals(transcript)
    if verbose:
        print("lowercase intervals:\n", lowercase_intervals)
    commercials = commercials.set_union(lowercase_intervals)
    if verbose:
        print("commercials merge with lowercase:\n", commercials)

    # get blank intervals
    blank_intervals = whole_video \
        .minus(transcript_intervals) \
        .filter_length(min_length=MIN_BLANKWINDOW, max_length=MAX_BLANKWINDOW)
    # remove last one minute segment due to no aligned transcripts
    blank_intervals = blank_intervals \
        .minus(IntervalList([(video_length - 60, video_length, 0)])) \
        .filter_length(min_length=MIN_BLANKWINDOW)
    if verbose:
        print("blank intervals:\n", blank_intervals)

    # add in blank intervals
    commercials = commercials.set_union(blank_intervals)
    if verbose:
        print("commercials merge with blank intervals:\n", commercials)

    # merge with small gaps, but only if that doesn't make things too long;
    # the unmerged pieces are unioned back in afterwards
    commercials = commercials \
        .dilate(MAX_MERGE_GAP / 2) \
        .coalesce() \
        .dilate(-MAX_MERGE_GAP / 2) \
        .filter_length(max_length=MAX_COMMERCIAL_TIME) \
        .set_union(commercials_raw) \
        .set_union(lowercase_intervals) \
        .set_union(blank_intervals) \
        .coalesce()

    # No small-gap or isolated-commercial post-processing is applied here.

    if debug:
        return {
            'black': black_windows.dilate(2),
            'arrow': arrow_intervals.dilate(2),
            'commercials_raw': commercials_raw,
            'lowercase': lowercase_intervals,
            'blank': blank_intervals,
            'commercials': commercials,
        }
    return [(i.start, i.end) for i in commercials.get_intervals()]