def caption_metadata_for_video(video_id):
    """
    Load the caption metadata of a video as an IntervalList.

    Reads '<video_id>_submeta.json' from CAPTION_METADATA_DIR. Every caption
    in the file becomes one Interval. Its bounds are the caption's
    'aligned_time' when that key is present and its 'original_time'
    otherwise. The payload is a dict with:
      'aligned'   - True when 'aligned_time' was used
      'full_line' - the caption's 'line'
      'speaker'   - clean_speaker(caption['speaker']), or None when the
                    caption has no 'speaker' key
      'man_start' / 'man_end' - the two 'original_time' values

    Returns an empty IntervalList when the metadata file does not exist.
    """
    path = os.path.join(CAPTION_METADATA_DIR, str(video_id) + '_submeta.json')
    if not os.path.exists(path):
        return IntervalList([])

    def caption_to_interval(caption):
        original = caption['original_time']
        begin, finish = original[0], original[1]
        speaker = None
        if 'speaker' in caption:
            speaker = clean_speaker(caption['speaker'])
        used_alignment = 'aligned_time' in caption
        if used_alignment:
            begin = caption['aligned_time'][0]
            finish = caption['aligned_time'][1]
        return Interval(begin, finish, payload={
            'aligned': used_alignment,
            'full_line': caption['line'],
            'speaker': speaker,
            'man_start': caption['original_time'][0],
            'man_end': caption['original_time'][1]
        })

    with open(path) as handle:
        captions = json.load(handle)
        return IntervalList([caption_to_interval(c) for c in captions])
def test_join(self):
    # join() with a user-supplied predicate and merge_op: the predicate only
    # accepts the pair starting at 1.0 / 12.0, and merge_op answers with one
    # fixed interval, so exactly that interval must come back.
    list_a = IntervalList([
        Interval(52., 55., 1),
        Interval(100., 110., 1),
        Interval(1., 25., 1),
        Interval(200., 210., 1),
    ])
    list_b = IntervalList([
        Interval(50., 53., 2),
        Interval(101., 105., 2),
        Interval(12., 26., 2),
        Interval(190., 220., 2),
    ])

    def only_first_pair(x, y):
        return x.start == 1. and y.start == 12.

    def fixed_result(x, y):
        return [Interval(1., 100., 25)]

    joined = list_a.join(list_b,
                         merge_op=fixed_result,
                         predicate=only_first_pair)

    self.assertEqual(len(joined.intrvls), 1)
    self.assertEqual(repr(joined.intrvls[0]),
                     "<Interval start:1.0 end:100.0 payload:25>")
def test_filter(self):
    # filter() keeps only the intervals accepted by the predicate; here only
    # the interval starting at 1.5 has start > 1.1.
    candidates = IntervalList([Interval(1., 2., 1), Interval(1.5, 2.5, 2)])

    kept = candidates.filter(lambda intrvl: intrvl.start > 1.1)

    self.assertEqual(len(kept.intrvls), 1)
    self.assertEqual(repr(kept.intrvls[0]),
                     "<Interval start:1.5 end:2.5 payload:2>")
def test_filter_length(self):
    # filter_length(min_length=1.1) must keep only the 2.0-long interval;
    # filter_length(max_length=1.8) on that result must then drop it too.
    candidates = IntervalList([Interval(1., 2., 1), Interval(1.5, 3.5, 2)])

    long_enough = candidates.filter_length(min_length=1.1)
    self.assertEqual(len(long_enough.intrvls), 1)
    self.assertEqual(repr(long_enough.intrvls[0]),
                     "<Interval start:1.5 end:3.5 payload:2>")

    short_enough = long_enough.filter_length(max_length=1.8)
    self.assertEqual(len(short_enough.intrvls), 0)
def test_minus_against_nothing(self):
    # Subtracting an interval that overlaps neither input must return both
    # inputs unchanged (reported in start order).
    minuend = IntervalList([Interval(3., 15., 2), Interval(1., 10., 1)])
    subtrahend = IntervalList([Interval(20., 20.5, 3)])

    difference = minuend.minus(subtrahend)

    expected = [
        "<Interval start:1.0 end:10.0 payload:1>",
        "<Interval start:3.0 end:15.0 payload:2>",
    ]
    self.assertEqual(len(difference.intrvls), 2)
    for idx, expected_repr in enumerate(expected):
        self.assertEqual(repr(difference.intrvls[idx]), expected_repr)
def test_init(self):
    # The constructor must expose the intervals in start order no matter in
    # which order they were passed in.
    first = Interval(1., 2., 1)
    second = Interval(1.5, 2.5, 2)
    expected = [
        "<Interval start:1.0 end:2.0 payload:1>",
        "<Interval start:1.5 end:2.5 payload:2>",
    ]

    for ordering in ([first, second], [second, first]):
        built = IntervalList(ordering)
        for idx, expected_repr in enumerate(expected):
            self.assertEqual(repr(built.intrvls[idx]), expected_repr)
def get_caption_intrvlcol(phrase, video_ids=None):
    """
    Search the captions for a phrase and return the hits as a
    VideoIntervalCollection.

    @phrase is passed to phrase_search together with @video_ids.
    @video_ids optionally restricts the Video rows that are loaded; when it
    is None every Video is loaded.

    Each posting's start and end are multiplied by the fps of its video and
    truncated with int() (presumably seconds -> frame numbers; confirm
    against phrase_search). All intervals carry payload 0.

    Prints the number of intervals found and returns the collection.
    """
    results = phrase_search(phrase, video_ids)

    # `is None` rather than `== None`: identity is the correct test for the
    # "no restriction" default and cannot be fooled by custom __eq__.
    if video_ids is None:
        videos = {v.id: v for v in Video.objects.all()}
    else:
        videos = {
            v.id: v
            for v in Video.objects.filter(id__in=video_ids).all()
        }

    def convert_time(k, t):
        return int(t * videos[k].fps)

    # Group the converted (start, end, payload) tuples by video id.
    tuples_by_video = {}
    for doc in results:
        for posting in doc.postings:
            tuples_by_video.setdefault(doc.id, []).append(
                (convert_time(doc.id, posting.start),
                 convert_time(doc.id, posting.end), 0))

    phrase_intrvllists = {
        video_id: IntervalList(tuples)
        for video_id, tuples in tuples_by_video.items()
    }
    phrase_intrvlcol = VideoIntervalCollection(phrase_intrvllists)
    print('Get {} intervals for phrase \"{}\"'.format(
        count_intervals(phrase_intrvlcol), phrase))
    return phrase_intrvlcol
def test_filter_against(self):
    # filter_against() with during_inv(): of the two long intervals only the
    # one spanning 1.0-10.0 is expected to survive the filter.
    long_first = Interval(1., 10., 1)
    long_second = Interval(3., 15., 2)
    long_list = IntervalList([long_second, long_first])
    short_list = IntervalList([
        Interval(2., 2.5, 3),
        Interval(2., 2.7, 4),
        Interval(2.9, 3.5, 5),
    ])

    survivors = long_list.filter_against(short_list, predicate=during_inv())

    self.assertEqual(len(survivors.intrvls), 1)
    self.assertEqual(repr(survivors.intrvls[0]), repr(long_first))
def topic_search_to_intrvllists(topic_search_result, video_ids=None, payload=0):
    """
    Converts a topic search result from esper.captions to IntervalLists.

    @topic_search_result is just the output from captions#topic_search.
    @video_ids optionally restricts the Video rows that are loaded; when it
    is None every Video is loaded. Documents whose id is not among the
    loaded videos are skipped.
    @payload is stored as the payload of every interval.

    Each location's start and end are multiplied by the video's fps and
    truncated with int().

    Returns a dict from video IDs to IntervalLists.
    """
    # `is None` rather than `== None`: identity is the correct test for the
    # "no restriction" default.
    if video_ids is None:
        videos = {v.id: v for v in Video.objects.all()}
    else:
        videos = {v.id: v for v in Video.objects.filter(id__in=video_ids).all()}

    def convert_time(k, t):
        return int(t * videos[k].fps)

    # Group the converted (start, end, payload) tuples by video id.
    tuples_by_video = {}
    for doc in topic_search_result.documents:
        if doc.id not in videos:
            continue
        for location in doc.locations:
            tuples_by_video.setdefault(doc.id, []).append(
                (convert_time(doc.id, location.start),
                 convert_time(doc.id, location.end), payload))

    return {
        video_id: IntervalList(tuples)
        for video_id, tuples in tuples_by_video.items()
    }
def intrvlcol_second2frame(intrvlcol):
    """
    Build a new VideoIntervalCollection in which every interval's start and
    end have been multiplied by the fps of its video and truncated with
    int(). Payloads are carried over unchanged.

    The fps is read from the first Video row whose id equals the video id.
    """
    def scale(intrvllist, fps):
        return IntervalList([
            (int(interval.start * fps), int(interval.end * fps),
             interval.payload)
            for interval in intrvllist.get_intervals()
        ])

    frame_lists = {}
    for video_id, intrvllist in intrvlcol.get_allintervals().items():
        fps = Video.objects.filter(id=video_id)[0].fps
        frame_lists[video_id] = scale(intrvllist, fps)
    return VideoIntervalCollection(frame_lists)
def test_fold(self):
    # fold() is exercised with three reducers: summing payloads, summing
    # lengths, and collecting the intervals into a list (which must come out
    # in start order).
    source = IntervalList([
        Interval(52., 55., 1),
        Interval(100., 110., 1),
        Interval(1., 25., 1),
        Interval(200., 210., 1),
    ])

    payload_sum = source.fold(lambda acc, intrvl: acc + intrvl.payload, 0.)
    self.assertEqual(payload_sum, 4)

    length_sum = source.fold(
        lambda acc, intrvl: acc + (intrvl.end - intrvl.start), 0.)
    self.assertEqual(length_sum, 47.0)

    def collect(acc, intrvl):
        acc.append(intrvl)
        return acc

    rebuilt = IntervalList(source.fold(collect, []))

    expected = [
        "<Interval start:1.0 end:25.0 payload:1>",
        "<Interval start:52.0 end:55.0 payload:1>",
        "<Interval start:100.0 end:110.0 payload:1>",
        "<Interval start:200.0 end:210.0 payload:1>",
    ]
    self.assertEqual(len(rebuilt.intrvls), 4)
    for idx, expected_repr in enumerate(expected):
        self.assertEqual(repr(rebuilt.intrvls[idx]), expected_repr)
def caption_scan_to_intrvllists(scan_result, search_terms, video_ids=None,
                                dilation=0, payload=0):
    """
    Converts an ngram scan result from esper.captions to IntervalLists.

    @scan_result is an array of pairs of (video_id, video_result).
    video_result is in turn an array whose i-th item is an array of results
    for search_terms[i]. Each array of results is an array of (start, end)
    tuples. This is just the result returned by
    scan_for_ngrams_in_parallel(search_terms, video_ids).

    @video_ids optionally restricts the Video rows that are loaded; when it
    is None every Video is loaded.
    @dilation is subtracted from every start and added to every end before
    the times are multiplied by the video's fps and truncated with int().
    @payload is stored as the payload of every interval.

    Returns an array of dicts. The ith member of this array is a dict
    mapping from video IDs to intervals for search_terms[i]. Videos without
    any interval for a term are left out of that term's dict.
    """
    search_terms_intrvllists = [{} for _ in search_terms]

    # `is None` rather than `== None`: identity is the correct test for the
    # "no restriction" default.
    if video_ids is None:
        videos = {v.id: v for v in Video.objects.all()}
    else:
        videos = {
            v.id: v
            for v in Video.objects.filter(id__in=video_ids).all()
        }

    def convert_time(k, t):
        return int(t * videos[k].fps)

    for video_id, result in scan_result:
        # An empty list means the scan produced nothing for this video.
        if result == []:
            continue
        for i in range(len(search_terms)):
            interval_list = IntervalList([
                (convert_time(video_id, start - dilation),
                 convert_time(video_id, end + dilation), payload)
                for start, end in result[i]
            ])
            if interval_list.size() > 0:
                search_terms_intrvllists[i][video_id] = interval_list

    return search_terms_intrvllists
def test_coalesce(self):
    # coalesce() without arguments must fuse the two overlapping intervals
    # into 1.0-2.5 (payload 1). With a predicate demanding equal payloads the
    # two intervals must stay separate.
    overlapping = IntervalList([Interval(1., 2., 1), Interval(1.5, 2.5, 2)])

    fused = overlapping.coalesce()
    self.assertEqual(len(fused.intrvls), 1)
    self.assertEqual(repr(fused.intrvls[0]),
                     "<Interval start:1.0 end:2.5 payload:1>")

    kept_apart = overlapping.coalesce(
        predicate=lambda i1, i2: i1.payload == i2.payload)
    expected = [
        "<Interval start:1.0 end:2.0 payload:1>",
        "<Interval start:1.5 end:2.5 payload:2>",
    ]
    self.assertEqual(len(kept_apart.intrvls), 2)
    for idx, expected_repr in enumerate(expected):
        self.assertEqual(repr(kept_apart.intrvls[idx]), expected_repr)
def test_dilate(self):
    # dilate(0.2) must move every start 0.2 earlier and every end 0.2 later.
    originals = IntervalList([Interval(1., 2., 1), Interval(1.5, 2.5, 2)])

    widened = originals.dilate(0.2)

    expected = [
        "<Interval start:0.8 end:2.2 payload:1>",
        "<Interval start:1.3 end:2.7 payload:2>",
    ]
    for idx, expected_repr in enumerate(expected):
        self.assertEqual(repr(widened.intrvls[idx]), expected_repr)
def __init__(self, video_ids_to_intervals):
    """
    Store one IntervalList per video ID in self.intervals.

    video_ids_to_intervals is a dict mapping from video ID to either lists
    of (start, end, payload) tuples or an IntervalList. Every value is
    passed through the IntervalList constructor.
    """
    wrapped = {}
    for video_id in video_ids_to_intervals:
        wrapped[video_id] = IntervalList(video_ids_to_intervals[video_id])
    self.intervals = wrapped
def test_map(self):
    # map() with a function that shifts each interval one unit to the right
    # must return all four intervals shifted, in start order.
    source = IntervalList([
        Interval(52., 55., 1),
        Interval(100., 110., 1),
        Interval(1., 25., 1),
        Interval(200., 210., 1),
    ])

    def shift_by_one(intrvl):
        return Interval(intrvl.start + 1, intrvl.end + 1, intrvl.payload)

    shifted = source.map(shift_by_one)

    expected = [
        "<Interval start:2.0 end:26.0 payload:1>",
        "<Interval start:53.0 end:56.0 payload:1>",
        "<Interval start:101.0 end:111.0 payload:1>",
        "<Interval start:201.0 end:211.0 payload:1>",
    ]
    self.assertEqual(len(shifted.intrvls), 4)
    for idx, expected_repr in enumerate(expected):
        self.assertEqual(repr(shifted.intrvls[idx]), expected_repr)
def get_blackframe_list(video, histogram):
    """
    Get all black frames by checking the histogram list.

    @video supplies height, width and fps.
    @histogram is indexed by frame id; for every frame the first three
    entries of hist[0] are inspected (presumably the darkest bin of each of
    the three colour channels -- confirm against the histogram producer).

    A frame counts as black when each of those three entries exceeds
    MIN_BLACKFRAME * (height * width).
    NOTE(review): this assumes the bins hold pixel counts, so that
    MIN_BLACKFRAME is the fraction of the frame's pixels that must be dark.

    Returns an IntervalList with one interval per black frame, running from
    fid2second(fid, fps) to fid2second(fid + 1, fps), payload 0.
    """
    pixel_sum = video.height * video.width
    thresh = MIN_BLACKFRAME * pixel_sum

    # The comparison previously used the raw MIN_BLACKFRAME constant and left
    # `thresh` unused; the threshold scaled by the frame size is what the
    # bins are meant to be compared against.
    blackframe_list = [
        fid for fid, hist in enumerate(histogram)
        if hist[0][0] > thresh and hist[0][1] > thresh and hist[0][2] > thresh
    ]

    return IntervalList([(fid2second(fid, video.fps),
                          fid2second(fid + 1, video.fps), 0)
                         for fid in blackframe_list])
def test_overlaps(self):
    # overlaps() is checked in four configurations: a against b, b against a
    # (payload comes from the receiving list), restricted by the
    # overlaps_before() predicate, and with payload_second as merge op.
    list_a = IntervalList([
        Interval(52., 55., 1),
        Interval(100., 110., 1),
        Interval(1., 25., 1),
        Interval(200., 210., 1),
    ])
    list_b = IntervalList([
        Interval(50., 53., 2),
        Interval(101., 105., 2),
        Interval(12., 26., 2),
        Interval(190., 220., 2),
    ])

    def check(result, expected_reprs):
        self.assertEqual(len(result.intrvls), len(expected_reprs))
        for idx, expected_repr in enumerate(expected_reprs):
            self.assertEqual(repr(result.intrvls[idx]), expected_repr)

    check(list_a.overlaps(list_b), [
        "<Interval start:12.0 end:25.0 payload:1>",
        "<Interval start:52.0 end:53.0 payload:1>",
        "<Interval start:101.0 end:105.0 payload:1>",
        "<Interval start:200.0 end:210.0 payload:1>",
    ])

    check(list_b.overlaps(list_a), [
        "<Interval start:12.0 end:25.0 payload:2>",
        "<Interval start:52.0 end:53.0 payload:2>",
        "<Interval start:101.0 end:105.0 payload:2>",
        "<Interval start:200.0 end:210.0 payload:2>",
    ])

    check(list_a.overlaps(list_b, predicate=overlaps_before()), [
        "<Interval start:12.0 end:25.0 payload:1>",
    ])

    check(
        list_a.overlaps(list_b,
                        predicate=overlaps_before(),
                        payload_merge_op=payload_second),
        ["<Interval start:12.0 end:25.0 payload:2>"])
def split_intrvlcol(intrvlcol, seg_length):
    """
    Cut every interval of a VideoIntervalCollection into consecutive pieces
    of at most @seg_length.

    Each interval is split from its start into pieces of exactly
    @seg_length; the last piece holds whatever remains (it is never longer
    than @seg_length). Every piece keeps the payload of the interval it was
    cut from. Intervals whose end is not after their start yield no pieces.

    Returns a new VideoIntervalCollection.

    Raises ValueError when an interval with positive duration has to be
    split with a @seg_length that is not positive; splitting would never
    terminate in that case.
    """
    intrvllists_split = {}
    for video_id, intrvllist in intrvlcol.get_allintervals().items():
        pieces = []
        for interval in intrvllist.get_intervals():
            remaining = interval.end - interval.start
            if remaining > 0 and seg_length <= 0:
                raise ValueError(
                    'seg_length must be positive, got {!r}'.format(seg_length))
            start = interval.start
            while remaining > 0:
                if remaining > seg_length:
                    length = seg_length
                    remaining -= seg_length
                else:
                    # Final (possibly shorter) piece of this interval.
                    length = remaining
                    remaining = 0
                pieces.append((start, start + length, interval.payload))
                start += length
        intrvllists_split[video_id] = IntervalList(pieces)
    return VideoIntervalCollection(intrvllists_split)
def get_lowercase_intervals(transcript):
    """
    Find the stretches of the transcript that are written mostly in lower
    case.

    @transcript is iterated as (text, start_sec, end_sec) triples. A triple
    is selected when the share of lower-case characters among its alphabetic
    characters is above MIN_LOWERTEXT (texts without alphabetic characters
    are never selected).

    The selected intervals are dilated by half of MAX_LOWERWINDOW_GAP,
    coalesced, dilated back by the same amount, and finally restricted to
    those at least MIN_LOWERWINDOW long.

    Returns the resulting IntervalList (payload 0).
    """
    def is_lower_text(text):
        lower_count = sum(1 for c in text if c.islower())
        alpha_count = sum(1 for c in text if c.isalpha())
        if alpha_count == 0:
            return False
        return float(lower_count) / alpha_count > MIN_LOWERTEXT

    selected = []
    for text, start_sec, end_sec in transcript:
        if is_lower_text(text):
            selected.append((start_sec, end_sec, 0))

    merged = IntervalList(selected).dilate(MAX_LOWERWINDOW_GAP / 2).coalesce()
    restored = merged.dilate(-1 * MAX_LOWERWINDOW_GAP / 2)
    return restored.filter_length(min_length=MIN_LOWERWINDOW)
def iterable_to_intrvllists(iterable, accessor, groupby="video_id", schema=None):
    """
    Convert an iterable collection of rows to a collection of intervallists.

    Returns a dict that maps from values of the groupby field to
    IntervalLists.

    @iterable yields rows of data, and @accessor takes in a row and a field
    name and returns the value. For example, accessor(row, 'id').

    For example, if groupby is "video_id", groups the rows by the video_id
    field and returns a dict matching each unique video_id to an
    IntervalList.

    @schema defines how to get start, end, and payload for each interval
    from a single row. In particular, for each row, creates
    Interval(accessor(row, schema['start']), accessor(row, schema['end']),
    accessor(row, schema['payload'])). When @schema is None it defaults to
    {"start": "min_frame", "end": "max_frame", "payload": "id"}.
    """
    if schema is None:
        schema = {
            "start": "min_frame",
            "end": "max_frame",
            "payload": "id"
        }

    # Group rows by key; the accessor is evaluated once per row for the key.
    rows_by_key = {}
    for row in iterable:
        rows_by_key.setdefault(accessor(row, groupby), []).append(row)

    return {
        key: IntervalList([
            Interval(accessor(row, schema['start']),
                     accessor(row, schema['end']),
                     accessor(row, schema['payload']))
            for row in rows
        ])
        for key, rows in rows_by_key.items()
    }
def test_set_union(self):
    # set_union() must contain the intervals of both lists in start order,
    # whichever list the method is called on.
    left = IntervalList([Interval(1., 2., 1)])
    right = IntervalList([Interval(1.5, 2.5, 2)])
    expected = [
        "<Interval start:1.0 end:2.0 payload:1>",
        "<Interval start:1.5 end:2.5 payload:2>",
    ]

    for receiver, argument in ((left, right), (right, left)):
        union = receiver.set_union(argument)
        for idx, expected_repr in enumerate(expected):
            self.assertEqual(repr(union.intrvls[idx]), expected_repr)
def boundaries_to_shots(boundaries):
    """
    Turn a list of boundary positions into shots.

    A boundary at 0 is put in front of @boundaries, every boundary becomes a
    zero-length interval (payload 0), and the resulting IntervalList is
    folded with boundaries_to_shots_fold starting from an empty list.

    Returns the result of that fold_list call.
    """
    points = [0] + boundaries
    point_intervals = [(point, point, 0) for point in points]
    return IntervalList(point_intervals).fold_list(
        boundaries_to_shots_fold, [])
# Compute face detection twice a second and before/after every microshot boundary frames = list(range(0, video.num_frames, int(round(video.fps) / 2))) frames_set = set(frames) frames_set = frames_set.union(set(boundaries)) frames_set = frames_set.union(set([boundary - 1 for boundary in boundaries if boundary > 0])) frames = sorted(list(frames_set)) # Compute face detections in Scanner faces = st.face_detection.detect_faces( db, videos=[video.for_scannertools() for video in videos] frames = [frames] ) # One interval for every microshot transition transitions = IntervalList([(boundary - 1, boundary, 0) for boundary in boundaries]) # One interval for every frame whose payload is list of faces in that frame faces_at_boundaries = IntervalList([ (frame, frame, facelist) for frame, facelist in zip(frames, faces[0].load())) ]).filter_against( transitions, predicate=overlaps() ).filter(payload_satisfies(length_at_least(1))) # Get all boundaries where there are faces before and after the boundary boundaries_that_have_faces = transitions.filter_against( faces_at_boundaries, predicate=starts_inv() # Faces at the start of this transition ).filter_against( transitions.filter_against(
def get_text_intervals(word, transcript):
    """
    Return the coalesced intervals of all transcript entries that mention
    @word.

    @transcript is iterated as (text, start_sec, end_sec) triples. An entry
    is used when @word occurs in its text and the text contains no '{'.
    Every interval has payload 0.
    """
    hits = []
    for text, start_sec, end_sec in transcript:
        if word not in text or '{' in text:
            continue
        hits.append((start_sec, end_sec, 0))
    return IntervalList(hits).coalesce()
def compute_shots(microshot_boundaries, faces_scanner, frames, video):
    """
    Combine microshot boundaries and per-frame face lists into shots.

    @microshot_boundaries is a list of boundary frame numbers.
    @faces_scanner and @frames are zipped: faces_scanner[i] is the face list
    for frames[i]. Faces are read through .x1/.y1/.x2/.y2/.score.
    @video supplies num_frames and fps.

    Steps, in order:
      1. Keep the face lists that overlap a (boundary - 1, boundary)
         transition and contain at least one face.
      2. Keep the transitions that have such a face list at both ends and
         attach both lists as {'starts': ..., 'finishes': ...}.
      3. Mark a transition as "bad" when the faces at its end satisfy the
         scene graph built from the faces at its start.
      4. Turn the boundaries (plus 0 and video.num_frames) into microshots
         and merge the short ones, giving the candidate shots.
      5. Remove the boundaries of shots that start at a bad transition and
         rebuild the shots from the remaining boundaries.

    Prints the number of microshots and the number of bad transitions.
    Returns the final shots (the result of fold_list on the remaining
    boundaries).
    """
    print('Number of microshots: ', len(microshot_boundaries))
    # One zero-length interval per sampled frame; payload is its face list.
    faces_per_frame = IntervalList([
        (frame, frame, facelist)
        for frame, facelist in zip(frames, faces_scanner)
    ])

    # One interval per boundary, from the frame before it to the boundary.
    transitions = IntervalList([(boundary - 1, boundary, 0)
                                for boundary in microshot_boundaries])

    # Face lists that overlap a transition and hold at least one face.
    faces_at_boundaries = faces_per_frame.filter_against(
        transitions,
        predicate=overlaps()).filter(payload_satisfies(length_at_least(1)))

    # Get all transitions where there are faces before and after the transition
    # This IntervalList's payload is still 0
    transitions_with_faces = transitions.filter_against(
        faces_at_boundaries, predicate=starts_inv()).filter_against(
            transitions.filter_against(faces_at_boundaries,
                                       predicate=finishes_inv()),
            predicate=equal())

    # Annotate transitions_with_faces with the list of faces before and after
    # every transition
    transitions_with_faces_at_start_of_transition = transitions_with_faces.merge(
        faces_at_boundaries,
        predicate=starts_inv(),
        payload_merge_op=payload_second)
    transitions_with_faces_at_end_of_transition = transitions_with_faces.merge(
        faces_at_boundaries,
        predicate=finishes_inv(),
        payload_merge_op=payload_second)
    transitions_with_faces = transitions_with_faces_at_start_of_transition.merge(
        transitions_with_faces_at_end_of_transition,
        predicate=equal(),
        payload_merge_op=lambda starting_faces, ending_faces: {
            'starts': starting_faces,
            'finishes': ending_faces
        })

    # Get all the transitions where the faces at the start and the end are
    # the same
    def face_list_stays_the_same(start_finishes_payload):
        """
        Define a scene graph by the face positions at the start and check
        if the face positions at the end satisfy it.

        Only start faces with score above MINIMUM_FACE_PROBABILITY become
        nodes. Each node asks for a face at the same position (within
        POSITION_EPSILON) whose score is above MINIMUM_FACE_PROBABILITY.
        The graph has no edges and is evaluated with exact=True against
        the end faces, which are passed as plain dicts.
        """
        graph = {
            'nodes': [{
                'name': 'face{}'.format(idx),
                'predicates': [
                    position(face.x1,
                             face.y1,
                             face.x2,
                             face.y2,
                             epsilon=POSITION_EPSILON),
                    lambda face: face['score'] > MINIMUM_FACE_PROBABILITY
                ]
            } for idx, face in enumerate(start_finishes_payload['starts'])
                      if face.score > MINIMUM_FACE_PROBABILITY],
            'edges': []
        }
        return scene_graph(graph, exact=True)([{
            'x1': face.x1,
            'y1': face.y1,
            'x2': face.x2,
            'y2': face.y2,
            'score': face.score
        } for face in start_finishes_payload['finishes']])

    # "Bad" transitions: the faces did not change across the boundary.
    bad_transitions = transitions_with_faces.filter(
        payload_satisfies(face_list_stays_the_same))
    print(bad_transitions.size())

    # Finally, compute shot boundaries
    def convert_shot_boundaries_to_shots(shot_boundary_list):
        """
        Helper function to convert an IntervalList of shot boundaries to
        an IntervalList of shots.

        shot_boundary_list should have the start and end of the movie as
        boundaries.
        """
        def fold_boundaries_to_shots(acc, frame):
            # First boundary: start the first shot.
            if acc == []:
                return [frame.copy()]
            # Close the current shot one frame before this boundary.
            top = acc[-1]
            top.end = frame.start - 1
            if top.length() > 0:
                # The shot has positive length: open the next one here.
                acc.append(frame.copy())
            else:
                # Otherwise keep extending the current shot up to this
                # boundary instead of starting a new one.
                top.end = frame.start
            return acc

        return shot_boundary_list.fold_list(fold_boundaries_to_shots, [])

    # Convert microshot boundaries to IntervalList
    # (set() removes duplicates after adding frame 0 and the last frame)
    shot_boundaries = IntervalList([
        (boundary, boundary, 0)
        for boundary in list(set([0, video.num_frames] + microshot_boundaries))
    ])
    microshots = convert_shot_boundaries_to_shots(shot_boundaries)

    # Filter out short microshots
    short_microshots = microshots.filter_length(
        max_length=math.floor(MINIMUM_SHOT_DURATION * video.fps))
    # Extend each short microshot by one frame at its end, coalesce them,
    # then union with all microshots and coalesce again.
    shots = microshots.set_union(
        short_microshots.map(
            lambda i: (i.start, i.end + 1, i.payload)).coalesce()).coalesce()

    # Remove shots that start with the bad boundaries we found earlier
    # (bad transitions are shifted to (start + 1, end) before matching)
    bad_shots = shots.filter_against(
        bad_transitions.map(lambda i: (i.start + 1, i.end, i.payload)),
        predicate=starts_inv())
    shot_boundaries = shots.map(lambda i: (i.start, i.start, i.payload))
    shot_boundaries_without_bad_shots = shot_boundaries.minus(bad_shots)
    shots = convert_shot_boundaries_to_shots(shot_boundaries_without_bad_shots)
    return shots
def detect_commercial_rekall(video, transcript_path, blackframe_list=None,
                             histogram=None, verbose=True):
    """
    API for detecting commercial blocks from TV news video using rekall.

    @video: object providing fps and num_frames (and height/width when the
        black frames are derived from @histogram)
    @transcript_path: path handed to load_transcript; the transcript is
        iterated as (text, start_sec, end_sec) triples
    @blackframe_list: list of black frame indices
    @histogram: list of histogram 16x3 bin for each frame, not used if
        blackframe_list is provided
    @verbose: print the intermediate IntervalLists

    Pipeline:
      1. Build black windows from the black frames.
      2. Take the video minus the black windows, drop the blocks that
         overlap ">>" text (other than ">> Announcer:"), and fold
         neighbouring candidate blocks together while they stay below
         MAX_COMMERCIAL_TIME.
      3. Add mostly-lower-case transcript stretches.
      4. Add nearby blank (no transcript) windows when the result stays
         below MAX_MERGE_DURATION.
      5. Fill small gaps, then delete short or blank-only isolated blocks.

    Return: IntervalList of the detected commercial blocks (in seconds).
    """
    transcript = load_transcript(transcript_path)

    if blackframe_list is None:
        # get_blackframe_list takes (video, histogram); the video argument
        # used to be missing here, which raised a TypeError.
        blackframe_intervallist = get_blackframe_list(video, histogram)
    else:
        blackframe_intervallist = IntervalList([
            (fid2second(fid, video.fps), fid2second(fid + 1, video.fps), 0)
            for fid in blackframe_list
        ])

    # Join black frames that are at most one frame apart, undo the dilation,
    # and keep only windows that are long enough.
    black_windows = blackframe_intervallist \
        .dilate(1. / video.fps) \
        .coalesce() \
        .dilate(-1. / video.fps) \
        .filter_length(min_length=MIN_BLACKWINDOW * 1. / video.fps)

    # get all instances of >>, Announcer:, and >> Announcer: in transcript
    arrow_text = get_text_intervals(">>", transcript)
    announcer_text = get_text_intervals("Announcer:", transcript)
    arrow_announcer_text = get_text_intervals(">> Announcer:", transcript)

    # get an interval for the whole video
    whole_video = IntervalList([(0., video.num_frames / video.fps, 0)])

    # whole video minus black windows to get segments in between black windows
    # then filter out anything that overlaps with ">>" as long as it's not
    # ">> Announcer:"
    # then coalesce, as long as it doesn't get too long
    def fold_fn(stack, interval):
        if len(stack) == 0:
            stack.append(interval)
            return stack

        last = stack.pop()
        if not or_pred(overlaps(), after(max_dist=1), arity=2)(interval, last):
            # Not adjacent: keep both as separate blocks.
            stack.append(last)
            stack.append(interval)
            return stack

        if last.merge(interval).length() > MAX_COMMERCIAL_TIME:
            # Merging would be too long: keep `last` (clipped to the maximum
            # length if it is already too long) and start a new block.
            if last.length() > MAX_COMMERCIAL_TIME:
                stack.append(
                    Interval(last.start, last.start + MAX_COMMERCIAL_TIME,
                             last.payload))
            else:
                stack.append(last)
            stack.append(interval)
        else:
            stack.append(last.merge(interval))
        return stack

    all_blocks = whole_video.minus(black_windows)
    non_commercial_blocks = all_blocks.filter_against(
        arrow_text.minus(arrow_announcer_text), predicate=overlaps())
    commercial_blocks = whole_video.minus(non_commercial_blocks)
    if verbose:
        print("commercial blocks candidates: ({})\n".format(
            commercial_blocks.size()))
        for idx, win in enumerate(commercial_blocks.get_intervals()):
            print(idx, win)

    commercials = commercial_blocks \
        .fold_list(fold_fn, []) \
        .filter_length(min_length=MIN_COMMERCIAL_TIME)
    if verbose:
        print("commercials from blackwindow:\n", commercials)

    # add in lowercase intervals
    lowercase_intervals = get_lowercase_intervals(transcript)
    if verbose:
        print("lowercase intervals:\n", lowercase_intervals)
    # NOTE(review): the second dilate is positive, not negative, so the
    # blocks stay widened after coalescing -- confirm this is intended.
    commercials = commercials \
        .set_union(lowercase_intervals) \
        .dilate(MIN_COMMERCIAL_GAP / 2) \
        .coalesce() \
        .dilate(MIN_COMMERCIAL_GAP / 2)
    if verbose:
        print("commercials merge with lowercase:\n", commercials)

    # get blank intervals: parts of the video without (delay-corrected)
    # transcript text, restricted to the allowed window lengths
    blank_intervals = whole_video.minus(
        IntervalList([
            (start_sec - TRANSCRIPT_DELAY, end_sec - TRANSCRIPT_DELAY, 0)
            for _text, start_sec, end_sec in transcript
        ]).coalesce()).coalesce().filter_length(min_length=MIN_BLANKWINDOW,
                                                max_length=MAX_BLANKWINDOW)
    if verbose:
        print("blank intervals:\n", blank_intervals)

    # add in blank intervals, but only if adding in the new intervals doesn't
    # get too long
    # NOTE(review): same positive second dilate as above.
    commercials = commercials.merge(
        blank_intervals,
        predicate=or_pred(before(max_dist=MAX_MERGE_GAP),
                          after(max_dist=MAX_MERGE_GAP),
                          arity=2),
        working_window=MAX_MERGE_GAP) \
        .filter_length(max_length=MAX_MERGE_DURATION) \
        .set_union(commercials) \
        .dilate(MIN_COMMERCIAL_GAP / 2) \
        .coalesce() \
        .dilate(MIN_COMMERCIAL_GAP / 2)
    if verbose:
        # This label used to repeat the "lowercase" text of the step above.
        print("commercials merge with blank intervals:\n", commercials)

    # post-process commercials to get rid of gaps, small commercials, and
    # isolated blocks
    small_gaps = whole_video \
        .minus(commercials) \
        .filter_length(max_length=MAX_COMMERCIAL_GAP) \
        .filter_against(
            arrow_text.filter_against(
                announcer_text,
                predicate=not_pred(overlaps()),
                working_window=1.0),
            predicate=not_pred(overlaps()),
            working_window=1.0)

    # merge with small gaps, but only if that doesn't make things too long
    commercials = commercials \
        .set_union(small_gaps.dilate(0.1)) \
        .coalesce() \
        .filter_length(max_length=MAX_COMMERCIAL_TIME) \
        .set_union(commercials) \
        .coalesce()

    # get isolated commercials
    not_isolated_commercials = commercials.filter_against(
        commercials,
        predicate=or_pred(before(max_dist=MAX_COMMERCIAL_TIME),
                          after(max_dist=MAX_COMMERCIAL_TIME),
                          arity=2),
        working_window=MAX_COMMERCIAL_TIME)
    isolated_commercials = commercials.minus(not_isolated_commercials)

    # Isolated blocks are deleted when they are short, or when they are
    # exactly a blank interval of limited length.
    commercials_to_delete = isolated_commercials \
        .filter_length(max_length=MIN_COMMERCIAL_TIME_FINAL) \
        .set_union(isolated_commercials
                   .filter_against(blank_intervals, predicate=equal())
                   .filter_length(max_length=MAX_ISOLATED_BLANK_TIME))

    commercials = commercials.minus(commercials_to_delete)
    return commercials
def test_merge(self):
    # merge() is checked with the overlaps() predicate in both directions,
    # with overlaps_before() (with and without payload_second), and, on a
    # second set of intervals, with meets_before() and before(0.1, 10.0).
    def check(result, expected_reprs):
        self.assertEqual(len(result.intrvls), len(expected_reprs))
        for idx, expected_repr in enumerate(expected_reprs):
            self.assertEqual(repr(result.intrvls[idx]), expected_repr)

    list_a = IntervalList([
        Interval(52., 55., 1),
        Interval(100., 110., 1),
        Interval(1., 25., 1),
        Interval(200., 210., 1),
    ])
    list_b = IntervalList([
        Interval(50., 53., 2),
        Interval(101., 105., 2),
        Interval(12., 26., 2),
        Interval(190., 220., 2),
    ])

    check(list_a.merge(list_b, predicate=overlaps()), [
        "<Interval start:1.0 end:26.0 payload:1>",
        "<Interval start:50.0 end:55.0 payload:1>",
        "<Interval start:100.0 end:110.0 payload:1>",
        "<Interval start:190.0 end:220.0 payload:1>",
    ])

    check(list_b.merge(list_a, predicate=overlaps()), [
        "<Interval start:1.0 end:26.0 payload:2>",
        "<Interval start:50.0 end:55.0 payload:2>",
        "<Interval start:100.0 end:110.0 payload:2>",
        "<Interval start:190.0 end:220.0 payload:2>",
    ])

    check(list_a.merge(list_b, predicate=overlaps_before()), [
        "<Interval start:1.0 end:26.0 payload:1>",
    ])

    check(
        list_a.merge(list_b,
                     predicate=overlaps_before(),
                     payload_merge_op=payload_second),
        ["<Interval start:1.0 end:26.0 payload:2>"])

    # Second scenario: the b intervals touch or follow the a intervals.
    list_a = IntervalList([
        Interval(52., 55., 1),
        Interval(100., 110., 1),
        Interval(1., 25., 1),
        Interval(200., 210., 1),
    ])
    list_b = IntervalList([
        Interval(56., 90., 2),
        Interval(101., 105., 2),
        Interval(25., 31., 2),
        Interval(190., 220., 2),
    ])

    check(list_a.merge(list_b, predicate=meets_before()), [
        "<Interval start:1.0 end:31.0 payload:1>",
    ])

    check(list_a.merge(list_b, predicate=before(0.1, 10.0)), [
        "<Interval start:52.0 end:90.0 payload:1>",
    ])
def test_minus(self):
    # minus() is checked with its defaults, with recursive_diff=False (only
    # the number of results), with payload_second as merge op, and with the
    # overlaps_before() predicate.
    def check(result, expected_reprs):
        self.assertEqual(len(result.intrvls), len(expected_reprs))
        for idx, expected_repr in enumerate(expected_reprs):
            self.assertEqual(repr(result.intrvls[idx]), expected_repr)

    long_list = IntervalList([Interval(3., 15., 2), Interval(1., 10., 1)])
    short_list = IntervalList([
        Interval(2., 2.7, 4),
        Interval(9., 12., 7),
        Interval(2.9, 3.5, 5),
        Interval(2., 2.5, 3),
        Interval(5., 7., 6),
        Interval(14., 16., 8),
    ])

    check(long_list.minus(short_list), [
        "<Interval start:1.0 end:2.0 payload:1>",
        "<Interval start:2.7 end:2.9 payload:1>",
        "<Interval start:3.5 end:5.0 payload:1>",
        "<Interval start:3.5 end:5.0 payload:2>",
        "<Interval start:7.0 end:9.0 payload:1>",
        "<Interval start:7.0 end:9.0 payload:2>",
        "<Interval start:12.0 end:14.0 payload:2>",
    ])

    non_recursive = long_list.minus(short_list, recursive_diff=False)
    self.assertEqual(len(non_recursive.intrvls), 15)

    check(long_list.minus(short_list, payload_merge_op=payload_second), [
        "<Interval start:1.0 end:2.0 payload:3>",
        "<Interval start:2.7 end:2.9 payload:4>",
        "<Interval start:3.5 end:5.0 payload:5>",
        "<Interval start:3.5 end:5.0 payload:5>",
        "<Interval start:7.0 end:9.0 payload:6>",
        "<Interval start:7.0 end:9.0 payload:6>",
        "<Interval start:12.0 end:14.0 payload:7>",
    ])

    check(long_list.minus(short_list, predicate=overlaps_before()), [
        "<Interval start:1.0 end:9.0 payload:1>",
        "<Interval start:3.0 end:14.0 payload:2>",
    ])
def get_text_intervals(word, transcript):
    """
    Return the coalesced intervals of all transcript entries that mention
    @word, shifted earlier by TRANSCRIPT_DELAY.

    @transcript is iterated as (text, start_sec, end_sec) triples. For each
    entry whose text contains @word, TRANSCRIPT_DELAY is subtracted from both
    start_sec and end_sec. Every interval has payload 0.
    """
    hits = []
    for text, start_sec, end_sec in transcript:
        if word not in text:
            continue
        hits.append(
            (start_sec - TRANSCRIPT_DELAY, end_sec - TRANSCRIPT_DELAY, 0))
    return IntervalList(hits).coalesce()