def find_text_in_video(frame_iterator, find_text_in_frame_func, stability_threshold=5):
    """Follow text blobs as they appear in, and vanish from, a stream of frames.

    This is a generator.  For every input frame it yields
    ``('new_frame', (sec, frame))`` and, when applicable,
    ``('new_blob', blob)`` for a detection that stayed stable and
    ``('erased_blob', blob)`` for a previously reported blob whose image
    region changed persistently.

    Args:
        frame_iterator: iterable of ``(sec, frame)`` pairs.  ``frame`` must
            support 2-D slicing ``frame[a:b, c:d]``.
        find_text_in_frame_func: callable ``(frame, base_frame)`` returning a
            list of blob dicts.  This function reads the keys ``'blob'``,
            ``'left_corner'`` and ``'proba'`` and, if present, ``'blob_bw'``.
            ``base_frame`` is a one-element list holding the reference frame.
        stability_threshold: number of follow-up frames a detection (or an
            erasure) is observed for before it is accepted or rejected.

    Yields:
        ``(event_name, payload)`` tuples as described above.  Blob dicts are
        annotated in place with ``'frame'``, ``'sec'``, ``'unchange_frac'``,
        ``'n_sameblobs'`` and, while an erasure is being evaluated,
        ``'removed_at_sec'``, ``'removed_at_frame'`` and
        ``'removed_changed_frac'``.

    Note:
        Written to run unchanged on Python 2.6+ and Python 3.
    """
    base_frame = []      # one-element list holding the current reference frame
    pending_blobs = []   # detected, stability still being measured
    past_blobs = []      # reported via 'new_blob', now watched for erasure
    frame_queue = deque(maxlen=stability_threshold)  # most recent frames
    rewinding_queue = deque()                        # frames queued for replay
    frame_iterator = iter(frame_iterator)

    def next_frame():
        """Yield queued replay frames first, then fresh frames from the source."""
        while True:
            if rewinding_queue:
                yield rewinding_queue.popleft()
                continue
            try:
                item = next(frame_iterator)
            except StopIteration:
                # End the generator explicitly: letting StopIteration escape
                # from a generator body is a RuntimeError under PEP 479.
                return
            yield item

    def rewind(nframe):
        """Queue the last ``nframe`` buffered frames for replay.

        Not called anywhere yet; kept for the open "rewind?" TODO below.
        """
        assert nframe <= len(frame_queue)
        for _ in range(len(frame_queue) - nframe):
            frame_queue.popleft()
        rewinding_queue.extend(frame_queue)

    def remove_by_identity(items, target):
        """Delete ``target`` (matched with ``is``) from ``items``; return success.

        ``list.remove`` compares with ``==``; comparing two blob dicts that
        hold numpy arrays can raise "truth value of an array is ambiguous".
        """
        for index, item in enumerate(items):
            if item is target:
                del items[index]
                return True
        return False

    def blob_mask(blob):
        """Return the cached ``'blob_bw'`` mask, thresholding only if absent."""
        if 'blob_bw' in blob:
            return blob['blob_bw']
        return img_proc_utils.otsu_thresholded(blob['blob'])

    def thresholded_region(frame, blob, mask):
        """Threshold the part of ``frame`` that the blob's mask covers."""
        x, y = blob['left_corner']
        region = frame[x:x + mask.shape[0], y:y + mask.shape[1]]
        return img_proc_utils.otsu_thresholded(region)

    for sec, frame in next_frame():
        frame_queue.append((sec, frame))
        yield 'new_frame', (sec, frame)

        if not base_frame:
            # The very first frame only serves as the reference frame.
            base_frame = [frame]
            continue

        # 1) Decide pending erasures once enough follow-up frames were seen.
        for blob in list(past_blobs):
            observed = blob.get('removed_changed_frac', ())
            if len(observed) < stability_threshold:
                continue
            fracs = np.asarray(observed)
            # Second-largest unchanged fraction (largest if only one sample):
            # tolerates a single outlier frame.
            second_largest = np.sort(fracs)[-2:][0]
            if second_largest < 0.4:
                print('%s %s' % (blob['sec'], np.median(fracs)))
                if remove_by_identity(past_blobs, blob):
                    yield 'erased_blob', blob
                else:
                    print('%s %s' % (int(blob['sec']), blob['left_corner']))
                # todo rewind?
                base_frame = [blob['removed_at_frame']]  # reset base frame
            else:
                # Probably not an actual erasure: forget the tentative removal.
                del blob['removed_at_sec']
                del blob['removed_at_frame']
                del blob['removed_changed_frac']

        # 2) Compare every reported blob with its region in the current frame.
        for blob in past_blobs:
            mask = blob_mask(blob)
            current_blob = thresholded_region(frame, blob, mask).astype(bool)
            frac = img_proc_utils.unchanged_fraction(
                mask, current_blob, white_overweight=1 / 2.)
            if 'removed_at_sec' in blob:  # erasure already pending
                blob['removed_changed_frac'].append(frac)
            elif frac < 0.4:
                blob['removed_at_sec'] = int(sec)
                blob['removed_at_frame'] = frame
                blob['removed_changed_frac'] = [frac]

        # 3) Promote pending blobs that stayed stable.  Iterate a snapshot:
        #    ``pending_blobs`` is modified inside the loop.
        fully_observed = [b for b in pending_blobs
                          if len(b['unchange_frac']) == stability_threshold]
        for blob in fully_observed:
            if np.median(blob['unchange_frac']) > 0.7:  # stable between frames
                other_blobs = [
                    b for b in pending_blobs
                    if len(b['unchange_frac']) < stability_threshold
                    and img_proc_utils.shared_fraction(blob, b) > 0.4]
                candidates = [blob] + other_blobs
                # NOTE(review): this calls ``threshold_otsu`` while the rest of
                # the function uses ``otsu_thresholded`` - confirm both exist
                # in img_proc_utils and that this one returns an array.
                largest_blob = max(
                    candidates,
                    key=lambda c: np.count_nonzero(
                        img_proc_utils.threshold_otsu(c['blob']) > 0))
                base_frame = [largest_blob['frame']]  # reset base frame
                for b in other_blobs:
                    remove_by_identity(pending_blobs, b)
                past_blobs.append(largest_blob)
                largest_blob['n_sameblobs'] = [b['proba'] for b in candidates]
                yield 'new_blob', largest_blob
            remove_by_identity(pending_blobs, blob)

        # 4) Record how much each still-pending blob changed in this frame.
        for blob in pending_blobs:
            if len(blob['unchange_frac']) >= stability_threshold:
                continue
            mask = blob_mask(blob)
            frac = img_proc_utils.unchanged_fraction(
                mask, thresholded_region(frame, blob, mask), white_overweight=2)
            blob['unchange_frac'].append(frac)

        # 5) Detect new candidate blobs relative to the reference frame.
        text_blobs = find_text_in_frame_func(frame, base_frame)
        for blob in text_blobs:
            blob.update({'frame': frame, 'sec': int(sec), 'unchange_frac': []})
        pending_blobs.extend(text_blobs)
def stream_frames(stream, pafy_video=None):
    """Analyse a video stream and yield server events describing its text.

    This is a generator.  It yields the values returned by
    ``server_event_msg`` in this order: one ``'onstart'`` message with video
    metadata, then per analysed frame an ``'onprogress'`` message, for each
    confirmed blob an unnamed blob message followed by ``'onhist'``, for each
    erased blob ``'onerasure'`` followed by ``'onhist'``, and finally
    ``'onend'``.

    The Redis key ``'counter'`` is incremented when the generator starts and
    decremented when it finishes or is closed.

    Args:
        stream: stream identifier passed to ``utils.get_frames_from_stream``.
            If it contains ``'rubakov1'`` the built-in demo metadata and a
            240 s time offset are used.
        pafy_video: optional object providing ``length``, ``title``,
            ``description``, ``author`` and ``url``; when truthy its values
            are used for the ``'onstart'`` message.

    Note:
        Written to run unchanged on Python 2.6+ and Python 3.
    """
    def ink_pixels(blob):
        """Count positive pixels of the blob's mask, caching it as 'blob_bw'."""
        if 'blob_bw' not in blob:
            blob['blob_bw'] = img_proc_utils.otsu_thresholded(blob['blob'])
        return np.count_nonzero(blob['blob_bw'] > 0)

    def hist_event():
        """Build the 'onhist' message with one bin per minute of video."""
        bins = [{'x': i, 'y': hist.get(i, 0)}
                for i in range(int(video_length // 60))]
        return server_event_msg({'hist': bins}, 'onhist')

    r = StrictRedis('localhost')
    # Increment outside the try block so a failed increment is never
    # "balanced" by a decrement in ``finally``.
    r.incr('counter')  # keep track of how many processes are running
    try:
        demo_diff = 0
        if pafy_video:
            video_length = pafy_video.length
            yield server_event_msg({'video_length': pafy_video.length,
                                    'video_title': pafy_video.title,
                                    'video_desc': pafy_video.description,
                                    'video_author': pafy_video.author,
                                    'video_url': pafy_video.url}, 'onstart')
        elif 'rubakov1' in stream:
            # NOTE(review): the histogram spans the un-shifted 5412 s while
            # 'onstart' reports 5412 - demo_diff.  This reproduces the
            # previous behaviour - confirm it is intended.
            video_length = 5412
            demo_diff = 4 * 60  # the demo video is four min in
            yield server_event_msg({"video_author": "Galileo Galilei",
                                    "video_length": 5412 - demo_diff,
                                    "video_title": "Early Universe - V. Rubakov - lecture 1/9",
                                    "video_url": "https://www.youtube.com/watch?v=XsqtPhra2f0",
                                    "video_desc": "GGI lectures on the theory of fundamental interactions, January 2015\nhttp://heidi.pd.infn.it/html/GGI/index.php"},
                                   'onstart')
        else:
            video_length = 5000
            yield server_event_msg({'video_length': 5000, 'video_title': stream}, 'onstart')

        hist = defaultdict(float)  # minute index -> accumulated pixel count
        it = utils.find_text_in_video(
            utils.get_frames_from_stream(stream, 3),
            lambda frame, base_frames: utils.find_text_in_frame(
                frame, base_frames, proba_threshold=0.5))

        for dtype, data in it:
            if dtype == 'new_frame':
                yield server_event_msg({'sec': int(data[0])}, 'onprogress')
            elif dtype == 'new_blob':
                shown_sec = int(data['sec'] + demo_diff)
                yield server_event_msg({'img': utils.img_to_base64_bytes(data['blob']),
                                        'sec': shown_sec,
                                        'proba': round(data['proba'], 2),
                                        'left_corner': data['left_corner'],
                                        'size': data['blob'].shape,
                                        'n_sameblobs': data['n_sameblobs'],
                                        })
                # ``//`` keeps the minute bin an integer on Python 2 and 3.
                hist[shown_sec // 60] += ink_pixels(data)
                yield hist_event()
            elif dtype == "erased_blob":
                removed_sec = int(data['removed_at_sec'] + demo_diff)
                yield server_event_msg({'sec': int(data['sec'] + demo_diff),
                                        'removed_sec': removed_sec,
                                        'left_corner': data['left_corner']}, 'onerasure')
                hist[removed_sec // 60] -= ink_pixels(data)
                yield hist_event()

        yield server_event_msg({'end': True}, 'onend')
        # Falling off the end finishes the generator; an explicit
        # ``raise StopIteration`` is a RuntimeError under PEP 479.
    finally:
        r.decr('counter')