def main():
    """Align transcripts for a slice of the three-year TV-news dataset.

    Loads per-video metadata from a pickle, builds audio/caption sources for
    videos 20000-30000 with valid transcripts, and runs the transcript
    alignment pipeline on a 100-worker Kubernetes Scanner cluster.

    NOTE(review): this chunk's newlines were lost in extraction; the line
    structure below is reconstructed from the embedded comments and syntax.
    """
    # set test video list
    # video_list = ['CNNW_20160107_180000_Wolf']
    videos = Video.objects.filter(threeyears_dataset=True).all()
    # "addtional" (sic) matches the on-disk pickle name; maps video.id -> metadata dict
    addtional_field = pickle.load(open('/app/data/addtional_field.pkl', 'rb'))
    videos = [video for video in videos if addtional_field[video.id]['valid_transcript']]
    # process one 10k-video shard of the dataset
    videos = videos[20000:30000]
    # todo: remove videos whose result is already dumped

    # get audio length
    # pkl_path = '/app/data/audio_length_dict.pkl'
    # audio_length_dict = pickle.load(open(pkl_path, 'rb'))
    # audio_length = [audio_length_dict[video_name] for video_name in video_list]

    # load audios from videos
    audios = [audio.AudioSource(video.for_scannertools(),
                                frame_size=SEG_LENGTH,
                                duration=addtional_field[video.id]['audio_duration'])
              for video in videos]

    # set up transcripts
    captions = [audio.CaptionSource('tvnews/subs10/' + video.item_name(),
                                    max_time=addtional_field[video.id]['audio_duration'],
                                    window_size=SEG_LENGTH)
                for video in videos]

    # set up run opts
    run_opts = {'pipeline_instances_per_node': 32, 'checkpoint_frequency': 5}

    # set up align opts
    align_opts = {
        'seg_length': 60,
        'max_misalign': 10,
        'num_thread': 1,
        'exhausted': False,
        'align_dir': None,
        'res_path': None,
        # 'align_dir' : '/app/data/subs/orig/',
        # 'res_path' : '/app/result/test_align_3y.pkl',
    }

    # --- local run ---
    # db = scannerpy.Database()
    # transcript_alignment.align_transcript(db, videos, audios, captions, run_opts, align_opts, cache=False)

    # --- kubernete run ---
    cfg = cluster_config(
        num_workers=100,
        worker=worker_config('n1-standard-32'))
    with make_cluster(cfg, no_delete=True) as db_wrapper:
        db = db_wrapper.db
        transcript_alignment.align_transcript_pipeline(
            db=db, audio=audios, captions=captions, cache=False,
            run_opts=run_opts, align_opts=align_opts)
# Computes optical-flow histograms for all non-ignored films on a 100-worker
# Scanner cluster (labeler/tag bookkeeping is set up but labeled-video
# filtering is currently disabled — `labeled_videos` is hard-coded empty).
# NOTE(review): extraction collapsed this chunk onto one physical line and cut
# it off mid-`run_opts` dict — the `compute_flow_histograms(...)` call is never
# closed. Restore the original line breaks (and the missing tail) before editing.
LABELER, _ = Labeler.objects.get_or_create(name='opticalflowhists') LABELED_TAG, _ = Tag.objects.get_or_create(name='opticalflowhists:labeled') bad_movie_ids = set([]) #labeled_videos = set([videotag.video_id #                      for videotag in VideoTag.objects.filter(tag=LABELED_TAG).all()]) labeled_videos = set() all_videos = set( [video.id for video in Video.objects.filter(ignore_film=False).all()]) video_ids = sorted( list(all_videos.difference(labeled_videos).difference(bad_movie_ids))) videos = Video.objects.filter(id__in=video_ids).order_by('id') cfg = cluster_config(num_workers=100, worker=worker_config('n1-standard-32')) with make_cluster(cfg, no_delete=True) as db_wrapper: db = db_wrapper.db #if True: # db_wrapper = ScannerWrapper.create() # db = db_wrapper.db histograms = st.histograms.compute_flow_histograms( db, videos=[video.for_scannertools() for video in list(videos)], run_opts={ 'work_packet_size': 4, 'pipeline_instances_per_node': 2, 'io_packet_size': 2496, 'checkpoint_frequency': 1, 'tasks_in_queue_per_pu': 2
# Runs shot-boundary detection under Timer('Shot boundaries') on a 60-worker
# highmem Scanner cluster (an older Timer('Histogram') cluster setup is left
# commented out above it).
# NOTE(review): extraction collapsed this chunk onto one physical line and cut
# it off mid-call — `hists = run_pipeline(db, videos,` has no closing arguments.
# Restore the original line breaks (and the missing tail) before editing.
# with Timer('Histogram'): # cfg = cluster_config( # num_workers=300, # worker=worker_config('n1-standard-16')) # with make_cluster(cfg, no_delete=True) as db_wrapper: # videos = videos #videos = list(Video.objects.filter(id__gte=91250, id__lte=91350)) # videos = [Video.objects.get(id=63970)] videos = videos with Timer('Shot boundaries'): cfg = cluster_config(num_workers=60, worker=worker_config('n1-highmem-16'), workers_per_node=2, num_load_workers=1, num_save_workers=2) with make_cluster(cfg, no_delete=True) as db_wrapper: # from esper.scannerutil import ScannerWrapper # if True: # db_wrapper = ScannerWrapper.create() db = db_wrapper.db job_config = ScannerJobConfig(io_packet_size=10000, work_packet_size=400, batch=400) hists = run_pipeline(db, videos,
# Loads faces (for later gender detection) on frames tagged 'face_computed',
# using a local scannerpy.Database() — the cluster path is commented out.
# NOTE(review): this chunk's newlines were lost in extraction; the line
# structure below is reconstructed. The `if True:` block replaces the
# commented-out `with make_cluster(...)` context manager, so the statements
# after it are assumed to be its body — confirm against the original file.
videos = Video.objects.filter(id__in=video_ids).order_by('id').all()

print("Getting frames to compute on")
# Get frames that we computed faces on
frames = [
    [
        f.number
        for f in Frame.objects.filter(
            video_id=video, tags__name='face_computed').order_by('number')
    ]
    for video in tqdm(video_ids)
]

# Cluster parameters
cfg = cluster_config(
    num_workers=80,
    worker=worker_config('n1-standard-32'),
    pipelines=[st.gender_detection.GenderDetectionPipeline])
#with make_cluster(cfg, no_delete=True) as db_wrapper:
#    db = db_wrapper.db
if True:
    db = scannerpy.Database()

    print("Loading faces from Scanner")
    # Load faces
    faces = st.face_detection.detect_faces(
        db,
        videos=[video.for_scannertools() for video in videos],
        frames=frames
    )
    print("Detecting genders")
# NOTE(review): extraction dropped newlines/indentation. `build_pipeline` uses
# `self` and is a method of a pipeline class whose header is outside this
# chunk — re-indent it under its class when restoring the file.
def build_pipeline(self):
    """Build the clothing-bbox preparation pipeline: decode bboxes from JSON
    and pair them with the sampled frames."""
    bboxes = self._db.ops.BboxesFromJson(bboxes=self._sources['bboxes'].op)
    return {
        'bboxes': self._db.ops.PrepareClothingBbox(
            frame=self._sources['frame_sampled'].op, bboxes=bboxes)
    }


detect_clothing_bboxes = ClothingBboxesPipeline.make_runner()
detect_clothing = ClothingDetectionPipeline.make_runner()

videos = list(Video.objects.all().order_by('id'))

cfg = cluster_config(
    num_workers=100,
    worker=worker_config('n1-standard-16', gpu=1),
    pipelines=[clothing_detection.ClothingDetectionPipeline])
with make_cluster(cfg, sql_pool=2, no_delete=True) as db_wrapper:
    # if True:
    #     db_wrapper = ScannerWrapper.create()
    db = db_wrapper.db

    print('Fetching frames')
    # cached per-video frame lists; computed in parallel on first use
    frames = pcache.get('clothing_frames',
                        lambda: par_for(frames_for_video, videos, workers=8))
    # drop videos that have no frames to process, keeping videos/frames aligned
    videos, frames = unzip([(v, f) for (v, f) in zip(videos, frames)
                            if len(f) > 0])
    videos = list(videos)
    frames = list(frames)
# Benchmarks (behind `if False:`) and then runs the face-embedding pipeline:
# `run_pipeline` wraps `embed_faces` with per-video frames and SQL-backed faces.
# NOTE(review): extraction collapsed this chunk onto one physical line, and it
# begins mid-expression (`.values('number')...]` is the tail of a queryset
# built on an earlier, unseen line). Restore the original line breaks and the
# missing head before editing.
.values('number').order_by('number')] if False: with Timer('benchmark'): videos = videos[:30] def run_pipeline(db, videos, frames, **kwargs): return embed_faces( db, videos=[v.for_scannertools() for v in videos], frames=frames, faces=[ScannerSQLTable(Face, v) #num_elements=len(f)) for v, f in zip(videos, frames)], cache=False, **kwargs) cfg = cluster_config( num_workers=5, worker=worker_config('n1-standard-32'), pipelines=[face_embedding.FaceEmbeddingPipeline]) configs = [(cfg, [ ScannerJobConfig(io_packet_size=500, work_packet_size=20, pipelines_per_worker=4), ScannerJobConfig(io_packet_size=1000, work_packet_size=20, pipelines_per_worker=4), ScannerJobConfig(io_packet_size=1000, work_packet_size=80, pipelines_per_worker=4), ScannerJobConfig(io_packet_size=1000, work_packet_size=20, pipelines_per_worker=8), ])] bench('embedding', {'videos': videos, 'frames': [frames_for_video(v) for v in videos]}, run_pipeline, configs, no_delete=True, force=True) exit() videos = list(Video.objects.filter(threeyears_dataset=True).order_by('id')) def load_frames(): return par_for(frames_for_video, videos, workers=8) frames = pcache.get('emb_frames', load_frames, force=True)
# Face detection over the three-year dataset: a FaceDetectionPipeline
# `build_pipeline` override that threads frame_ids through, a benchmark branch
# (behind `if False:`), then the real run on non-shot-boundary frames.
# NOTE(review): extraction collapsed this chunk onto one physical line
# (`build_pipeline` uses `self` and belongs to a class whose header is outside
# this chunk) and cut it off mid-call — the final `cfg = cluster_config(
# num_workers=100,` is never closed. Restore the original line breaks and the
# missing tail before editing.
def build_pipeline(self): output_ops = super(FaceDetectionPipeline, self).build_pipeline() output_ops['frame_ids'] = self._sources['frame_ids'].op return output_ops detect_faces = FaceDetectionPipeline.make_runner() videos = list(Video.objects.filter(threeyears_dataset=True).order_by('id')) if False: with Timer('benchmark'): videos = videos[:50] def run_pipeline(db, videos, frames, **kwargs): return face_detection.detect_faces(db, videos=[v.for_scannertools() for v in videos], frames=frames, cache=False, **kwargs) cfg = cluster_config(num_workers=5, worker=worker_config('n1-standard-32')) configs = [(cfg, [ScannerJobConfig(io_packet_size=1000, work_packet_size=20, batch=1)])] bench('face', {'videos': videos, 'frames': [[f['number'] for f in Frame.objects.filter(video=v).values('number').order_by('number')] for v in videos]}, run_pipeline, configs, no_delete=True, force=True) with Timer('run'): print('Getting frames') def load_frames(): return [[f['number'] for f in Frame.objects.filter(video=v, shot_boundary=False).values('number').order_by('number')] for v in tqdm(videos)] frames = pcache.get('face_frames', load_frames) cfg = cluster_config( num_workers=100,
# Gender-detection benchmark: `run_pipeline` wraps `detect_genders` with
# SQL-backed faces, then `bench('gender', ...)` sweeps pipelines_per_worker
# (4/8/16) on a 5-worker cluster.
# NOTE(review): extraction collapsed this chunk onto one physical line and cut
# it off inside the dict passed to `bench(...)` — the call is never closed.
# Restore the original line breaks and the missing tail before editing.
def run_pipeline(db, videos, frames, **kwargs): return detect_genders( db, db_videos=videos, videos=[v.for_scannertools() for v in videos], frames=frames, faces=[ ScannerSQLTable(Face, v) #num_elements=len(f)) for v, f in zip(videos, frames) ], cache=False, **kwargs) cfg = cluster_config(num_workers=5, worker=worker_config('n1-standard-32'), pipelines=[GenderDetectionPipeline]) configs = [(cfg, [ ScannerJobConfig(io_packet_size=1000, work_packet_size=20, pipelines_per_worker=4), ScannerJobConfig(io_packet_size=1000, work_packet_size=20, pipelines_per_worker=8), ScannerJobConfig(io_packet_size=1000, work_packet_size=20, pipelines_per_worker=16) ])] bench('gender', { 'videos': videos, 'frames': [frames_for_video(v) for v in videos]
# Hairstyle/clothing detection driver: builds runners, then under
# `if __name__ == '__main__':` fetches cached frames and filters out videos
# with none (the cluster path is commented out in favor of a local
# ScannerWrapper).
# NOTE(review): extraction collapsed this chunk onto one physical line, and it
# begins mid-expression — `model_key='best_model', ...)` is the tail of a
# method's return dict whose head is outside this chunk. Restore the original
# line breaks and the missing head before editing.
model_key='best_model', adjust_bboxes=adjust_bboxes, device=self._device), # 'bboxes': new_bbs } detect_clothing = ClothingDetectionPipeline.make_runner() from esper.fuckyou import cache_table if __name__ == '__main__': videos = list(Video.objects.all().order_by('id')) cfg = cluster_config( num_workers=50, worker=worker_config('n1-standard-16', gpu=2), pipelines=[hairstyle_detection.HairStyleDetectionPipeline]) #pipelines=[clothing_detection.ClothingDetectionPipeline]) # with make_cluster(cfg, sql_pool=2, no_delete=True) as db_wrapper: if True: db_wrapper = ScannerWrapper.create() db = db_wrapper.db print('Fetching frames') frames = pcache.get( 'clothing_frames', lambda: par_for(frames_for_video, videos, workers=8)) videos, frames = unzip([(v, f) for (v, f) in zip(videos, frames) if len(f) > 0])
# Runs MSCOCO object detection on frames tagged 'face_computed', using a local
# scannerpy.Database() — the cluster path is commented out.
# NOTE(review): this chunk's newlines were lost in extraction; the line
# structure below is reconstructed. The leading `#]))` is residue of a
# commented-out block whose earlier lines are outside this chunk. The
# `if True:` block replaces the commented-out `with make_cluster(...)` context
# manager, so the statements after it are assumed to be its body — confirm
# against the original file.
#]))
print(video_ids, len(labeled_videos), len(video_ids))
videos = Video.objects.filter(id__in=video_ids).order_by('id').all()

print("Getting frames to compute on")
# Get frames that we computed faces on
frames = [[
    f.number
    for f in Frame.objects.filter(
        video_id=video, tags__name='face_computed').order_by('number')
] for video in tqdm(video_ids)]

# Cluster parameters
cfg = cluster_config(
    num_workers=200,
    worker=worker_config('n1-standard-32'),
    pipelines=[st.object_detection.ObjectDetectionPipeline])
#with make_cluster(cfg, no_delete=True) as db_wrapper:
#    db = db_wrapper.db
if True:
    db = scannerpy.Database()

    print("Detecting objects")
    objects = st.object_detection.detect_objects(
        db,
        videos=[video.for_scannertools() for video in videos],
        frames=frames)

    print("Downloading label definitions")
    LABEL_URL = 'https://storage.googleapis.com/scanner-data/public/mscoco_label_map.pbtxt'
    label_path = download_temp_file(LABEL_URL)
# Sets up face-landmark detection on frames tagged 'face_computed', on a
# 200-worker Scanner cluster (the face-loading step itself is commented out,
# so this chunk currently only brings up the cluster).
# NOTE(review): this chunk's newlines were lost in extraction; the line
# structure below is reconstructed from the embedded comments and syntax.
video_ids = sorted(list(all_videos.difference(ids_to_exclude)))
print(video_ids, len(labeled_videos), len(video_ids))
videos = Video.objects.filter(id__in=video_ids).order_by('id').all()

print("Getting frames to compute on")
# Get frames that we computed faces on
frames = [[
    f.number
    for f in Frame.objects.filter(
        video_id=video, tags__name='face_computed').order_by('number')
] for video in tqdm(video_ids)]

# Cluster parameters
cfg = cluster_config(
    num_workers=200,
    worker=worker_config('n1-standard-32'),
    pipelines=[st.face_landmark_detection.FaceLandmarkDetectionPipeline])
with make_cluster(cfg, no_delete=True) as db_wrapper:
    db = db_wrapper.db
    #if True:
    #    db = scannerpy.Database()

    #print("Loading faces from Scanner")
    ## Load faces
    #faces = st.face_detection.detect_faces(
    #    db,
    #    videos=[video.for_scannertools() for video in videos],
    #    frames=frames
    #)
    #print("Detecting face landmarks")
# Face detection over the NON-three-year dataset (threeyears_dataset=False);
# otherwise parallels the chunk at L7: a `build_pipeline` override threading
# frame_ids through, a benchmark branch behind `if False:`, then the real run
# on an 80-worker cluster.
# NOTE(review): extraction collapsed this chunk onto one physical line
# (`build_pipeline` uses `self` and belongs to a class whose header is outside
# this chunk) and cut it off at a body-less `with make_cluster(...) as
# db_wrapper:`. Restore the original line breaks and the missing body before
# editing.
def build_pipeline(self): output_ops = super(FaceDetectionPipeline, self).build_pipeline() output_ops['frame_ids'] = self._sources['frame_ids'].op return output_ops detect_faces = FaceDetectionPipeline.make_runner() videos = list(Video.objects.filter(threeyears_dataset=False).order_by('id')) if False: with Timer('benchmark'): videos = videos[:50] def run_pipeline(db, videos, frames, **kwargs): return face_detection.detect_faces(db, videos=[v.for_scannertools() for v in videos], frames=frames, cache=False, **kwargs) cfg = cluster_config(num_workers=5, worker=worker_config('n1-standard-32')) configs = [(cfg, [ScannerJobConfig(io_packet_size=1000, work_packet_size=20, batch=1)])] bench('face', {'videos': videos, 'frames': [[f['number'] for f in Frame.objects.filter(video=v).values('number').order_by('number')] for v in videos]}, run_pipeline, configs, no_delete=True, force=True) videos = videos with Timer('run'): cfg = cluster_config( num_workers=80, worker=worker_config('n1-standard-32'), workers_per_node=8, num_load_workers=1, num_save_workers=1) with make_cluster(cfg, sql_pool=4, no_delete=True) as db_wrapper: