def load_frames():
    """Fetch the frame list of every video, fanned out over 8 worker processes."""
    return par_for(frames_for_video, videos, workers=8)
videos = list(Video.objects.all().order_by('id')) cfg = cluster_config(num_workers=100, worker=worker_config('n1-standard-16', gpu=1), pipelines=[clothing_detection.ClothingDetectionPipeline]) with make_cluster(cfg, sql_pool=2, no_delete=True) as db_wrapper: # if True: # db_wrapper = ScannerWrapper.create() db = db_wrapper.db print('Fetching frames') frames = pcache.get('clothing_frames', lambda: par_for(frames_for_video, videos, workers=8)) videos, frames = unzip([(v, f) for (v, f) in zip(videos, frames) if len(f) > 0]) videos = list(videos) frames = list(frames) videos = videos frames = frames bbox_tables = [ ScannerSQLTable( Face, v, num_elements=len(f), filter= 'MOD(query_frame.number, CAST(FLOOR(query_video.fps * 3) AS INTEGER)) = 0'
videos = list(Video.objects.filter(threeyears_dataset=True).order_by('id'))


def load_frames():
    """Fetch the frame list of every video, fanned out over 8 worker processes."""
    return par_for(frames_for_video, videos, workers=8)


# force=True recomputes the frame lists even if a cached copy exists.
frames = pcache.get('emb_frames', load_frames, force=True)

# Drop videos that contributed no frames, keeping both lists aligned.
videos, frames = unzip([(v, f) for (v, f) in zip(videos, frames) if len(f) > 0])
videos = list(videos)
frames = list(frames)

# Export packed embeddings and IDs into single files (currently disabled).
if False:
    def get_ids(video):
        """Face IDs of one video, ordered by frame number then ID."""
        return [
            f['id']
            for f in Face.objects.filter(frame__video=video)
            .order_by('frame__number', 'id')
            .values('id')
        ]

    all_ids = pcache.get('emb_ids', (lambda: par_for(get_ids, videos, workers=4)))

    import struct

    # Pack every face ID as an unsigned 64-bit integer, skipping videos whose
    # per-video embedding file is missing on disk.
    with open('/app/data/embs/sevenyears_ids.bin', 'wb') as f:
        for idx, ids in tqdm(enumerate(all_ids)):
            path = '/app/data/embs/{:07d}.bin'.format(idx)
            if os.path.isfile(path):
                f.write(b''.join([struct.pack('=Q', face_id) for face_id in ids]))

    # NOTE(review): this pass only validates sizes (4-byte float x 128 dims per
    # face must match the ID count) and prints mismatching video indices; it
    # never writes to the file it opens — confirm that is intentional.
    with open('/app/data/embs/sevenyears_embs.bin', 'wb') as f:
        for idx in tqdm(list(range(len(videos)))):
            path = '/app/data/embs/{:07d}.bin'.format(idx)
            if os.path.isfile(path):
                blob = open(path, 'rb').read()
                if len(blob) / (4 * 128) != len(all_ids[idx]):
                    print(idx)
exit() videos = videos cfg = cluster_config(num_workers=100, worker=worker_config('n1-standard-64'), pipelines=[gender_detection.GenderDetectionPipeline]) with make_cluster(cfg, sql_pool=2, no_delete=True) as db_wrapper: db = db_wrapper.db # if True: # db_wrapper = ScannerWrapper.create() frames = pcache.get('gender_frames', lambda: par_for(frames_for_video, videos, workers=8)) videos, frames = unzip([(v, f) for (v, f) in zip(videos, frames) if len(f) > 0]) videos = list(videos) frames = list(frames) detect_genders(db, videos=[v.for_scannertools() for v in videos], db_videos=videos, frames=frames, faces=[ ScannerSQLTable( Face, v, num_elements=len(f), filter='query_frame.shot_boundary = false') for v, f in zip(videos, frames)
# Per-frame histograms for every video, computed on the Scanner database.
hists = shot_detection.compute_histograms(
    db,
    videos=[v.for_scannertools() for v in videos],
    run_opts={
        'io_packet_size': 10000,
        'work_packet_size': 1000
    })

# Black-frame detection consumes the histograms produced above.
bfs = compute_black_frames(
    db,
    videos=[v.for_scannertools() for v in videos],
    histograms=hists,
    run_opts={
        'io_packet_size': 100000,
        'work_packet_size': 10000
    })


def load_bf(idx):
    """Materialize video idx's black-frame output to disk as packed uint8 bytes."""
    out_path = '/app/data/blackframes/{:07d}.bin'.format(idx)
    if os.path.isfile(out_path):
        # Already exported on an earlier run; skip.
        return
    try:
        with open(out_path, 'wb') as out_file:
            out_file.write(np.array(list(bfs[idx].load()), dtype=np.uint8).tobytes())
    except Exception:
        # Best-effort export: report the failing video index and keep going.
        print(idx)


print('Loading...')
par_for(load_bf, list(range(len(bfs))), workers=8)
if __name__ == "__main__":
    print("Prepare videos and frames")
    db = scannerpy.Database()

    # Restrict this run to the first Tabletennis video.
    videos = Video.objects.filter(
        path__contains='Tabletennis').order_by('id')[0:1]
    # frames = [[i for i in range(v.num_frames)] for v in videos]
    video_ids = [video.id for video in videos]

    hists = shot_detection.compute_histograms(
        db,
        videos=[v.for_scannertools() for v in videos],
        cache=True,
        run_opts={
            'io_packet_size': 100,
            'work_packet_size': 10,
            'pipeline_instances_per_node': 5,
            'checkpoint_frequency': 1,
        })

    def load_hist(i):
        """Write video i's histogram array to disk as raw platform-int bytes."""
        path = '/app/data/histogram/{:07d}.bin'.format(video_ids[i])
        # FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24; the
        # documented drop-in replacement for the alias is the builtin int,
        # which NumPy maps to the same default integer dtype as before.
        hist = np.array(list(hists[i].load()), dtype=int)
        print(hist.shape)
        print(hist)
        with open(path, 'wb') as f:
            f.write(hist.tobytes())

    print('Loading...')
    par_for(load_hist, list(range(len(hists))), workers=8)
ScannerJobConfig(io_packet_size=500, work_packet_size=20, pipelines_per_worker=4), ScannerJobConfig(io_packet_size=1000, work_packet_size=20, pipelines_per_worker=4), ScannerJobConfig(io_packet_size=1000, work_packet_size=80, pipelines_per_worker=4), ScannerJobConfig(io_packet_size=1000, work_packet_size=20, pipelines_per_worker=8), ])] bench('embedding', {'videos': videos, 'frames': [frames_for_video(v) for v in videos]}, run_pipeline, configs, no_delete=True, force=True) exit() # Export packed embeddings and IDs into single files if False: def get_ids(video): return [f['id'] for f in Face.objects.filter(frame__video=video).order_by('frame__number', 'id').values('id')] all_ids = par_for(get_ids, videos, workers=4) import struct with open('/app/data/embs/sevenyears_ids.bin', 'wb') as f: for ids in tqdm(all_ids): f.write(b''.join([struct.pack('=Q', i) for i in ids])) with open('/app/data/embs/sevenyears_embs.bin', 'wb') as f: for i in tqdm(list(range(len(videos)))): f.write(open('/app/data/embs/{:07d}.bin'.format(i), 'rb').read()) f.flush() if __name__ == "__main__": videos = list(Video.objects.filter(threeyears_dataset=False).order_by('id')) videos = videos