def main():
    # Set test video list
    # video_list = ['CNNW_20160107_180000_Wolf']
    videos = Video.objects.filter(threeyears_dataset=True).all()
    addtional_field = pickle.load(open('/app/data/addtional_field.pkl', 'rb'))
    videos = [video for video in videos
              if addtional_field[video.id]['valid_transcript']]
    videos = videos[20000:30000]
    # TODO: remove videos whose result is already dumped (a sketch follows after main())

    # Get audio length
    # pkl_path = '/app/data/audio_length_dict.pkl'
    # audio_length_dict = pickle.load(open(pkl_path, 'rb'))
    # audio_length = [audio_length_dict[video_name] for video_name in video_list]

    # Load audio from each video
    audios = [audio.AudioSource(video.for_scannertools(),
                                frame_size=SEG_LENGTH,
                                duration=addtional_field[video.id]['audio_duration'])
              for video in videos]

    # Set up transcripts
    captions = [audio.CaptionSource('tvnews/subs10/' + video.item_name(),
                                    max_time=addtional_field[video.id]['audio_duration'],
                                    window_size=SEG_LENGTH)
                for video in videos]

    # Set up run opts
    run_opts = {'pipeline_instances_per_node': 32, 'checkpoint_frequency': 5}

    # Set up align opts
    align_opts = {'seg_length': 60,
                  'max_misalign': 10,
                  'num_thread': 1,
                  'exhausted': False,
                  'align_dir': None,
                  'res_path': None,
                  # 'align_dir': '/app/data/subs/orig/',
                  # 'res_path': '/app/result/test_align_3y.pkl',
                  }

    '''Local run'''
    # db = scannerpy.Database()
    # transcript_alignment.align_transcript(db, videos, audios, captions,
    #                                       run_opts, align_opts, cache=False)

    '''Kubernetes run'''
    cfg = cluster_config(num_workers=100, worker=worker_config('n1-standard-32'))
    with make_cluster(cfg, no_delete=True) as db_wrapper:
        db = db_wrapper.db
        transcript_alignment.align_transcript_pipeline(db=db,
                                                       audio=audios,
                                                       captions=captions,
                                                       cache=False,
                                                       run_opts=run_opts,
                                                       align_opts=align_opts)
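# A minimal sketch for the TODO in main() above ("remove videos whose result is
# already dumped"). The result directory and per-video filename pattern below are
# assumptions made for illustration; the actual dump location may differ.
import os

def filter_already_dumped(videos, result_dir='/app/result/align'):
    # Keep only videos without an existing dumped alignment result.
    return [video for video in videos
            if not os.path.exists(
                os.path.join(result_dir, video.item_name() + '.pkl'))]

# Usage inside main(), after the slice: videos = filter_already_dumped(videos)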
LABELED_TAG, _ = Tag.objects.get_or_create(name='opticalflowhists:labeled')

bad_movie_ids = set([])
# labeled_videos = set([videotag.video_id
#                       for videotag in VideoTag.objects.filter(tag=LABELED_TAG).all()])
labeled_videos = set()
all_videos = set(
    [video.id for video in Video.objects.filter(ignore_film=False).all()])
video_ids = sorted(
    list(all_videos.difference(labeled_videos).difference(bad_movie_ids)))

videos = Video.objects.filter(id__in=video_ids).order_by('id')

cfg = cluster_config(num_workers=100, worker=worker_config('n1-standard-32'))
with make_cluster(cfg, no_delete=True) as db_wrapper:
    db = db_wrapper.db
    # if True:
    #     db_wrapper = ScannerWrapper.create()
    #     db = db_wrapper.db

    histograms = st.histograms.compute_flow_histograms(
        db,
        videos=[video.for_scannertools() for video in list(videos)],
        run_opts={
            'work_packet_size': 4,
            'pipeline_instances_per_node': 2,
            'io_packet_size': 2496,
            'checkpoint_frequency': 1,
            'tasks_in_queue_per_pu': 2
        })
def bench(name, args, run_pipeline, configs, force=False, no_delete=False):
    sample_size = len(args['videos'])

    def run_name(cluster_config, job_config):
        worker_type = cluster_config.worker.type
        return '{name}-{cpu}cpu-{mem}mem-{batch}batch-{wpkt}wpkt-{iopkt}iopkt-{ldwk}ldwk-{svwk}svwk-{vid}vid'.format(
            name=name,
            cpu=worker_type.get_cpu(),
            mem=worker_type.get_mem(),
            batch=job_config.batch,
            wpkt=job_config.work_packet_size,
            iopkt=job_config.io_packet_size,
            ldwk=cluster_config.num_load_workers,
            svwk=cluster_config.num_save_workers,
            vid=sample_size)

    def run_config(args, db_wrapper, job_config):
        db = db_wrapper.db

        # Start the Scanner job
        log.info('Starting Scanner job')

        run_opts = {
            'io_packet_size': job_config.io_packet_size,
            'work_packet_size': job_config.work_packet_size,
        }
        ppw = job_config.pipelines_per_worker
        if ppw != -1:
            run_opts['pipeline_instances_per_node'] = ppw

        run_pipeline(db, detach=True, run_opts=run_opts, **args)

        # Wait until it succeeds or crashes
        start = now()
        log.info('Monitoring cluster')
        result, metrics = db_wrapper.cluster.monitor(db)
        end = now() - start

        # If we crashed:
        if not result:
            # Restart the cluster if it's in a bad state
            db_wrapper.cluster.start()
            raise TestFailure("Out of memory")

        # Write out profile if run succeeded
        outputs = run_pipeline(db, no_execute=True, **args)
        try:
            outputs[0]._column._table.profiler().write_trace(
                '/app/data/traces/{}.trace'.format(
                    run_name(db_wrapper.cluster.config(), job_config)))
        except Exception:
            log.error('Failed to write trace')
            traceback.print_exc()

        return end, pd.DataFrame(metrics)

    def test_config(args, db_wrapper, cluster_config, job_config):
        time, metrics = run_config(args, db_wrapper, job_config)

        if time is not None:
            price_per_hour = cluster_config.price(no_master=True)
            price_per_video = (time / 3600.0) * price_per_hour / float(sample_size)
            return price_per_video, metrics
        else:
            return None

    results = []
    for (cluster_config, job_configs) in configs:
        # Only bring up the cluster if there exists a job config that hasn't been computed
        if not force and all([
                pcache.has(run_name(cluster_config, job_config))
                for job_config in job_configs
        ]):
            results.append([
                pcache.get(run_name(cluster_config, job_config))
                for job_config in job_configs
            ])
        else:
            with make_cluster(cluster_config, no_delete=no_delete) as db_wrapper:
                log.info('Cluster config: {}'.format(cluster_config))

                def try_config(job_config):
                    log.info('Job config: {}'.format(job_config))
                    try:
                        return test_config(args, db_wrapper, cluster_config, job_config)
                    except TestFailure as e:
                        print(e)
                        return (str(e), None)
                    except Exception as e:
                        traceback.print_exc()
                        return (traceback.format_exc(), None)

                def try_config_cached(job_config):
                    return pcache.get(run_name(cluster_config, job_config),
                                      force=force,
                                      fn=lambda: try_config(job_config))

                results.append(list(map(try_config_cached, job_configs)))

    # Don't do this at top-level in case this file is incidentally imported into Jupyter
    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt

    def plot(metrics, name):
        ax = metrics.plot('TIME', name)
        ax.set_title(name)
        ax.set_ylabel('Percent')
        ax.set_xlabel('Sample')
        fig = ax.get_figure()
        fig.tight_layout()
        fig.savefig('/tmp/graph.svg')
        fig.clf()
        return open('/tmp/graph.svg', 'r').read()

    report_template = '''
<!DOCTYPE html>
<html>
  <head>
    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0-beta.2/css/bootstrap.min.css" integrity="sha384-PsH8R72JQ3SOdhVi3uxftmaW6Vc51MKb0q5P2rRUpPvrszuE4W1povHYgTpBfshb" crossorigin="anonymous">
    <style>
      svg {{ width: 50%; margin: 0; float: left; }}
      p {{ margin-bottom: 0; }}
    </style>
  </head>
  <body>
    <div class="container">
      <h1>Scanner benchmark report</h1>
      {report}
    </div>
  </body>
</html>
'''

    blocks = ''
    for ((cluster_config, job_configs), cluster_results) in zip(configs, results):
        for (job_config, (job_result, metrics)) in zip(job_configs, cluster_results):
            if metrics is None:
                blocks += '<div><h3>{name}</h3><p>{result}</p></div>'.format(
                    name=run_name(cluster_config, job_config), result=job_result)
                continue

            cpu = plot(metrics, 'CPU%')
            mem = plot(metrics, 'MEMORY%')
            block = '''
<div>
  <h3>{name}</h3>
  <p>${result:.05f}/video</p>
  <div>
    {cpu}
    {mem}
  </div>
</div>
'''.format(name=run_name(cluster_config, job_config),
           result=job_result,
           cpu=cpu,
           mem=mem)
            blocks += block

    report = report_template.format(report=blocks)
    with open(
            '/app/data/benchmarks/{}-{}.html'.format(
                name, strftime('%Y-%m-%d-%H-%M')), 'w') as f:
        f.write(report)

    # Collect all traces into a tarfile
    sp.check_call('cd /app/data && tar -czf bench.tar.gz traces benchmarks', shell=True)

    # Let desktop know bench is complete, and should download benchmark files
    notifier.notify('Benchmark complete', action='bench')
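# A minimal usage sketch for bench() above. Everything here is illustrative:
# JobConfig is a stand-in namedtuple (the real benchmark code may construct its
# job configurations differently), and run_my_pipeline stands for any runner that
# accepts the (db, detach=..., no_execute=..., run_opts=..., **args) call pattern
# that bench() uses.
from collections import namedtuple

JobConfig = namedtuple(
    'JobConfig',
    ['batch', 'work_packet_size', 'io_packet_size', 'pipelines_per_worker'])

def run_my_pipeline(db, videos=None, detach=False, no_execute=False, run_opts=None):
    # Hypothetical pipeline runner; a real one would dispatch a Scanner job here.
    raise NotImplementedError

videos = list(Video.objects.all().order_by('id')[:100])
args = {'videos': [v.for_scannertools() for v in videos]}

configs = [
    (cluster_config(num_workers=10, worker=worker_config('n1-standard-32')),
     [JobConfig(batch=1, work_packet_size=4, io_packet_size=2496,
                pipelines_per_worker=-1),
      JobConfig(batch=1, work_packet_size=8, io_packet_size=4992,
                pipelines_per_worker=-1)]),
]

bench('example', args, run_my_pipeline, configs, force=False, no_delete=True)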
            'bboxes': self._db.ops.PrepareClothingBbox(
                frame=self._sources['frame_sampled'].op, bboxes=bboxes)
        }

detect_clothing_bboxes = ClothingBboxesPipeline.make_runner()
detect_clothing = ClothingDetectionPipeline.make_runner()

videos = list(Video.objects.all().order_by('id'))

cfg = cluster_config(
    num_workers=100,
    worker=worker_config('n1-standard-16', gpu=1),
    pipelines=[clothing_detection.ClothingDetectionPipeline])

with make_cluster(cfg, sql_pool=2, no_delete=True) as db_wrapper:
    # if True:
    #     db_wrapper = ScannerWrapper.create()
    db = db_wrapper.db

    print('Fetching frames')
    frames = pcache.get('clothing_frames',
                        lambda: par_for(frames_for_video, videos, workers=8))
    videos, frames = unzip([(v, f) for (v, f) in zip(videos, frames)
                            if len(f) > 0])
    videos = list(videos)
    frames = list(frames)
    videos = videos
    frames = frames