def test_bounded_state(db):
    warmup = 3

    frame = db.ops.FrameInput()
    increment = db.ops.TestIncrementBounded(ignore=frame, warmup=warmup)
    sampled_increment = increment.sample()
    output_op = db.ops.Output(columns=[sampled_increment])
    job = Job(
        op_args={
            frame: db.table('test1').column('frame'),
            sampled_increment: db.sampler.gather([0, 10, 25, 26, 27]),
            output_op: 'test_bounded_state',
        })
    bulk_job = BulkJob(output=output_op, jobs=[job])
    tables = db.run(bulk_job, force=True, show_progress=False)

    num_rows = 0
    # Row 0 has no preceding frames to warm up on; rows 10 and 25 each get
    # `warmup` warmup frames; rows 26 and 27 extend the run from 25.
    expected_output = [0, warmup, warmup, warmup + 1, warmup + 2]
    for (frame_index, buf) in tables[0].column('integer').load():
        (val,) = struct.unpack('=q', buf)
        assert val == expected_output[num_rows]
        num_rows += 1
    assert num_rows == 5
def test_job_blacklist(blacklist_db):
    db = blacklist_db
    db.register_op('TestPyFail', [('frame', ColumnType.Video)], ['dummy'])
    db.register_python_kernel('TestPyFail', DeviceType.CPU,
                              cwd + '/test_py_fail_kernel.py')

    frame = db.ops.FrameInput()
    range_frame = frame.sample()
    failed_output = db.ops.TestPyFail(frame=range_frame)
    output_op = db.ops.Output(columns=[failed_output])
    job = Job(
        op_args={
            frame: db.table('test1').column('frame'),
            range_frame: db.sampler.range(0, 1),
            output_op: 'test_py_fail'
        })
    bulk_job = BulkJob(output=output_op, jobs=[job])
    tables = db.run(bulk_job, force=True, show_progress=False,
                    pipeline_instances_per_node=1)
    table = tables[0]
    assert not table.committed()
def job(self, db, ty):
    frame = db.ops.FrameInput()
    hist = db.ops.Histogram(frame=frame, device=ty)
    output_op = db.ops.Output(columns=[hist])
    job = Job(op_args={
        frame: db.table('test1').column('frame'),
        output_op: 'test_hist'
    })
    bulk_job = BulkJob(output=output_op, jobs=[job])
    return bulk_job
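# A minimal driver for the histogram job builder above: a sketch, assuming a
# database with the 'test1' table ingested and the parsers.histograms reader
# used in the shot-detection examples below.
def run_hist_test(test, db):
    bulk_job = test.job(db, DeviceType.CPU)
    [table] = db.run(bulk_job, force=True, show_progress=False)
    # Each decoded row is one histogram per color channel.
    for frame_index, hist in table.load(['histogram'], parsers.histograms):
        assert len(hist) == 3  # assumes the usual three-channel output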
def job(self, db, ty):
    frame = db.ops.FrameInput()
    flow = db.ops.OpticalFlow(frame=frame, stencil=[-1, 0], device=ty)
    flow_range = flow.sample()
    out = db.ops.Output(columns=[flow_range])
    job = Job(op_args={
        frame: db.table('test1').column('frame'),
        flow_range: db.sampler.range(0, 50),
        out: 'test_flow',
    })
    return BulkJob(output=out, jobs=[job])
def extract(frames):
    with Database() as db:
        frame = db.ops.FrameInput()
        gathered = frame.sample()
        # TODO(wcrichto): use GPU for resize if exists
        resized = db.ops.Resize(frame=gathered, width=640,
                                preserve_aspect=True, device=DeviceType.CPU)
        compressed = db.ops.ImageEncoder(frame=resized)
        output = db.ops.Output(columns=[compressed])
        job = Job(op_args={
            frame: db.table(frames[0].video.path).column('frame'),
            gathered: db.sampler.gather([frame.number for frame in frames]),
            output: '_ignore'
        })

        start = now()
        [output] = db.run(BulkJob(output=output, jobs=[job]), force=True)
        _print('Extract: {:.3f}'.format(now() - start))

        start = now()
        jpgs = [(jpg[0], frame)
                for (_, jpg), frame in zip(output.load(['img']), frames)]
        _print('Loaded: {:.3f}'.format(now() - start))

        if ESPER_ENV == 'google':
            temp_dir = tempfile.mkdtemp()

            def write_jpg(arg):
                jpg, frame = arg
                with open('{}/frame_{}.jpg'.format(temp_dir, frame.id),
                          'wb') as f:
                    f.write(jpg)

            start = now()
            with ThreadPoolExecutor(max_workers=64) as executor:
                list(executor.map(write_jpg, jpgs))
            sp.check_call(
                shlex.split('gsutil -m mv "{}/*" gs://{}/{}/thumbnails/{}'.format(
                    temp_dir, BUCKET, DATA_PATH, DATASET)))
            _print('Write: {:.3f}'.format(now() - start))

        elif ESPER_ENV == 'local':
            try:
                os.makedirs('assets/thumbnails/' + DATASET)
            except OSError:
                pass

            def write_jpg(arg):
                jpg, frame = arg
                with open('assets/thumbnails/{}/frame_{}.jpg'.format(
                        DATASET, frame.id), 'wb') as f:
                    f.write(jpg)

            start = now()
            with ThreadPoolExecutor(max_workers=64) as executor:
                list(executor.map(write_jpg, jpgs))
            _print('Write: {:.3f}'.format(now() - start))

        return jpgs
def run_spacer_job(spacing_args):
    frame = db.ops.FrameInput()
    hist = db.ops.Histogram(frame=frame)
    space_hist = hist.space()
    output_op = db.ops.Output(columns=[space_hist])
    job = Job(op_args={
        frame: db.table('test1').column('frame'),
        space_hist: spacing_args,
        output_op: 'test_space',
    })
    bulk_job = BulkJob(output=output_op, jobs=[job])
    tables = db.run(bulk_job, force=True, show_progress=False)
    return tables[0]
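# Hedged usage sketch for run_spacer_job (which closes over the `db` fixture).
# The space_repeat/space_null constructor names are assumptions modeled on
# Scanner's test suite of this era; the exact names may differ.
def test_space(db):
    spacing = 8
    # Repeat each input row `spacing` times.
    repeat_table = run_spacer_job(db.sampler.space_repeat(spacing=spacing))
    # Or insert `spacing - 1` null rows between consecutive input rows.
    null_table = run_spacer_job(db.sampler.space_null(spacing=spacing))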
def test_lossless(db):
    frame = db.ops.FrameInput()
    range_frame = frame.sample()
    blurred_frame = db.ops.Blur(frame=range_frame, kernel_size=3, sigma=0.1)
    # Mark the output column to be stored with lossless compression.
    output_op = db.ops.Output(columns=[blurred_frame.lossless()])
    job = Job(op_args={
        frame: db.table('test1').column('frame'),
        range_frame: db.sampler.range(0, 30),
        output_op: 'test_blur_lossless'
    })
    bulk_job = BulkJob(output=output_op, jobs=[job])
    tables = db.run(bulk_job, force=True, show_progress=False)
    table = tables[0]
    next(table.load(['frame']))
def run_sampler_job(sampler_args, expected_rows):
    frame = db.ops.FrameInput()
    sample_frame = frame.sample()
    output_op = db.ops.Output(columns=[sample_frame])
    job = Job(op_args={
        frame: db.table('test1').column('frame'),
        sample_frame: sampler_args,
        output_op: 'test_sample',
    })
    bulk_job = BulkJob(output=output_op, jobs=[job])
    tables = db.run(bulk_job, force=True, show_progress=False)

    num_rows = 0
    for (frame_index, _) in tables[0].column('frame').load():
        num_rows += 1
    assert num_rows == expected_rows
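# Example invocations of run_sampler_job (which closes over the `db` fixture).
# Both sampler constructors appear elsewhere in these tests; the expected row
# counts follow directly from them.
def test_sample(db):
    # range(0, 30) selects rows [0, 30), i.e. 30 rows.
    run_sampler_job(db.sampler.range(0, 30), 30)
    # gather selects exactly the listed rows.
    run_sampler_job(db.sampler.gather([0, 10, 25]), 3)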
def test_slicing(db):
    frame = db.ops.FrameInput()
    slice_frame = frame.slice()
    unsliced_frame = slice_frame.unslice()
    output_op = db.ops.Output(columns=[unsliced_frame])
    job = Job(op_args={
        frame: db.table('test1').column('frame'),
        slice_frame: db.partitioner.all(50),
        output_op: 'test_slicing',
    })
    bulk_job = BulkJob(output=output_op, jobs=[job])
    tables = db.run(bulk_job, force=True, show_progress=False)

    num_rows = 0
    for (frame_index, _) in tables[0].column('frame').load():
        num_rows += 1
    assert num_rows == db.table('test1').num_rows()
def test_profiler(db):
    frame = db.ops.FrameInput()
    hist = db.ops.Histogram(frame=frame)
    output_op = db.ops.Output(columns=[hist])
    job = Job(op_args={
        frame: db.table('test1').column('frame'),
        output_op: '_ignore'
    })
    bulk_job = BulkJob(output=output_op, jobs=[job])
    output = db.run(bulk_job, show_progress=False, force=True)

    profiler = output[0].profiler()
    f = tempfile.NamedTemporaryFile(delete=False)
    f.close()
    profiler.write_trace(f.name)
    profiler.statistics()
    run(['rm', '-f', f.name])
def test_save_mp4(db):
    frame = db.ops.FrameInput()
    range_frame = frame.sample()
    blurred_frame = db.ops.Blur(frame=range_frame, kernel_size=3, sigma=0.1)
    output_op = db.ops.Output(columns=[blurred_frame])
    job = Job(op_args={
        frame: db.table('test1').column('frame'),
        range_frame: db.sampler.range(0, 30),
        output_op: 'test_save_mp4'
    })
    bulk_job = BulkJob(output=output_op, jobs=[job])
    tables = db.run(bulk_job, force=True, show_progress=False)
    table = tables[0]

    f = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
    f.close()
    table.column('frame').save_mp4(f.name)
    run(['rm', '-rf', f.name])
def test_python_kernel(db):
    db.register_op('TestPy', [('frame', ColumnType.Video)], ['dummy'])
    db.register_python_kernel('TestPy', DeviceType.CPU,
                              cwd + '/test_py_kernel.py')

    frame = db.ops.FrameInput()
    range_frame = frame.sample()
    test_out = db.ops.TestPy(frame=range_frame)
    output_op = db.ops.Output(columns=[test_out])
    job = Job(op_args={
        frame: db.table('test1').column('frame'),
        range_frame: db.sampler.range(0, 30),
        output_op: 'test_py'
    })
    bulk_job = BulkJob(output=output_op, jobs=[job])
    tables = db.run(bulk_job, force=True, show_progress=False)
    next(tables[0].load(['dummy']))
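# A sketch of what a kernel file like test_py_kernel.py might contain. The
# exact registration contract (class name, constructor signature, how the
# runtime discovers the kernel) is an assumption here; the test above only
# implies one 'dummy' output row per input frame.
class TestPyKernel(object):
    def __init__(self, config, protobufs):
        self.protobufs = protobufs

    def execute(self, input_columns):
        # Emit one dummy byte string per input frame.
        return [b'0' for _ in input_columns[0]]

KERNEL = TestPyKernel  # assumed discovery hook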
def test_collection(db):
    c = db.new_collection('test', [db.table('test1'), db.table('test2')])
    frame = db.ops.FrameInput()
    hist = db.ops.Histogram(frame=frame)
    output = db.ops.Output(columns=[hist])
    jobs = []
    for table in c.tables():
        job = Job(op_args={
            frame: table.column('frame'),
            output: table.name() + '_ignore',
        })
        jobs.append(job)
    bulk_job = BulkJob(output=output, jobs=jobs)
    db.run(bulk_job, show_progress=False, force=True)
    db.delete_collection('test')
def test_unbounded_state(db):
    frame = db.ops.FrameInput()
    slice_frame = frame.slice()
    increment = db.ops.TestIncrementUnbounded(ignore=slice_frame)
    unsliced_increment = increment.unslice()
    output_op = db.ops.Output(columns=[unsliced_increment])
    job = Job(op_args={
        frame: db.table('test1').column('frame'),
        slice_frame: db.partitioner.all(50),
        output_op: 'test_unbounded_state',
    })
    bulk_job = BulkJob(output=output_op, jobs=[job])
    tables = db.run(bulk_job, force=True, show_progress=False)

    num_rows = 0
    # The unbounded-state counter resets at each 50-row slice boundary.
    for (frame_index, buf) in tables[0].column('integer').load():
        (val,) = struct.unpack('=q', buf)
        assert val == frame_index % 50
        num_rows += 1
    assert num_rows == db.table('test1').num_rows()
def test_no_workers(no_workers_db):
    db = no_workers_db

    frame = db.ops.FrameInput()
    hist = db.ops.Histogram(frame=frame)
    output_op = db.ops.Output(columns=[hist])
    job = Job(op_args={
        frame: db.table('test1').column('frame'),
        output_op: '_ignore'
    })
    bulk_job = BulkJob(output=output_op, jobs=[job])

    # With no workers attached, the run should raise a ScannerException.
    exc = False
    try:
        db.run(bulk_job, show_progress=False, force=True)
    except ScannerException:
        exc = True
    assert exc
def concat_stereo_panorama_chunks(db, chunks, render_params, is_left):
    num_cams = 14
    item_size = 10

    assert num_cams == len(chunks)

    left_inputs = []
    right_inputs = []
    for c in range(num_cams):
        left_chunk = db.ops.FrameInput()
        right_chunk = db.ops.FrameInput()
        left_inputs.append(left_chunk)
        right_inputs.append(right_chunk)

    args = db.protobufs.ConcatPanoramaChunksArgs()
    args.eqr_width = render_params["EQR_WIDTH"]
    args.eqr_height = render_params["EQR_HEIGHT"]
    args.final_eqr_width = render_params["FINAL_EQR_WIDTH"]
    args.final_eqr_height = render_params["FINAL_EQR_HEIGHT"]
    args.camera_rig_path = render_params["RIG_JSON_FILE"]
    args.zero_parallax_dist = 10000
    args.interpupilary_dist = 6.4
    args.left = is_left
    panorama = db.ops.ConcatPanoramaChunks(*(left_inputs + right_inputs),
                                           args=args)
    output_op = db.ops.Output(columns=[panorama])

    op_args = {output_op: 'surround360_pano'}
    for c in range(num_cams):
        op_args[left_inputs[c]] = chunks[c].column('left_chunk')
        op_args[right_inputs[c]] = chunks[c].column('right_chunk')
    job = Job(op_args=op_args)
    bulk_job = BulkJob(output=output_op, jobs=[job])
    tables = db.run(bulk_job, force=True,
                    work_packet_size=item_size,
                    io_packet_size=item_size,
                    pipeline_instances_per_node=render_params["CORES"])
    return tables[0]
def test_blur(db):
    frame = db.ops.FrameInput()
    range_frame = frame.sample()
    blurred_frame = db.ops.Blur(frame=range_frame, kernel_size=3, sigma=0.1)
    output_op = db.ops.Output(columns=[blurred_frame])
    job = Job(op_args={
        frame: db.table('test1').column('frame'),
        range_frame: db.sampler.range(0, 30),
        output_op: 'test_blur',
    })
    bulk_job = BulkJob(output=output_op, jobs=[job])
    tables = db.run(bulk_job, force=True, show_progress=False)
    table = tables[0]

    fid, frames = next(table.load(['frame']))
    frame_array = frames[0]
    assert fid == 0
    assert frame_array.dtype == np.uint8
    assert frame_array.shape[0] == 480
    assert frame_array.shape[1] == 640
    assert frame_array.shape[2] == 3
def test_job_timeout(timeout_db):
    db = timeout_db

    frame = db.ops.FrameInput()
    range_frame = frame.sample()
    sleep_frame = db.ops.SleepFrame(ignore=range_frame)
    output_op = db.ops.Output(columns=[sleep_frame])
    job = Job(op_args={
        frame: db.table('test1').column('frame'),
        range_frame: db.sampler.range(0, 1),
        output_op: 'test_timeout',
    })
    bulk_job = BulkJob(output=output_op, jobs=[job])
    tables = db.run(
        bulk_job,
        pipeline_instances_per_node=1,
        task_timeout=0.1,
        force=True,
        show_progress=False)
    table = tables[0]

    assert not table.committed()
def hist_job(db, device, opts, num_frames, video_names, sampling):
    print('Computing a color histogram for each frame...')
    s = time.time()

    batch = 20000
    failures = 0
    max_batches = int(math.ceil(len(video_names) / float(batch)))
    for bi, i in enumerate(range(0, len(video_names), batch)):
        print('Batch {:d}/{:d}...'.format(bi, max_batches))
        frame = db.ops.FrameInput()
        histogram = db.ops.Histogram(frame=frame, device=device, batch=128)
        hist_sample = histogram.sample()
        output = db.ops.Output(columns=[hist_sample])
        jobs = []
        for name, sa in zip(video_names[i:i + batch], sampling[i:i + batch]):
            job = Job(
                op_args={
                    frame: db.table(name).column('frame'),
                    hist_sample: sa,
                    output: name + '_hist'
                })
            jobs.append(job)
        bulk_job = BulkJob(output=output, jobs=jobs)
        hist_tables = db.run(bulk_job, force=True, **opts)

        local_failures = 0
        for t in hist_tables:
            if not t.committed():
                local_failures += 1
        print('Batch failures: {:d}'.format(local_failures))
        failures += local_failures
    total_time = time.time() - s
    print('Total failures: {:d}'.format(failures))
    print('\nTime: {:.1f}s, {:.1f} fps'.format(total_time,
                                               num_frames / total_time))
    return total_time
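# Hedged driver for hist_job: one sampler argument per video. db.sampler.all()
# is an assumed "every frame" convenience; range/gather (used above) also work.
def run_hist_benchmark(db, video_names, num_frames):
    sampling = [db.sampler.all() for _ in video_names]
    opts = {'work_packet_size': 128, 'io_packet_size': 1024,
            'show_progress': False}
    return hist_job(db, DeviceType.CPU, opts, num_frames, video_names,
                    sampling)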
def detect_poses(db, input_frame_columns, sampling, output_name, batch=1,
                 models_path=None, pose_model_weights_path=None,
                 hand_prototxt_path=None, hand_model_weights_path=None,
                 face_prototxt_path=None, face_model_weights_path=None):
    if models_path is None:
        models_path = os.path.join(temp_directory(), 'openpose')

        pose_fs_url = 'http://posefs1.perception.cs.cmu.edu/OpenPose/models/'
        # Pose prototxt
        download_temp_file(
            'https://raw.githubusercontent.com/CMU-Perceptual-Computing-Lab/'
            'openpose/master/models/pose/coco/pose_deploy_linevec.prototxt',
            'openpose/pose/coco/pose_deploy_linevec.prototxt')
        # Pose model weights
        download_temp_file(
            os.path.join(pose_fs_url, 'pose/coco/pose_iter_440000.caffemodel'),
            'openpose/pose/coco/pose_iter_440000.caffemodel')
        # Hands prototxt
        download_temp_file(
            'https://raw.githubusercontent.com/CMU-Perceptual-Computing-Lab/'
            'openpose/master/models/hand/pose_deploy.prototxt',
            'openpose/hand/pose_deploy.prototxt')
        # Hands model weights
        download_temp_file(
            os.path.join(pose_fs_url, 'hand/pose_iter_102000.caffemodel'),
            'openpose/hand/pose_iter_102000.caffemodel')
        # Face prototxt
        download_temp_file(
            'https://raw.githubusercontent.com/CMU-Perceptual-Computing-Lab/'
            'openpose/master/models/face/pose_deploy.prototxt',
            'openpose/face/pose_deploy.prototxt')
        # Face model weights
        download_temp_file(
            os.path.join(pose_fs_url, 'face/pose_iter_116000.caffemodel'),
            'openpose/face/pose_iter_116000.caffemodel')
        # Face haar cascades
        download_temp_file(
            'https://raw.githubusercontent.com/CMU-Perceptual-Computing-Lab/'
            'openpose/master/models/face/haarcascade_frontalface_alt.xml',
            'openpose/face/haarcascade_frontalface_alt.xml')

    pose_args = db.protobufs.OpenPoseArgs()
    pose_args.model_directory = models_path
    pose_args.pose_num_scales = 3
    pose_args.pose_scale_gap = 0.33
    pose_args.hand_num_scales = 4
    pose_args.hand_scale_gap = 0.4

    if db.has_gpu():
        device = DeviceType.GPU
        pipeline_instances = -1
    else:
        device = DeviceType.CPU
        pipeline_instances = 1

    frame = db.ops.FrameInput()
    poses_out = db.ops.OpenPose(frame=frame, device=device, args=pose_args,
                                batch=batch)
    sampled_poses = poses_out.sample()
    output = db.ops.Output(columns=[sampled_poses])

    jobs = []
    for i, input_frame_column in enumerate(input_frame_columns):
        job = Job(
            op_args={
                frame: input_frame_column,
                sampled_poses: sampling,
                output: '{}_{}_poses'.format(output_name, i)
            })
        jobs.append(job)
    bulk_job = BulkJob(output=output, jobs=jobs)
    output = db.run(bulk_job, force=True, work_packet_size=8,
                    pipeline_instances_per_node=pipeline_instances)
    return output
def detect_faces(db, input_frame_columns, output_samplings, output_names,
                 width=960, prototxt_path=None, model_weights_path=None,
                 templates_path=None, return_profiling=False):
    if prototxt_path is None:
        prototxt_path = download_temp_file(
            'https://storage.googleapis.com/scanner-data/nets/caffe_facenet/facenet_deploy.prototxt')
    if model_weights_path is None:
        model_weights_path = download_temp_file(
            'https://storage.googleapis.com/scanner-data/nets/caffe_facenet/facenet_deploy.caffemodel')
    if templates_path is None:
        templates_path = download_temp_file(
            'https://storage.googleapis.com/scanner-data/nets/caffe_facenet/facenet_templates.bin')

    descriptor = NetDescriptor(db)
    descriptor.model_path = prototxt_path
    descriptor.model_weights_path = model_weights_path
    descriptor.input_layer_names = ['data']
    descriptor.output_layer_names = ['score_final']
    descriptor.mean_colors = [119.29959869, 110.54627228, 101.8384321]

    facenet_args = db.protobufs.FacenetArgs()
    facenet_args.templates_path = templates_path
    facenet_args.threshold = 0.5
    caffe_args = facenet_args.caffe_args
    caffe_args.net_descriptor.CopyFrom(descriptor.as_proto())

    if db.has_gpu():
        device = DeviceType.GPU
        pipeline_instances = -1
    else:
        device = DeviceType.CPU
        pipeline_instances = 1

    if type(output_names) is not list:
        output_names = [
            '{}_{}'.format(output_names, i)
            for i in range(len(input_frame_columns))
        ]
    else:
        assert len(output_names) == len(input_frame_columns)
    if type(output_samplings) is not list:
        output_samplings = [
            output_samplings for _ in range(len(input_frame_columns))
        ]
    else:
        assert len(output_samplings) == len(input_frame_columns)

    outputs = []
    scales = [1.0, 0.5, 0.25, 0.125]
    batch_sizes = [2**i for i in range(len(scales))]
    profilers = {}
    for scale, batch in zip(scales, batch_sizes):
        facenet_args.scale = scale
        caffe_args.batch_size = batch

        frame = db.ops.FrameInput()
        # resized = db.ops.Resize(
        #     frame=frame,
        #     width=width, height=0,
        #     min=True, preserve_aspect=True)
        frame_info = db.ops.InfoFromFrame(frame=frame)
        facenet_input = db.ops.FacenetInput(frame=frame, args=facenet_args,
                                            device=device)
        facenet = db.ops.Facenet(facenet_input=facenet_input,
                                 args=facenet_args, device=device)
        facenet_output = db.ops.FacenetOutput(facenet_output=facenet,
                                              original_frame_info=frame_info,
                                              args=facenet_args)
        sampled_output = facenet_output.sample()
        output = db.ops.Output(columns=[sampled_output])

        jobs = []
        for output_name, frame_column, output_sampling in zip(
                output_names, input_frame_columns, output_samplings):
            job = Job(
                op_args={
                    frame: frame_column,
                    sampled_output: output_sampling,
                    output: '{}_{}'.format(output_name, scale)
                })
            jobs.append(job)
        bulk_job = BulkJob(output=output, jobs=jobs)
        output = db.run(bulk_job, force=True, work_packet_size=batch * 4,
                        io_packet_size=batch * 20,
                        pipeline_instances_per_node=pipeline_instances)
        profilers['scale_{}'.format(scale)] = output[0].profiler()
        outputs.append(output)

    # Register nms bbox op and kernel
    db.register_op('BBoxNMS', [], ['bboxes'], variadic_inputs=True)
    kernel_path = script_dir + '/bbox_nms_kernel.py'
    db.register_python_kernel('BBoxNMS', DeviceType.CPU, kernel_path)

    # scale = max(width / float(max_width), 1.0)
    scale = 1.0
    bbox_inputs = [db.ops.Input() for _ in outputs]
    nmsed_bboxes = db.ops.BBoxNMS(*bbox_inputs, scale=scale)
    output = db.ops.Output(columns=[nmsed_bboxes])
    jobs = []
    for i in range(len(input_frame_columns)):
        op_args = {}
        for bi, cols in enumerate(outputs):
            op_args[bbox_inputs[bi]] = cols[i].column('bboxes')
        op_args[output] = output_names[i]
        jobs.append(Job(op_args=op_args))
    bulk_job = BulkJob(output=output, jobs=jobs)
    return db.run(bulk_job, force=True)
def main(movie_path):
    total_start = time.time()
    print('Detecting shots in movie {}'.format(movie_path))
    with open('cinematography_videos.txt', 'r') as f:
        movie_paths = [l.strip() for l in f.readlines()]
    movie_names = [os.path.basename(p) for p in movie_paths]

    with Database() as db:
        print('Loading movie into Scanner database...')
        s = time.time()
        # Use GPU kernels if we have a GPU
        if db.has_gpu():
            device = DeviceType.GPU
        else:
            device = DeviceType.CPU

        ############ ############ ############ ############
        # 0. Ingest the videos into the database
        ############ ############ ############ ############
        if not db.table(movie_names[0]):
            movie_tables, failures = db.ingest_videos(
                zip(movie_names, movie_paths), force=True)
            print('Failed videos: {:d}'.format(len(failures)))
        movie_table = db.table(movie_names[0])
        print('Time: {:.1f}s'.format(time.time() - s))
        print('Number of frames in movie: {:d}'.format(movie_table.num_rows()))

        s = time.time()
        ############ ############ ############ ############
        # 1. Run Histogram over the entire video in Scanner
        ############ ############ ############ ############
        print('Computing a color histogram for each frame...')
        frame = db.ops.FrameInput()
        histogram = db.ops.Histogram(frame=frame, device=device)
        output = db.ops.Output(columns=[histogram])
        jobs = []
        for name in movie_names:
            job = Job(op_args={
                frame: db.table(name).column('frame'),
                output: name + '_hist'
            })
            jobs.append(job)
        bulk_job = BulkJob(output=output, jobs=jobs)
        hists_tables = db.run(bulk_job, force=True)
        hists_table = hists_tables[0]
        print('\nTime: {:.1f}s, {:.1f} fps'.format(
            time.time() - s,
            movie_table.num_rows() / (time.time() - s)))

        s = time.time()
        ############ ############ ############ ############
        # 2. Load histograms and compute shot boundaries
        #    in python
        ############ ############ ############ ############
        print('Computing shot boundaries...')
        # Read histograms from disk for the first movie
        hists = [h for _, h in hists_table.load(['histogram'],
                                                parsers.histograms)]
        boundaries = compute_shot_boundaries(hists)
        print('Found {:d} shots.'.format(len(boundaries)))
        print('Time: {:.1f}s'.format(time.time() - s))

        s = time.time()
        ############ ############ ############ ############
        # 3. Create montage in Scanner
        ############ ############ ############ ############
        print('Creating shot montage...')
        row_length = 16
        rows_per_item = 1
        target_width = 256
        # Compute partial row montages that we will stack together
        # at the end
        frame = db.ops.FrameInput()
        gather_frame = frame.sample()
        sliced_frame = gather_frame.slice()
        montage = db.ops.Montage(
            frame=sliced_frame,
            num_frames=row_length * rows_per_item,
            target_width=target_width,
            frames_per_row=row_length,
            device=device)
        sampled_montage = montage.sample()
        output = db.ops.Output(
            columns=[sampled_montage.unslice().lossless()])

        item_size = row_length * rows_per_item
        starts_remainder = len(boundaries) % item_size
        evenly_divisible = (starts_remainder == 0)
        if not evenly_divisible:
            boundaries = boundaries[0:len(boundaries) - starts_remainder]

        job = Job(op_args={
            frame: movie_table.column('frame'),
            gather_frame: db.sampler.gather(boundaries),
            sliced_frame: db.partitioner.all(item_size),
            sampled_montage: [db.sampler.gather([item_size - 1])
                              for _ in range(len(boundaries) // item_size)],
            output: 'montage_image'
        })
        bulk_job = BulkJob(output=output, jobs=[job])
        [montage_table] = db.run(bulk_job, force=True)

        # Stack all partial montages together
        montage_img = np.zeros((1, target_width * row_length, 3),
                               dtype=np.uint8)
        for idx, img in montage_table.column('montage').load():
            img = np.flip(img, 2)
            montage_img = np.vstack((montage_img, img))
        print('')
        print('Time: {:.1f}s'.format(time.time() - s))

        ############ ############ ############ ############
        # 4. Write montage to disk
        ############ ############ ############ ############
        cv2.imwrite('shots.jpg', montage_img)
        print('Successfully generated shots.jpg')
        print('Total time: {:.2f} s'.format(time.time() - total_start))
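# compute_shot_boundaries is called above but not shown. A common minimal
# implementation (an assumption, not necessarily the original) thresholds the
# L1 distance between consecutive color histograms:
def compute_shot_boundaries(hists):
    import numpy as np
    # Flatten each frame's per-channel histograms into a single vector.
    flat = [np.concatenate([np.asarray(h, dtype=np.float32) for h in hist])
            for hist in hists]
    diffs = np.array([np.abs(flat[i] - flat[i - 1]).sum()
                      for i in range(1, len(flat))])
    # Flag a boundary wherever the jump exceeds mean + 3 standard deviations.
    thresh = diffs.mean() + 3 * diffs.std()
    return [i for i, d in enumerate(diffs, start=1) if d > thresh]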
def fused_project_flow_and_stereo_chunk(db, videos, videos_idx, render_params,
                                        start, end):
    warmup_size = 10
    task_size = 100

    left_frame = db.ops.FrameInput()
    left_cam_idx = db.ops.Input()
    right_frame = db.ops.FrameInput()
    right_cam_idx = db.ops.Input()

    args = db.protobufs.ProjectSphericalArgs()
    args.eqr_width = render_params["EQR_WIDTH"]
    args.eqr_height = render_params["EQR_HEIGHT"]
    args.camera_rig_path = render_params["RIG_JSON_FILE"]
    left_proj_frame = db.ops.ProjectSpherical(frame=left_frame,
                                              camera_id=left_cam_idx,
                                              args=args)
    right_proj_frame = db.ops.ProjectSpherical(frame=right_frame,
                                               camera_id=right_cam_idx,
                                               args=args)

    left_flow, right_flow = db.ops.TemporalOpticalFlow(
        left_projected_frame=left_proj_frame,
        right_projected_frame=right_proj_frame,
        flow_algo=render_params["SIDE_FLOW_ALGORITHM"],
        camera_rig_path=render_params["RIG_JSON_FILE"],
        warmup=warmup_size)

    left_chunk, right_chunk = db.ops.RenderStereoPanoramaChunk(
        left_projected_frame=left_proj_frame,
        left_flow=left_flow,
        right_projected_frame=right_proj_frame,
        right_flow=right_flow,
        eqr_width=render_params["EQR_WIDTH"],
        eqr_height=render_params["EQR_HEIGHT"],
        camera_rig_path=render_params["RIG_JSON_FILE"],
        flow_algo=render_params["SIDE_FLOW_ALGORITHM"],
        zero_parallax_dist=10000,
        interpupilary_dist=6.4)

    left_chunk_sample = left_chunk.sample()
    right_chunk_sample = right_chunk.sample()
    output_op = db.ops.Output(
        columns=[left_chunk_sample.lossless(), right_chunk_sample.lossless()])

    jobs = []
    for i in range(len(videos.tables())):
        left_idx = i
        right_idx = (left_idx + 1) % len(videos.tables())
        sample = db.sampler.range(start, end)
        job = Job(
            op_args={
                left_frame: videos.tables(left_idx).column('frame'),
                left_cam_idx: videos_idx.tables(left_idx).column('camera_index'),
                right_frame: videos.tables(right_idx).column('frame'),
                right_cam_idx: videos_idx.tables(right_idx).column('camera_index'),
                left_chunk_sample: sample,
                right_chunk_sample: sample,
                output_op: 'surround360_chunk_{:d}'.format(i),
            })
        jobs.append(job)
    bulk_job = BulkJob(output=output_op, jobs=jobs)
    return db.run(bulk_job, force=True, work_packet_size=10,
                  io_packet_size=task_size,
                  pipeline_instances_per_node=render_params["CORES"])
def handle(self, *args, **options):
    face_labeler = Labeler.objects.get(name=options['labeler'])
    feature_labeler, _ = Labeler.objects.get_or_create(name='facenet')

    with open(options['path']) as f:
        paths = [s.strip() for s in f.readlines()]

    with Database() as db:
        db.register_op('EmbedFaces', [('frame', ColumnType.Video), 'bboxes'],
                       ['embeddings'])
        db.register_python_kernel('EmbedFaces', DeviceType.CPU,
                                  cwd + '/embed_kernel.py')

        frame = db.ops.FrameInput()
        frame_strided = frame.sample()
        bboxes = db.ops.Input()
        embeddings = db.ops.EmbedFaces(frame=frame_strided, bboxes=bboxes)
        output = db.ops.Output(columns=[embeddings])

        jobs = []
        face_insts = []
        for path in paths:
            video = Video.objects.get(path=path)
            faces = Face.objects.filter(frame__video=video, labeler=face_labeler) \
                .select_related('frame') \
                .order_by('frame__video__id', 'frame__number')
            faces = [f for f in faces if f.bbox_x2 - f.bbox_x1 >= .04]

            frame_numbers = []
            rows = []
            cur_frame = None
            insts = []
            for f in faces:
                if f.frame.id != cur_frame:
                    cur_frame = f.frame.id
                    rows.append([])
                    frame_numbers.append(f.frame.number)
                rows[-1].append(
                    db.protobufs.BoundingBox(x1=f.bbox_x1, x2=f.bbox_x2,
                                             y1=f.bbox_y1, y2=f.bbox_y2))
                insts.append(f.id)
            face_insts.append(insts)

            db.new_table(path + '_bboxes', ['bboxes'], [[r] for r in rows],
                         fn=writers.bboxes, force=True)
            bbox_table = db.table(path + '_bboxes')

            jobs.append(
                Job(op_args={
                    frame: db.table(path).column('frame'),
                    frame_strided: db.sampler.gather(frame_numbers),
                    bboxes: bbox_table.column('bboxes'),
                    output: path + '_embeddings'
                }))

        bulk_job = BulkJob(output=output, jobs=jobs)
        db.run(bulk_job, force=True, pipeline_instances_per_node=1)
        output_tables = [db.table(path + '_embeddings') for path in paths]

        features = []
        for t, path, insts in zip(output_tables, paths, face_insts):
            inst_idx = 0
            embs = t.column('embeddings').load()
            for _, emb in embs:
                # Each row may pack multiple 512-byte embeddings end to end.
                for i in range(0, len(emb), 512):
                    e = np.frombuffer(emb[i:i + 512], dtype=np.float32)
                    features.append(
                        FaceFeatures(features=json.dumps(e.tolist()),
                                     face_id=insts[inst_idx],
                                     labeler=feature_labeler))
                    inst_idx += 1
        FaceFeatures.objects.bulk_create(features)
def handle(self, *args, **options):
    face_labeler = Labeler.objects.get(name=options['labeler'])
    gender_labeler, _ = Labeler.objects.get_or_create(name='rude-carnie')

    with open(options['path']) as f:
        paths = [s.strip() for s in f.readlines()]

    with Database() as db:
        db.register_op('Gender', [('frame', ColumnType.Video), 'bboxes'],
                       ['genders'])
        db.register_python_kernel('Gender', DeviceType.CPU,
                                  cwd + '/gender_kernel.py')

        frame = db.ops.FrameInput()
        frame_strided = frame.sample()
        bboxes = db.ops.Input()
        gender_out = db.ops.Gender(frame=frame_strided, bboxes=bboxes)
        output = db.ops.Output(columns=[gender_out])

        jobs = []
        face_insts = []
        for path in paths:
            video = Video.objects.get(path=path)
            faces = Face.objects.filter(person__frame__video=video,
                                        labeler=face_labeler) \
                .select_related('person__frame') \
                .order_by('person__frame__video__id', 'person__frame__number')
            faces = [f for f in faces if f.bbox_y2 - f.bbox_y1 >= .04]

            frame_numbers = []
            rows = []
            cur_frame = None
            for f in faces:
                if f.person.frame.id != cur_frame:
                    cur_frame = f.person.frame.id
                    rows.append([])
                    frame_numbers.append(f.person.frame.number)
                rows[-1].append(
                    db.protobufs.BoundingBox(x1=f.bbox_x1, x2=f.bbox_x2,
                                             y1=f.bbox_y1, y2=f.bbox_y2))
            face_insts.append(faces)

            db.new_table(path + '_bboxes', ['bboxes'], [[r] for r in rows],
                         fn=writers.bboxes, force=True)
            bbox_table = db.table(path + '_bboxes')

            jobs.append(
                Job(op_args={
                    frame: db.table(path).column('frame'),
                    frame_strided: db.sampler.gather(frame_numbers),
                    bboxes: bbox_table.column('bboxes'),
                    output: path + '_genders'
                }))

        bulk_job = BulkJob(output=output, jobs=jobs)
        db.run(bulk_job, force=True, pipeline_instances_per_node=1)
        output_tables = [db.table(path + '_genders') for path in paths]

        gender_models = []
        for t, path, insts in zip(output_tables, paths, face_insts):
            inst_idx = 0
            genders = t.column('genders').load()
            for _, g in genders:
                # Each row packs (label, score) pairs as 5-byte '=cf' records.
                for i in range(0, len(g), 5):
                    (label, score) = struct.unpack('=cf', g[i:(i + 5)])
                    face = insts[inst_idx]
                    gender_models.append(
                        FaceGender(gender=Gender.objects.get_or_create(
                            name=label)[0],
                                   labeler=gender_labeler,
                                   face=face))
                    inst_idx += 1
        FaceGender.objects.bulk_create(gender_models)
def main(num=1):
    movie_path = util.download_video(num)
    print('Detecting shots in movie {}'.format(movie_path))
    movie_name = 'shot_detect'

    # Use GPU kernels if we have a GPU
    if util.have_gpu():
        device = DeviceType.GPU
    else:
        device = DeviceType.CPU

    with Database() as db:
        print('Loading movie into Scanner DB...')
        total_time = 0.0

        start = now()
        ############ ############ ############ ############
        # 0. Ingest the video into the database
        ############ ############ ############ ############
        [movie_table], _ = db.ingest_videos([(movie_name, movie_path)],
                                            force=True)
        stop = now()
        total_time += stop - start
        print('Ingest time: {:.4f}s '.format(stop - start))
        print('Number of frames in movie: {:d}'.format(movie_table.num_rows()))

        start = now()
        ############ ############ ############ ############
        # 1. Run Histogram over the entire video in Scanner
        ############ ############ ############ ############
        frame = db.ops.FrameInput()
        histogram = db.ops.Histogram(frame=frame, device=device)
        output = db.ops.Output(columns=[histogram])
        job = Job(op_args={
            frame: movie_table.column('frame'),
            output: movie_name + '_hist'
        })
        bulk_job = BulkJob(output=output, jobs=[job])
        [hists_table] = db.run(bulk_job, force=True)
        stop = now()
        total_time += stop - start
        print('Compute histogram time: {:.4f}s, {:.1f} fps'.format(
            stop - start,
            movie_table.num_rows() / (stop - start)))
        hists_table.profiler().write_trace('shot_detect_hist.trace')

        start = now()
        ############ ############ ############ ############
        # 2. Load histograms and compute shot boundaries
        #    in python
        ############ ############ ############ ############
        # Read histograms from disk
        hists = [h for _, h in hists_table.load(['histogram'],
                                                parsers.histograms)]
        boundaries = compute_shot_boundaries(hists)
        stop = now()
        total_time += stop - start
        print('Found {:d} shots.'.format(len(boundaries)))
        print('Find boundaries time: {:.4f}s'.format(stop - start))

        start = now()
        ############ ############ ############ ############
        # 3. Create montage in Scanner
        ############ ############ ############ ############
        row_length = 16
        rows_per_item = 1
        target_width = 256
        item_size = row_length * rows_per_item
        # Compute partial row montages that we will stack together
        # at the end
        frame = db.ops.FrameInput()
        gather_frame = frame.sample()
        sliced_frame = gather_frame.slice()
        montage = db.ops.Montage(
            frame=sliced_frame,
            num_frames=item_size,
            target_width=target_width,
            frames_per_row=row_length,
            device=device)
        sampled_montage = montage.sample()
        output = db.ops.Output(
            columns=[sampled_montage.unslice().lossless()])

        starts_remainder = len(boundaries) % item_size
        evenly_divisible = (starts_remainder == 0)
        if not evenly_divisible:
            boundaries = boundaries[0:len(boundaries) - starts_remainder]

        job = Job(op_args={
            frame: movie_table.column('frame'),
            gather_frame: db.sampler.gather(boundaries),
            sliced_frame: db.partitioner.all(item_size),
            sampled_montage: [db.sampler.gather([item_size - 1])
                              for _ in range(len(boundaries) // item_size)],
            output: 'montage_image'
        })
        bulk_job = BulkJob(output=output, jobs=[job])
        [montage_table] = db.run(bulk_job, force=True)

        # Stack all partial montages together
        montage_img = np.zeros((1, target_width * row_length, 3),
                               dtype=np.uint8)
        for idx, img in montage_table.column('montage').load():
            img = np.flip(img, 2)
            montage_img = np.vstack((montage_img, img))
        stop = now()
        total_time += stop - start
        print('Create Montage time: {:.4f}s'.format(stop - start))
        montage_table.profiler().write_trace('shot_detect_montage.trace')

        start = now()
        ############ ############ ############ ############
        # 4. Write montage to disk
        ############ ############ ############ ############
        cv2.imwrite('detected_shots.jpg', montage_img)
        stop = now()
        total_time += stop - start
        print('Successfully generated detected_shots.jpg')
        print('Write image time: {:.4f}s'.format(stop - start))
        print('Total time: {:.4f}s'.format(total_time))
def test_pymxnet(num=3, fm_num=1, out_dir='./', batch=1):
    if num > 4:
        test_video_path = util.download_video2(
            'http://web.stanford.edu/~jamesh93/video/wild480p.mkv')
    else:
        test_video_path = util.download_video1(num, fm_num)
    print('#{:d} video, #{:d} format, outdir: {}'.format(num, fm_num, out_dir))

    if util.have_gpu():
        device = DeviceType.GPU
    else:
        device = DeviceType.CPU

    script_dir = os.path.dirname(os.path.abspath(__file__))
    with Database() as db:
        # if not os.path.isfile('pymxnet_op/build/libpymxnet_op.so'):
        #     print('You need to build the custom op first: \n'
        #           '$ cd pymxnet_op; mkdir build && cd build; cmake ..; make')
        #     exit()
        # # To load a custom op into the Scanner runtime, we use db.load_op to
        # # open the shared library we compiled. If the op takes arguments, it
        # # also optionally takes a path to the generated python file for the
        # # arg protobuf.
        # db.load_op('pymxnet_op/build/libpymxnet_op.so',
        #            'pymxnet_op/build/pymxnet_pb2.py')
        db.register_op('PyMxnet', [('frame', ColumnType.Video)], ['class'])
        kernel_path = script_dir + '/pymxnet_op/pymxnet_op.py'
        db.register_python_kernel('PyMxnet', DeviceType.CPU, kernel_path,
                                  batch=10)

        start = now()
        [input_table], failed = db.ingest_videos(
            [('test_pymxnet_raw', test_video_path)], force=True)
        stop = now()
        delta = stop - start
        print('Time to ingest videos: {:.4f}s, fps: {:.4f}'.format(
            delta, input_table.num_rows() / delta))
        num_rows = input_table.num_rows()
        print('Number of frames in movie: {:d}'.format(num_rows))
        if len(failed) > 0:
            print('Failures:', failed)

        # Start to analyze the movie
        start = now()
        frame = db.ops.FrameInput()
        # Then we use our op just like in the other examples.
        classes = db.ops.PyMxnet(frame=frame, batch=batch)
        output_op = db.ops.Output(columns=[classes])
        job = Job(
            op_args={
                frame: db.table('test_pymxnet_raw').column('frame'),
                output_op: 'test_pymxnet_out'
            })
        bulk_job = BulkJob(output=output_op, jobs=[job])
        [output_table] = db.run(bulk_job, force=True, profiling=False,
                                pipeline_instances_per_node=1,
                                work_packet_size=WORK_PACKET_SIZE)
        stop = now()
        delta = stop - start
        print('Batch: {:d} Python MXNet time: {:.4f}s, {:.1f} fps\n'.format(
            batch, delta, input_table.num_rows() / delta))
def test_fault_tolerance(fault_db):
    force_kill_spawn_port = 5012
    normal_spawn_port = 5013

    def worker_killer_task(config, master_address):
        from scannerpy import ProtobufGenerator, Config, start_worker
        import time
        import grpc
        import subprocess
        import signal
        import os

        c = Config(None)

        import scanner.metadata_pb2 as metadata_types
        import scanner.engine.rpc_pb2 as rpc_types
        import scanner.types_pb2 as misc_types
        import scannerpy.libscanner as bindings

        protobufs = ProtobufGenerator(config)

        # Spawn a worker that we will force kill
        script_dir = os.path.dirname(os.path.realpath(__file__))
        with open(os.devnull, 'w') as fp:
            p = subprocess.Popen(
                [
                    'python ' + script_dir +
                    '/spawn_worker.py {:d}'.format(force_kill_spawn_port)
                ],
                shell=True,
                stdout=fp,
                stderr=fp,
                preexec_fn=os.setsid)

            # Wait a bit for the worker to do its thing
            time.sleep(10)

            # Force kill worker process to trigger fault tolerance
            os.killpg(os.getpgid(p.pid), signal.SIGTERM)
            p.kill()
            p.communicate()

            # Wait for fault tolerance to kick in
            time.sleep(15)

            # Spawn the worker again
            subprocess.call(
                [
                    'python ' + script_dir +
                    '/spawn_worker.py {:d}'.format(normal_spawn_port)
                ],
                shell=True)

    master_addr = fault_db._master_address
    killer_process = Process(
        target=worker_killer_task, args=(fault_db.config, master_addr))
    killer_process.daemon = True
    killer_process.start()

    frame = fault_db.ops.FrameInput()
    range_frame = frame.sample()
    sleep_frame = fault_db.ops.SleepFrame(ignore=range_frame)
    output_op = fault_db.ops.Output(columns=[sleep_frame])
    job = Job(op_args={
        frame: fault_db.table('test1').column('frame'),
        range_frame: fault_db.sampler.range(0, 20),
        output_op: 'test_fault',
    })
    bulk_job = BulkJob(output=output_op, jobs=[job])
    table = fault_db.run(
        bulk_job,
        pipeline_instances_per_node=1,
        force=True,
        show_progress=False)
    table = table[0]

    assert len([_ for _, _ in table.column('dummy').load()]) == 20

    # Shutdown the spawned worker
    channel = grpc.insecure_channel(
        'localhost:' + str(normal_spawn_port),
        options=[('grpc.max_message_length', 24499183 * 2)])
    worker = fault_db.protobufs.WorkerStub(channel)
    try:
        worker.Shutdown(fault_db.protobufs.Empty())
    except grpc.RpcError as e:
        status = e.code()
        if status == grpc.StatusCode.UNAVAILABLE:
            print('could not shutdown worker!')
            exit(1)
        else:
            raise ScannerException(
                'Worker errored with status: {}'.format(status))
    killer_process.join()
from scannerpy import Database, DeviceType, BulkJob, Job
from scannerpy.stdlib import parsers
import numpy as np
import cv2
import os.path
import util

with Database(master='localhost:8080', start_cluster=False) as db:
    print('Connected!')

    example_video_path = 'tvnews/segments/FOXNEWS_20121009_220000_Special_Report_With_Bret_Baier_segment.mp4'
    [input_table], failed = db.ingest_videos(
        [('example2', example_video_path)], force=True)

    frame = db.ops.FrameInput()
    hist = db.ops.Histogram(frame=frame)
    output_op = db.ops.Output(columns=[hist])
    job = Job(op_args={
        frame: db.table('example2').column('frame'),
        output_op: '_ignore'
    })
    bulk_job = BulkJob(output=output_op, jobs=[job])
    output = db.run(bulk_job, force=True)
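    # Read the histograms back, mirroring the load pattern used by the
    # shot-detection examples above (a sketch; assumes parsers.histograms
    # yields one array per color channel).
    for frame_index, hist in output[0].load(['histogram'], parsers.histograms):
        print(frame_index, [int(h.sum()) for h in hist])
        break  # sanity-check the first row only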
def start_mxnet_pipeline(test_video_path='videos/example.mp4',
                         out_dir='./',
                         batch=BATCH_SIZE,
                         load_to_disk=False):
    global timelist

    if util.have_gpu():
        device = DeviceType.GPU
        print('with GPU device!')
    else:
        device = DeviceType.CPU
        print('only has CPU device!')

    script_dir = os.path.dirname(os.path.abspath(__file__))
    num_rows = 0
    # Start Scanner DB, use its load worker to generate .proto and .bin files
    with Database() as db:
        # Register the fake kernel. The Fake kernel is faster; alternatively
        # use PyMxnet:
        # db.register_op('PyMxnet', [('frame', ColumnType.Video)], ['class'])
        # kernel_path = script_dir + '/pymxnet_op.py'
        # db.register_python_kernel('PyMxnet', DeviceType.CPU, kernel_path,
        #                           batch=10)
        db.register_op('Fake', [('frame', ColumnType.Video)], ['class'])
        kernel_path = script_dir + '/fake_op.py'
        db.register_python_kernel('Fake', device, kernel_path, batch=10)

        start = now()
        [input_table], failed = db.ingest_videos(
            [('end2end_raw', test_video_path)], force=True)
        stop = now()
        delta = stop - start
        print('Time to ingest videos: {:.4f}s, fps: {:.4f}'.format(
            delta, input_table.num_rows() / delta))
        timelist["ingest-video"] = delta

        num_rows = input_table.num_rows()
        print('Number of frames in movie: {:d}'.format(num_rows))
        if len(failed) > 0:
            print('Failures:', failed)

        # Start to analyze the movie
        start = now()
        frame = db.ops.FrameInput()
        # Then we use our op just like in the other examples.
        classes = db.ops.Fake(frame=frame, batch=batch)
        # classes = db.ops.PyMxnet(frame=frame, batch=batch)
        output_op = db.ops.Output(columns=[classes])
        job = Job(op_args={
            frame: input_table.column('frame'),
            output_op: 'end2end_out'
        })
        bulk_job = BulkJob(output=output_op, jobs=[job])
        [output_table] = db.run(bulk_job, force=True, profiling=False,
                                pipeline_instances_per_node=1,
                                load_to_disk=load_to_disk,
                                work_packet_size=WORK_PACKET_SIZE)
        stop = now()
        delta = stop - start
        print('Batch: {:d} End-to-end Python Kernel time: {:.4f}s, {:.1f} fps\n'
              .format(batch, delta, input_table.num_rows() / delta))
        timelist["scanner-execution"] = delta
        # output_table.profiler().write_trace(
        #     out_dir + 'end2end_{:d}.trace'.format(batch))

        # If not load_to_disk, verify the results locally and stop here.
        if not load_to_disk:
            video_classes = output_table.load(['class'], parsers.classes)
            # Loop over the column's rows. Each row is a tuple of the frame
            # number and value for that row.
            num_rows = 0
            for (frame_index, frame_classes) in video_classes:
                assert len(frame_classes) == 1
                assert frame_classes[0].shape[0] == 1
                num_rows += 1
            assert num_rows == db.table('end2end_raw').num_rows()
            print(db.summarize())
            exit()

        # Then start the Lambda part: extract the video name
        videoPrefix = test_video_path.split(".")[-2].split("/")[-1]
        print('video name is: {:s}'.format(videoPrefix))
        uploadPrefix = UPLOAD_PREFIX + '/{}_{}'.format(videoPrefix,
                                                       WORK_PACKET_SIZE)

        # Upload all .proto files
        start = now()
        fileCount, totalSize = upload_output_to_s3(UPLOAD_BUCKET,
                                                   uploadPrefix, PROTO_EXT)
        # Upload all .bin files
        fileCount, totalSize = upload_output_to_s3(UPLOAD_BUCKET,
                                                   uploadPrefix, BIN_EXT)
        stop = now()
        delta = stop - start
        print('Upload to S3 time: {:.4f} s'.format(delta))
        timelist["upload-s3"] = delta

        # Call Lambdas to decode, providing bucket name, file prefix, and
        # start frame. The decoder Lambdas write to S3, which triggers the
        # MXNet Lambdas.
        start = now()
        lambdaTotalCount = len(xrange(0, num_rows, WORK_PACKET_SIZE))
        bar = progressbar.ProgressBar(
            maxval=lambdaTotalCount,
            widgets=[progressbar.Bar('=', 'Lambdas [', ']'), ' ',
                     progressbar.Percentage()])
        bar.start()
        lambdaCount = 0
        for startFrame in xrange(0, num_rows, WORK_PACKET_SIZE):
            result = invoke_decoder_lambda(UPLOAD_BUCKET, uploadPrefix,
                                           startFrame, batch)
            if not result:
                print('Fail to invoke for frame {:d}, retry.'.format(
                    startFrame))
                res = invoke_decoder_lambda(UPLOAD_BUCKET, uploadPrefix,
                                            startFrame, batch)
                if not res:
                    print('Frame {:d} still failed, exit'.format(startFrame))
                    exit()
            lambdaCount += 1
            bar.update(lambdaCount)
        bar.finish()
        stop = now()
        delta = stop - start
        assert lambdaCount == lambdaTotalCount
        print('Triggered #{} Lambdas, time {:.4f} s'.format(lambdaCount,
                                                            delta))
        timelist["invoke-lambda"] = delta

        # Wait until all output files appear
        fileCount = wait_until_all_finished(0, num_rows, batch, videoPrefix)
        # assert(fileCount == len(xrange(0, num_rows, batch)))
        totalCount = len(xrange(0, num_rows, batch))
        print('Collected {:d} out of {:d} files, error rate: {:.4f}'.format(
            fileCount, totalCount,
            (totalCount - fileCount) * 1.0 / totalCount))