def main():
    """Run the custom C++ ``MyResize`` op over the example video.

    Look at resize_op/resize_op.cpp to start this tutorial. The op's shared
    library must already be built; if it is missing, build instructions are
    printed and the program exits.
    """
    client = sp.Client()

    here = os.path.dirname(os.path.abspath(__file__))
    op_library = os.path.join(here, 'resize_op/build/libresize_op.so')
    if not os.path.isfile(op_library):
        print(
            'You need to build the custom op first: \n'
            '$ pushd {}/resize_op; mkdir build && cd build; cmake ..; make; popd'.
            format(here))
        exit()

    # A custom op is made available to the Scanner runtime by loading the
    # shared library we compiled. Because this op takes arguments, we also
    # pass the path of the generated python module for its arg protobuf.
    op_proto_module = os.path.join(here, 'resize_op/build/resize_pb2.py')
    client.load_op(op_library, op_proto_module)

    source_path = util.download_video()
    source_stream = sp.NamedVideoStream(client, 'example', path=source_path)
    frames = client.io.Input([source_stream])

    # From here on the custom op is used like any other op.
    resized = client.ops.MyResize(frame=frames, width=200, height=300)

    resized_stream = sp.NamedVideoStream(client, 'example_resized')
    sink = client.io.Output(resized, [resized_stream])
    client.run(sink, sp.PerfParams.estimate())

    # Clean up both the input and the output stream.
    source_stream.delete(client)
    resized_stream.delete(client)
def main():
    """Resize the example video with both Python ops and save the results.

    Runs the function-style op (``resize_fn``) and the class-style op
    (``ResizeClass``) in a single job, exports each result as an mp4 and
    then deletes the two output streams.
    """
    # Now we can use these new Ops in Scanner:
    client = sp.Client()

    # Fetch the example video and expose it to the graph as an input stream.
    source_path = util.download_video()
    source_stream = sp.NamedVideoStream(client, 'example', path=source_path)
    frames = client.io.Input([source_stream])

    via_function = client.ops.resize_fn(frame=frames, width=640, height=480)
    via_class = client.ops.ResizeClass(frame=frames, width=320, height=240)

    fn_stream = sp.NamedVideoStream(client, 'fn_frames')
    fn_sink = client.io.Output(via_function, [fn_stream])

    class_stream = sp.NamedVideoStream(client, 'class_frames')
    class_sink = client.io.Output(via_class, [class_stream])

    # Both outputs are computed by one run of the graph.
    client.run([fn_sink, class_sink], sp.PerfParams.estimate())

    exports = [(fn_stream, '01_resized_fn'), (class_stream, '01_resized_class')]
    for stream, mp4_name in exports:
        stream.save_mp4(mp4_name)
    for stream, _ in exports:
        stream.delete(client)

    print('Finished! Two videos were saved to the current directory: '
          '01_resized_fn.mp4, 01_resized_class.mp4')
def main():
    """Demonstrate default, low-quality and lossless storage of frames.

    The same blur pipeline is run three times over the example video, each
    time writing to a differently-configured output stream. The low-quality
    result is exported as 'low_quality_video.mp4'.
    """
    client = sp.Client()

    # Frames on disk are stored either uncompressed (raw bits) or compressed
    # (image/video encoded). Scanner decodes them automatically when reading,
    # so the Op DAG only ever sees raw frames. The example video used here is
    # stored compressed.
    def make_blurred_frame(streams):
        """Return (input frames, first 30 blurred frames) for `streams`."""
        source_frames = client.io.Input(streams)
        blurred = client.ops.Blur(frame=source_frames, kernel_size=3, sigma=0.5)
        head = client.streams.Range(blurred, [(0, 30)])
        return source_frames, head

    video_path = util.download_video()
    video_stream = sp.NamedVideoStream(client, 'example', path=video_path)

    # 1. Default behaviour: frames with 3 channels of type uint8 produced by
    #    an Op are video-encoded. No other frame type is currently compressed.
    _, blurred_frame = make_blurred_frame([video_stream])
    default_stream = sp.NamedVideoStream(client, 'output_table_name')
    sink = client.io.Output(blurred_frame, [default_stream])
    client.run(sink, sp.PerfParams.estimate())
    default_stream.delete(client)

    # 2. Compression parameters are controlled by annotating the output of
    #    an Op that produces frames.
    _, blurred_frame = make_blurred_frame([video_stream])
    low_quality_frame = blurred_frame.compress_video(quality=35)
    low_quality_stream = sp.NamedVideoStream(client, 'low_quality_video')
    sink = client.io.Output(low_quality_frame, [low_quality_stream])
    client.run(sink, sp.PerfParams.estimate())

    # 3. Compression is disabled by marking the Op output as lossless.
    _, blurred_frame = make_blurred_frame([video_stream])
    lossless_frame = blurred_frame.lossless()
    lossless_stream = sp.NamedVideoStream(client, 'lossless_video')
    sink = client.io.Output(lossless_frame, [lossless_stream])
    client.run(sink, sp.PerfParams.estimate())

    # Any frame sequence saved as a compressed `NamedVideoStream` can be
    # exported as an mp4 with save_mp4. This writes 'low_quality_video.mp4'
    # into the current directory.
    low_quality_stream.save_mp4('low_quality_video')

    low_quality_stream.delete(client)
    lossless_stream.delete(client)
def main():
    """Detect shots in a movie and write a montage of them to shots.jpg.

    The movie path is taken from the first command-line argument when one
    is given; otherwise the example video is downloaded. Each stage prints
    how long it took.
    """
    if len(sys.argv) <= 1:
        movie_path = util.download_video()
    else:
        movie_path = sys.argv[1]
    print('Detecting shots in movie {}'.format(movie_path))
    movie_name = os.path.basename(movie_path)

    # Use GPU kernels (and build the montage inside Scanner) if we have a GPU.
    scanner_montage = bool(have_gpu())
    device = DeviceType.GPU if scanner_montage else DeviceType.CPU

    with Database() as db:
        print('Loading movie into Scanner database...')
        started = time.time()
        (movie_table,), _ = db.ingest_videos(
            [(movie_name, movie_path)], force=True)
        print('Time: {:.1f}s'.format(time.time() - started))

        started = time.time()
        print('Computing a color histogram for each frame...')
        all_frames = movie_table.as_op().all()
        histogram = db.ops.Histogram(frame=all_frames, device=device)
        hist_job = Job(columns=[histogram], name=movie_name + '_hist')
        hists_table = db.run(hist_job, force=True)
        print('\nTime: {:.1f}s'.format(time.time() - started))

        started = time.time()
        print('Computing shot boundaries...')
        # Read the histograms back, dropping the first element of each
        # loaded pair.
        hists = []
        for _, hist in hists_table.load(['histogram'], parsers.histograms):
            hists.append(hist)
        boundaries = compute_shot_boundaries(hists)
        print('Time: {:.1f}s'.format(time.time() - started))

        started = time.time()
        print('Creating shot montage...')
        if not scanner_montage:
            # Make the montage in python from the frames at each boundary.
            boundary_frames = movie_table.load(['frame'], rows=boundaries)
            montage_img = make_montage(len(boundaries), boundary_frames)
        else:
            # Make the montage in scanner.
            montage_img = make_montage_scanner(db, movie_table, boundaries)
        print('')
        print('Time: {:.1f}s'.format(time.time() - started))

        cv2.imwrite('shots.jpg', montage_img)
        print('Successfully generated shots.jpg')
def main():
    """Compute histograms for every 4th frame of the example video.

    Builds a graph that strides the input frames, runs a Histogram op on the
    sampled frames, checks the loaded results, deletes the streams, and then
    shows two other sampling operations (Range and Gather).
    """
    sc = sp.Client()

    example_video_path = util.download_video()
    video_stream = sp.NamedVideoStream(sc, 'example', path=example_video_path)
    frames = sc.io.Input([video_stream])

    # You can tell Scanner which frames of the video (or which rows of a video
    # table) you want to sample. Here, we indicate that we want to stride
    # the frame column by 4 (select every 4th frame).
    stride = 4
    strided_frames = sc.streams.Stride(frames, [stride])

    # We process the sampled frames the same as before.
    hists = sc.ops.Histogram(frame=strided_frames)
    hist_stream = sp.NamedVideoStream(sc, 'example_hist_strided')
    output = sc.io.Output(hists, [hist_stream])
    sc.run(output, sp.PerfParams.estimate())

    # Loop over the output elements. Each element is a set of 3 histograms
    # (one per color channel) with 16 bins each.
    num_rows = 0
    for frame_hists in hist_stream.load():
        assert len(frame_hists) == 3
        assert frame_hists[0].shape[0] == 16
        num_rows += 1

    # Striding from frame 0 keeps frames 0, 4, 8, ..., i.e. ceil(len / 4)
    # rows. round() would under-count whenever len % 4 == 1 (and, because of
    # banker's rounding, for some lengths with len % 4 == 2), so use integer
    # ceiling division instead.
    expected_rows = (video_stream.len() + stride - 1) // stride
    assert num_rows == expected_rows

    video_stream.delete(sc)
    hist_stream.delete(sc)

    # Here are some examples of other sampling modes. They only construct
    # graph nodes; nothing is executed.

    # Range takes a specific subset of a video. Here, it runs over all frames
    # from 0 to 100.
    sc.streams.Range(frames, [(0, 100)])

    # Gather takes an arbitrary list of frames from a video.
    sc.streams.Gather(frames, [[10, 17, 32]])
def main():
    """Resize the example video with both Python ops (table-based API).

    The function-style and class-style resize ops write to two columns of
    one output table; each column is then exported as an mp4.
    """
    # Now we can use these new Ops in Scanner:
    db = Database()

    # Download the example video and ingest it into the database.
    video_path = util.download_video()
    (input_table,), _ = db.ingest_videos(
        [('example', video_path)], force=True)

    frame = db.sources.FrameColumn()

    via_function = db.ops.resize_fn(frame=frame, width=640, height=480)
    via_class = db.ops.ResizeClass(frame=frame, width=320, height=240)

    # One sink with two frame columns, one per resize variant.
    sink_columns = {
        'frame1': via_function,
        'frame2': via_class
    }
    output = db.sinks.FrameColumn(columns=sink_columns)

    # Bind the source to the ingested table and name the output table.
    bindings = {
        frame: input_table.column('frame'),
        output: 'example_python_op'
    }
    job = Job(op_args=bindings)
    (table,) = db.run(output=output, jobs=[job], force=True)

    for column_name, mp4_name in (('frame1', '01_resized_fn'),
                                  ('frame2', '01_resized_class')):
        table.column(column_name).save_mp4(mp4_name)

    print('Finished! Two videos were saved to the current directory: '
          '01_resized_fn.mp4, 01_resized_class.mp4')
from scannerpy import Database, DeviceType, Job
from scannerpy.stdlib import NetDescriptor
import numpy as np
import cv2
import struct
import sys
import os
# Make the parent directory importable so the shared `util` module resolves.
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/..')
import util

# Use the video given as the first command-line argument, or fall back to
# downloading the example video.
video_path = util.download_video() if len(sys.argv) <= 1 else sys.argv[1]
print('Performing classification on video {}'.format(video_path))
# The table is named after the video file, without directory or extension.
video_name = os.path.splitext(os.path.basename(video_path))[0]

with Database() as db:
    # force=True is passed so the ingest can be re-run under the same name.
    [input_table], _ = db.ingest_videos([(video_name, video_path)], force=True)

    # Network description is read from a TOML file relative to the current
    # working directory.
    descriptor = NetDescriptor.from_file(db, 'nets/resnet.toml')

    batch_size = 48
    frame = db.sources.FrameColumn()
    # Both Caffe ops below are pinned to the GPU and share the descriptor
    # and batch size.
    caffe_input = db.ops.CaffeInput(frame=frame, net_descriptor=descriptor.as_proto(), batch_size=batch_size, device=DeviceType.GPU)
    caffe_output = db.ops.Caffe(caffe_frame=caffe_input, net_descriptor=descriptor.as_proto(), batch_size=batch_size, batch=batch_size, device=DeviceType.GPU)
from scannerpy import Database, DeviceType, Job, ColumnType from scannerpy.stdlib import NetDescriptor, parsers, pipelines import math import os import subprocess import cv2 import sys import os.path sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/..') import util script_dir = os.path.dirname(os.path.abspath(__file__)) util.download_video() with Database() as db: video_path = util.download_video() if not db.has_table('example'): print('Ingesting video into Scanner ...') db.ingest_videos([('example', video_path)], force=True) input_table = db.table('example') poses_table = pipelines.detect_poses( db, [input_table], lambda t: t.range(0, 100, task_size = 25), 'example_poses', height = 360)[0] height = 720)[0] print('Drawing on frames...') db.register_op('PoseDraw', [('frame', ColumnType.Video), 'poses'], [('frame', ColumnType.Video)]) db.register_python_kernel('PoseDraw', DeviceType.CPU,
def main():
    """Compute per-frame color histograms for two ingested example tables.

    Ingests the example video twice (plus one deliberately missing file),
    runs a Histogram op over both tables in one bulk run, and checks the
    histograms loaded from the first output table.
    """
    # Connect to the Scanner database. Configuration is loaded from the
    # ~/.scanner.toml configuration file.
    db = Database()

    # Each entry is (table name, video path). Videos that fail to ingest are
    # returned in the failed list; force=True overwrites existing tables of
    # the same name.
    video_path = util.download_video()
    to_ingest = [('example', video_path),
                 ('example2', video_path),
                 ('thisshouldfail', 'thisshouldfail.mp4')]
    _, failed = db.ingest_videos(to_ingest, force=True)

    print(db.summarize())
    print('Failures:', failed)

    # Scanner processes videos through a graph of operations that read input
    # frames from a table and write outputs to a new table. FrameColumn
    # declares a read from a table column holding video frames.
    frame = db.sources.FrameColumn()

    # The frames feed a Histogram op, which computes a color histogram for
    # each frame.
    hist = db.ops.Histogram(frame=frame)

    # Columns given to the sink are saved to the output table when the
    # computation finishes; 'hist' is the output column name.
    output_op = db.sinks.Column(columns={'hist': hist})

    # A job defines one table to create: op_args binds the FrameColumn to the
    # table to read and binds the sink to the output table's name.
    job = Job(op_args={
        frame: db.table('example').column('frame'),
        output_op: 'example_hist'
    })
    job2 = Job(op_args={
        frame: db.table('example2').column('frame'),
        output_op: 'example_hist2'
    })

    # Execute both jobs. A progress bar is shown while Scanner computes.
    output_tables = db.run(output=output_op, jobs=[job, job2], force=True)

    # Load the histograms of the first output table. readers.histograms
    # converts Scanner's raw bytes into a numpy array per channel.
    first_hist_column = output_tables[0].column('hist')
    video_hists = first_hist_column.load(readers.histograms)

    # Every element is a set of 3 histograms (one per color channel).
    num_rows = 0
    for frame_hists in video_hists:
        assert len(frame_hists) == 3
        assert frame_hists[0].shape[0] == 16
        num_rows += 1
    assert num_rows == db.table('example').num_rows()
def main():
    """Walk through building and running a basic Scanner computation graph.

    First computes histograms for a single video stream, then re-runs the
    same graph over a batch of two streams, verifies the first result and
    finally deletes every stream that was created.
    """
    # Start the Scanner runtime and connect to it. Configuration is loaded
    # from the ~/.scanner.toml configuration file.
    sc = sp.Client()

    video_path = util.download_video()

    # Scanner processes videos through a graph of operations over input
    # streams that produces output streams. A `NamedVideoStream` reads from
    # our example video:
    video_stream1 = sp.NamedVideoStream(sc, 'example1', path=video_path)

    # Graph construction starts by declaring the input streams to read from,
    # using an `Input` operation built from the stream above. Its result is
    # an edge in the graph: the sequence of values (here, video frames)
    # produced by `Input`.
    frames = sc.io.Input([video_stream1])

    # The frames are processed by a `Histogram` op that computes a color
    # histogram for each frame.
    hists = sc.ops.Histogram(frame=frames)

    # The histograms are written to a `NamedStream`, which is just like a
    # `NamedVideoStream` but for non-video data.
    named_stream1 = sp.NamedStream(sc, 'example1_hist')

    # Mirroring `Input`, an `Output` op writes an edge to an output stream.
    output_op = sc.io.Output(hists, [named_stream1])

    # Execute the graph; a progress bar is shown while Scanner computes.
    # .run also takes a PerfParams object holding performance-tuning
    # parameters (e.g. how many video frames can fit into memory).
    # PerfParams.estimate() heuristically guesses a reasonable set of
    # parameters (but is not guaranteed to work!). Later tutorials cover
    # tuning them.
    job_id = sc.run(output_op, sp.PerfParams.estimate())

    # Scanner can also operate over batches of streams for more parallelism.
    # Below, a new graph processes two copies of the example video. The first
    # histogram stream is deleted so that it can be written again.
    named_stream1.delete(sc)

    video_stream2 = sp.NamedVideoStream(sc, 'example2', path=video_path)
    frames = sc.io.Input([video_stream1, video_stream2])
    hists = sc.ops.Histogram(frame=frames)
    named_stream2 = sp.NamedStream(sc, 'example2_hist')
    output_op = sc.io.Output(hists, [named_stream1, named_stream2])

    job_id = sc.run(output_op, sp.PerfParams.estimate())

    # For each stream given to the single `Input` op, Scanner executes the
    # graph on that stream's frames independently. This lets you hand
    # Scanner potentially thousands of videos up front; on a cluster, the
    # work would be parallelized across all machines.

    # With the graph processed, load the histograms of the first stream:
    num_rows = 0
    for channel_hists in named_stream1.load():
        assert len(channel_hists) == 3
        assert channel_hists[0].shape[0] == 16
        num_rows += 1
    assert num_rows == video_stream1.len()

    # Clean up by deleting all created streams through the storage object of
    # the first one.
    all_streams = [video_stream1, video_stream2, named_stream1, named_stream2]
    storage = all_streams[0].storage()
    storage.delete(sc, all_streams)
def main():
    """Run background subtraction over sliced frame ranges (table-based API).

    Registers a bounded-state BackgroundSubtraction op, slices the input
    into two scene partitions, applies the op, unslices the result and
    exports it as '04_masked.mp4'.
    """
    db = Database()

    video_path = util.download_video()
    (input_table,), _ = db.ingest_videos(
        [('example', video_path)], force=True)

    frame = db.sources.FrameColumn()

    # Stateful operations (bounded or unbounded) sometimes need boundaries
    # between frame sequences so that state is not shared across them. When
    # tracking objects in a movie, for instance, trackers should not survive
    # a scene change because the tracked objects are gone. Scanner limits
    # state propagation across frames with "slicing" operations.
    sliced_frame = db.streams.Slice(frame, db.partitioner.all(50))
    # The stream is now cut into chunks of 50 elements: any op consuming
    # 'sliced_frame' can *only* keep state within one such chunk.

    # As an example, take the background subtraction op from the previous
    # tutorial (02_op_attributes) and run it on a static camera video which
    # sometimes jumps forward in time:
    @scannerpy.register_python_op(bounded_state=60)
    class BackgroundSubtraction(scannerpy.Kernel):
        """Zero out pixels close to a running average of earlier frames."""

        def __init__(self, config):
            self.config = config
            args = config.args
            self.alpha = args['alpha']
            self.thresh = args['threshold']

        def reset(self):
            # Forget the running average.
            self.average_image = None

        def execute(self, frame: FrameType) -> FrameType:
            # The first frame after a reset seeds the running average.
            if self.average_image is None:
                self.average_image = frame

            # A pixel is masked if any channel is within the threshold of
            # the running average.
            close = np.abs(frame - self.average_image) < 255 * self.thresh
            close = np.any(close, axis=2)
            rows, cols = np.where(close)

            masked_image = np.copy(frame)
            masked_image[rows, cols, :] = 0

            # Blend the current frame into the running average.
            keep = 1.0 - self.alpha
            self.average_image = (self.average_image * keep +
                                  frame * self.alpha)
            return masked_image

    # The static camera video could be downloaded from youtube and ingested
    # as shown below; this tutorial reuses the example table instead.
    # subprocess.check_call(
    #     'youtube-dl -f 137 \'https://youtu.be/cVHqFqNz7eM\' -o test.mp4',
    #     shell=True)
    # [static_table], _ = db.ingest_videos([('static_video', 'test.mp4')],
    #                                      force=True)
    static_table = input_table

    frame = db.sources.FrameColumn()

    # Suppose scene changes occur at frames 1100, 1200, and 1500. A
    # 'partitioner' that splits the input at those points tells Scanner not
    # to let background subtraction cross them.
    scene_partitions = db.partitioner.ranges([(1100, 1200), (1200, 1500)])

    # Slice the input frame sequence into these two partitions.
    sliced_frame = db.streams.Slice(frame, partitioner=scene_partitions)

    # Run background subtraction, declaring that 60 prior frames are needed
    # to produce correct output.
    masked_frame = db.ops.BackgroundSubtraction(
        frame=sliced_frame, alpha=0.02, threshold=0.05, bounded_state=60)

    # With background subtraction done, unslice to rejoin a single contiguous
    # stream. Sequences must be unsliced before being fed into sinks.
    unsliced_frame = db.streams.Unslice(masked_frame)

    output = db.sinks.Column(columns={'frame': unsliced_frame})
    job = Job(op_args={
        frame: static_table.column('frame'),
        output: '04_masked_video',
    })
    (table,) = db.run(output=output, jobs=[job], force=True)

    table.column('frame').save_mp4('04_masked')

    videos = ['04_masked.mp4']
    print('Finished! The following videos were written: {:s}'.format(
        ', '.join(videos)))
def main():
    """Run background subtraction over sliced frame ranges (client API).

    Registers a bounded-state BackgroundSubtraction op, slices the input
    into two scene partitions, applies the op, unslices the result, exports
    it as '04_masked.mp4' and deletes the output stream.
    """
    sc = sp.Client()

    video_path = util.download_video()
    video_stream = sp.NamedVideoStream(sc, 'example', path=video_path)
    frame = sc.io.Input([video_stream])

    # Stateful operations (bounded or unbounded) sometimes need boundaries
    # between frame sequences so that state is not shared across them. When
    # tracking objects in a movie, for instance, trackers should not survive
    # a scene change because the tracked objects are gone. Scanner limits
    # state propagation across frames with "slicing" operations.
    sliced_frame = sc.streams.Slice(frame, partitions=[sc.partitioner.all(50)])
    # The stream is now cut into chunks of 50 elements: any op consuming
    # 'sliced_frame' can *only* keep state within one such chunk.

    # As an example, take the background subtraction op from the previous
    # tutorial (02_op_attributes) and run it on our example video:
    @sp.register_python_op(bounded_state=60)
    class BackgroundSubtraction(sp.Kernel):
        """Zero out pixels close to a running average of earlier frames."""

        def __init__(self, config, alpha, threshold):
            self.config = config
            self.alpha = alpha
            self.thresh = threshold

        def reset(self):
            # Forget the running average.
            self.average_image = None

        def execute(self, frame: sp.FrameType) -> sp.FrameType:
            # The first frame after a reset seeds the running average.
            if self.average_image is None:
                self.average_image = frame

            # A pixel is masked if any channel is within the threshold of
            # the running average.
            close = np.abs(frame - self.average_image) < 255 * self.thresh
            close = np.any(close, axis=2)
            rows, cols = np.where(close)

            masked_image = np.copy(frame)
            masked_image[rows, cols, :] = 0

            # Blend the current frame into the running average.
            keep = 1.0 - self.alpha
            self.average_image = (self.average_image * keep +
                                  frame * self.alpha)
            return masked_image

    frame = sc.io.Input([video_stream])

    # Suppose scene changes occur at frames 1100, 1200, and 1400. A
    # 'partitioner' that splits the input at those points tells Scanner not
    # to let background subtraction cross them.
    scene_partitions = sc.partitioner.ranges([(1100, 1200), (1200, 1400)])

    # Slice the input frame sequence into these two partitions.
    sliced_frame = sc.streams.Slice(frame, partitions=[scene_partitions])

    # Run background subtraction, declaring that 60 prior frames are needed
    # to produce correct output.
    masked_frame = sc.ops.BackgroundSubtraction(
        frame=sliced_frame, alpha=0.02, threshold=0.05, bounded_state=60)

    # With background subtraction done, unslice to rejoin a single contiguous
    # stream. Sequences must be unsliced before being fed into sinks.
    unsliced_frame = sc.streams.Unslice(masked_frame)

    stream = sp.NamedVideoStream(sc, '04_masked_video')
    sink = sc.io.Output(unsliced_frame, [stream])
    sc.run(sink, sp.PerfParams.estimate())

    stream.save_mp4('04_masked')
    stream.delete(sc)

    videos = ['04_masked.mp4']
    print('Finished! The following videos were written: {:s}'.format(
        ', '.join(videos)))
from scannerpy import Database, Job, DeviceType, BulkJob
from scannerpy.stdlib import parsers
import numpy as np
import cv2
import sys
import os.path
# Make the parent directory importable so the shared `util` module resolves.
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/..')
import util
# `now` is a wall-clock timer used for the timing printouts below.
from timeit import default_timer as now

with Database() as db:
    # Create a Scanner table from our video in the format (table name,
    # video path). If any videos fail to ingest, they'll show up in the failed
    # list. If force is true, it will overwrite existing tables of the same
    # name.
    example_video_path1 = util.download_video(1)
    example_video_path2 = util.download_video(2)

    # test time of ingest: only the ingest_videos call itself is timed, the
    # downloads above are excluded.
    start = now()
    input_tables, failed = db.ingest_videos(
        [('test_raw1', example_video_path1),
         ('test_raw2', example_video_path2)],
        force=True)
    print('Time to ingest videos: {:.6f}s'.format(now() - start))

    print(db.summarize())
    print('Failures:', failed)

    # Scanner processes videos by forming a graph of operations that operate
    # on input frames from a table and produce outputs to a new table.
from scannerpy import Database, DeviceType, Job
from scannerpy.stdlib import NetDescriptor, parsers, bboxes
import math
import os
import subprocess
import cv2
import sys
import os.path
# Make the parent directory importable so the shared `util` module resolves.
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/..')
import util

# NOTE(review): download_video() is called here and again inside the `with`
# block; presumably the second call reuses the downloaded file - confirm
# against util.
util.download_video()

with Database() as db:

    # TODO(wcrichto): comment the demo. Make the Scanner philosophy more clear.
    # Add some figures to the wiki perhaps explaining the high level

    # Network description is read from a TOML file relative to the current
    # working directory.
    descriptor = NetDescriptor.from_file(db, 'nets/caffe_facenet.toml')
    # Fill in the Facenet op arguments; the nested Caffe arguments receive a
    # copy of the network descriptor.
    facenet_args = db.protobufs.FacenetArgs()
    facenet_args.threshold = 0.5
    caffe_args = facenet_args.caffe_args
    caffe_args.net_descriptor.CopyFrom(descriptor.as_proto())
    caffe_args.batch_size = 2

    print('Ingesting video into Scanner ...')
    [input_table], _ = db.ingest_videos([('example', util.download_video())], force=True)
    base_batch = 4
    # Frame area in pixels, hard-coded for 1280x720 input.
    base_size = 1280*720
    # TODO(apoms): determine automatically from video
    current_size = 1280*720
import os.path sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/..') import util ################################################################################ # This tutorial shows how to organize your videos into Collections. # ################################################################################ with Database() as db: # Instead of ingesting each video into a table individually, we can group # video # tables into a single entity called a collection. Here, we create # a collection # called "example_collection" from the video in the previous # example. # Collections do not incur any runtime overhead, but are simply # an abstraction for more easily managing your videos. example_video_path = util.download_video() input_collection, _ = db.ingest_video_collection('example_collection', [example_video_path], force=True) print(db.summarize()) # You can retrieve table objects off the collection. table = output_collection.tables(0) frame = db.ops.FrameInput() hist = db.ops.Histogram(frame=frame) output_op = db.ops.Output(columns=[hist]) # You can use a collection to enumerate tables jobs = [] for table in input_collection.tables(): job = Job(op_args={
def main(num = 1):
    """Detect shots in an example movie and write a montage image.

    Ingests the movie, computes a per-frame histogram in Scanner, derives
    shot boundaries in Python, builds a montage of the boundary frames in
    Scanner and writes it to 'detected_shots.jpg'. Every stage is timed and
    the timings are printed.

    Args:
        num: Identifier of the example video, passed to
            ``util.download_video``.
    """
    movie_path = util.download_video(num)
    print('Detecting shots in movie {}'.format(movie_path))
    movie_name = 'shot_detect'

    # Use GPU kernels if we have a GPU
    if util.have_gpu():
        device = DeviceType.GPU
    else:
        device = DeviceType.CPU
    # NOTE(review): the detection above is unconditionally overridden, so all
    # ops run on the CPU. Kept as-is to preserve behavior; confirm whether
    # this is an intentional pin or a debugging leftover.
    device = DeviceType.CPU

    with Database() as db:
        print('Loading movie into Scanner DB...')
        total_time = 0.0
        start = now()

        ############ ############ ############ ############
        # 0. Ingest the video into the database
        ############ ############ ############ ############
        [movie_table], _ = db.ingest_videos([(movie_name, movie_path)],
                                            force=True)
        stop = now()
        total_time += stop - start
        print('Ingest time: {:.4f}s '.format(stop - start))
        print('Number of frames in movie: {:d}'.format(movie_table.num_rows()))

        start = now()
        ############ ############ ############ ############
        # 1. Run Histogram over the entire video in Scanner
        ############ ############ ############ ############
        frame = db.ops.FrameInput()
        histogram = db.ops.Histogram(
            frame=frame,
            device=device)
        output = db.ops.Output(columns=[histogram])
        job = Job(op_args={
            frame: movie_table.column('frame'),
            output: movie_name + '_hist'
        })
        bulk_job = BulkJob(output=output, jobs=[job])
        [hists_table] = db.run(bulk_job, force=True)
        stop = now()
        total_time += stop - start
        print('Compute histogram time: {:.4f}s, {:.1f} fps'.format(
            stop - start, movie_table.num_rows() / (stop - start)))
        hists_table.profiler().write_trace('shot_detect_hist.trace')

        start = now()
        ############ ############ ############ ############
        # 2. Load histograms and compute shot boundaries
        #    in python
        ############ ############ ############ ############
        # Read histograms from disk
        hists = [h for _, h in hists_table.load(['histogram'],
                                                parsers.histograms)]
        boundaries = compute_shot_boundaries(hists)
        stop = now()
        total_time += stop - start
        print('Found {:d} shots.'.format(len(boundaries)))
        print('Find boundaries time: {:.4f}s'.format(stop - start))

        start = now()
        ############ ############ ############ ############
        # 3. Create montage in Scanner
        ############ ############ ############ ############
        row_length = 16
        rows_per_item = 1
        target_width = 256
        item_size = row_length * rows_per_item

        # Compute partial row montages that we will stack together
        # at the end
        frame = db.ops.FrameInput()
        gather_frame = frame.sample()
        sliced_frame = gather_frame.slice()
        montage = db.ops.Montage(
            frame=sliced_frame,
            num_frames=item_size,
            target_width=target_width,
            frames_per_row=row_length,
            device=device)
        sampled_montage = montage.sample()
        output = db.ops.Output(
            columns=[sampled_montage.unslice().lossless()])

        # Drop trailing boundaries that do not fill a complete montage row.
        starts_remainder = len(boundaries) % item_size
        evenly_divisible = (starts_remainder == 0)
        if not evenly_divisible:
            boundaries = boundaries[0:len(boundaries) - starts_remainder]

        # One gather sampler per complete row. Floor division is required:
        # true division yields a float on Python 3, which range() rejects
        # with a TypeError.
        num_items = len(boundaries) // item_size

        job = Job(op_args={
            frame: movie_table.column('frame'),
            gather_frame: db.sampler.gather(boundaries),
            sliced_frame: db.partitioner.all(item_size),
            sampled_montage: [db.sampler.gather([item_size - 1])
                              for _ in range(num_items)],
            output: 'montage_image'
        })
        bulk_job = BulkJob(output=output, jobs=[job])

        [montage_table] = db.run(bulk_job, force=True)

        # Stack all partial montages together, starting from a one-pixel-high
        # blank strip of the full montage width.
        montage_img = np.zeros((1, target_width * row_length, 3),
                               dtype=np.uint8)
        for _, img in montage_table.column('montage').load():
            # Reverse the channel axis before stacking.
            img = np.flip(img, 2)
            montage_img = np.vstack((montage_img, img))

        stop = now()
        total_time += stop - start
        print('Create Montage time: {:.4f}s'.format(stop - start))
        montage_table.profiler().write_trace('shot_detect_montage.trace')

        start = now()
        ############ ############ ############ ############
        # 4. Write montage to disk
        ############ ############ ############ ############
        cv2.imwrite('detected_shots.jpg', montage_img)
        stop = now()
        total_time += stop - start
        print('Successfully generated detected_shots.jpg')
        print('Write image time: {:.4f}s'.format(stop - start))
        print('Total time: {:.4f}s'.format(total_time))
def main():
    """Walk through the optional attributes a Scanner op can declare.

    Ingests the example video, then registers and runs one op per attribute
    (device type, batch, stencil, bounded state) and saves each result with
    ``save_mp4``. An op declaring unbounded state is registered at the end
    purely as an illustration; it is never run. Finally, the names of the
    saved videos are printed.
    """
    db = Database()

    example_video_path = util.download_video()
    [input_table], _ = db.ingest_videos([('example', example_video_path)],
                                        force=True)

    # Names of the mp4 files saved by the sections below, for the final report
    videos = []

    # Many ops simply involve applying some processing to their inputs and then
    # returning their outputs. But there are also many operations in video
    # processing that require the ability to see adjacent frames (such as for
    # computing optical flow), need to keep state over time (such as for tracking
    # objects), or need to process multiple elements for efficiency reasons (such as
    # batching for DNNs).

    # Scanner ops therefore have several optional attributes that enable them to
    # support these forms of operations:

    # 1. Device Type:
    #   Ops can specify that they require CPUs or GPUs by declaring their device
    #   type. By default, the device_type is DeviceType.CPU.

    @scannerpy.register_python_op(device_type=DeviceType.CPU)
    def device_resize(config, frame: FrameType) -> FrameType:
        """Resize one frame to the 'width' x 'height' given in the op args."""
        return cv2.resize(frame, (config.args['width'], config.args['height']))

    frame = db.sources.FrameColumn()
    resized_frame = db.ops.device_resize(frame=frame, width=640, height=480)
    output = db.sinks.FrameColumn(columns={'frame': resized_frame})
    job = Job(op_args={
        frame: input_table.column('frame'),
        output: 'example_resize'
    })
    [table] = db.run(output=output, jobs=[job], force=True)
    table.column('frame').save_mp4('02_device_resize')
    videos.append('02_device_resize.mp4')

    # 2. Batch:
    #   The Op can receive multiple elements at once to enable SIMD or
    #   vector-style processing.

    @scannerpy.register_python_op(batch=10)
    def batch_resize(config, frame: Sequence[FrameType]) -> Sequence[FrameType]:
        """Resize every frame of the batch to the size given in the op args."""
        # The target size is the same for the whole batch, so look it up once
        size = (config.args['width'], config.args['height'])
        return [cv2.resize(fr, size) for fr in frame]

    # Here we specify that the resize op should receive a batch of 10
    # input elements at once. Logically, each element is still processed
    # independently but multiple elements are provided to enable efficient
    # batch processing. If there are not enough elements left in a stream,
    # the Op may receive less than a batch worth of elements.
    frame = db.sources.FrameColumn()
    resized_frame = db.ops.batch_resize(frame=frame, width=640, height=480,
                                        batch=10)
    output = db.sinks.FrameColumn(columns={'frame': resized_frame})
    job = Job(op_args={
        frame: input_table.column('frame'),
        output: 'example_batch_resize'
    })
    [table] = db.run(output=output, jobs=[job], force=True)
    table.column('frame').save_mp4('02_batch_resize')
    videos.append('02_batch_resize.mp4')

    # 3. Stencil:
    #   The Op requires a window of input elements (for example, the
    #   previous and next element) at the same time to produce an
    #   output.

    # Here, we use the stencil attribute to write an optical flow op which
    # computes flow between the current and next frame.
    @scannerpy.register_python_op(stencil=[0, 1])
    def optical_flow(config, frame: Sequence[FrameType]) -> FrameType:
        """Compute Farneback optical flow between frame[0] and frame[1]."""
        gray1 = cv2.cvtColor(frame[0], cv2.COLOR_BGR2GRAY)
        gray2 = cv2.cvtColor(frame[1], cv2.COLOR_BGR2GRAY)
        flow = cv2.calcOpticalFlowFarneback(gray1, gray2, None, 0.5, 3, 15, 3,
                                            5, 1.2, 0)
        return flow

    # This op visualizes the flow field by converting it into an rgb image
    @scannerpy.register_python_op()
    def visualize_flow(config, flow: FrameType) -> FrameType:
        """Render a flow field as an image: hue = direction, value = magnitude."""
        hsv = np.zeros(shape=(flow.shape[0], flow.shape[1], 3), dtype=np.uint8)
        # Full saturation everywhere
        hsv[..., 1] = 255
        mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
        # Angle in radians -> degrees, halved before storing as hue
        hsv[..., 0] = ang * 180 / np.pi / 2
        hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
        rgb = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
        return rgb

    frame = db.sources.FrameColumn()
    # This next line is using a feature we'll discuss in the next tutorial, but you
    # can think of it as selecting a subset of elements from the stream (here,
    # frames 0 to 30)
    range_frame = db.streams.Range(frame, 0, 30)
    flow = db.ops.optical_flow(frame=range_frame, stencil=[0, 1])
    flow_viz = db.ops.visualize_flow(flow=flow)
    output = db.sinks.FrameColumn(columns={'flow_viz': flow_viz})
    job = Job(op_args={
        frame: input_table.column('frame'),
        output: 'example_flow'
    })
    [table] = db.run(output=output, jobs=[job], force=True)
    table.column('flow_viz').save_mp4('02_flow')
    videos.append('02_flow.mp4')

    # 4. Bounded State:
    #   For each output, the Op requires at least W sequential
    #   "warmup" elements before it can produce a valid output.
    #   For example, if the output of this Op is sampled
    #   sparsely, this guarantees that the Op can "warmup"
    #   its state on a stream of W elements before producing the
    #   requested output.

    @scannerpy.register_python_op(bounded_state=60)
    class BackgroundSubtraction(scannerpy.Kernel):
        """Black out pixels that stay close to a running average of past frames."""

        def __init__(self, config):
            self.config = config
            self.alpha = config.args['alpha']
            self.thresh = config.args['threshold']
            # Initialize the running average here as well, so that execute()
            # does not raise AttributeError if it runs before reset() has
            # been called.
            self.average_image = None

        # Reset is called when the kernel switches to a new part of the stream
        # and so shouldn't maintain its previous state
        def reset(self):
            self.average_image = None

        def execute(self, frame: FrameType) -> FrameType:
            # The first frame after a reset seeds the running average
            if self.average_image is None:
                self.average_image = frame

            # A pixel is masked when any of its channels differs from the
            # running average by less than `thresh` (as a fraction of 255)
            mask = np.abs(frame - self.average_image) < 255 * self.thresh
            mask = np.any(mask, axis=2)

            # Zero out the masked pixels in a copy of the input frame
            masked_image = np.copy(frame)
            wmask = np.where(mask)
            masked_image[wmask[0], wmask[1], :] = 0

            # Blend the new frame into the running average with weight alpha
            self.average_image = (self.average_image * (1.0 - self.alpha) +
                                  frame * self.alpha)

            return masked_image

    # Here we wrote an op that performs background subtraction by keeping a
    # running average image over the past frames. We set `bounded_state=60`
    # to indicate that this kernel needs at least 60 frames before the output
    # should be considered reasonable.

    # First, we download a static camera video from youtube
    # import subprocess
    # subprocess.check_call(
    #     'youtube-dl -f 137 \'https://youtu.be/cVHqFqNz7eM\' -o test.mp4',
    #     shell=True)
    # [static_table], _ = db.ingest_videos([('static_video', 'test.mp4')],
    #                                      force=True)
    # With the download disabled, fall back to the example video
    static_table = input_table

    # Then we perform background subtraction and indicate we need 60 prior
    # frames to produce correct output
    frame = db.sources.FrameColumn()
    masked_frame = db.ops.BackgroundSubtraction(frame=frame,
                                                alpha=0.05, threshold=0.05,
                                                bounded_state=60)
    # Here, we say that we only want the outputs for this range of frames
    sampled_frame = db.streams.Range(masked_frame, 0, 120)
    output = db.sinks.Column(columns={'frame': sampled_frame})
    job = Job(op_args={
        frame: static_table.column('frame'),
        output: 'masked_video',
    })
    [table] = db.run(output=output, jobs=[job], force=True)
    table.column('frame').save_mp4('02_masked')
    videos.append('02_masked.mp4')

    # 5. Unbounded State:
    #     This Op will always process all preceding elements of
    #     its input streams before producing a requested output.
    #     This means that sampling operations after this Op
    #     can not change how many inputs it receives. In the next
    #     tutorial, we will show how this can be relaxed for
    #     sub-streams of the input.
    @scannerpy.register_python_op(unbounded_state=True)
    class Example(scannerpy.Kernel):
        """Placeholder kernel showing the unbounded_state attribute only."""

        def __init__(self, config):
            pass

        def reset(self):
            pass

        def execute(self, frame: FrameType) -> bytes:
            pass

    print('Finished! The following videos were written: {:s}'.format(
        ', '.join(videos)))