Example #1
def main():
    db = Database()

    # What if, instead of a video, you had a list of image files that you
    # wanted to process? Scanner provides an extensible interface for reading and
    # writing data to locations other than the database.

    # For example, let's download a few images now and create a list of their paths:

    util.download_images()
    image_paths = [
        'sample-frame-1.jpg', 'sample-frame-2.jpg', 'sample-frame-3.jpg'
    ]

    # Scanner provides a built-in source to read files from the local filesystem:

    compressed_images = db.sources.Files()
    # Like with db.sources.FrameColumn, we will bind the inputs to this source when
    # we define a job later on.

    # Let's write a pipeline that reads our images, resizes them, and writes them
    # back out as files to the filesystem.

    # Since the input images are compressed, we decompress them with the
    # ImageDecoder
    frame = db.ops.ImageDecoder(img=compressed_images)

    resized = db.ops.Resize(frame=frame, width=640, height=360)

    # Re-encode the image as JPEG
    encoded_frame = db.ops.ImageEncoder(frame=resized, format='jpg')

    # Write the compressed images to files
    output = db.sinks.Files(input=encoded_frame)

    resized_paths = ['resized-1.jpg', 'resized-2.jpg', 'resized-3.jpg']

    job = Job(
        op_args={
            compressed_images: {
                'paths': image_paths
            },
            output: {
                'paths': resized_paths
            }
        })

    db.run(output=output, jobs=[job])

    print('Finished! Wrote the following images: ' + ', '.join(resized_paths))
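
    # As a quick sanity check, a minimal sketch that confirms the sink wrote
    # the files (resized_paths are the output paths bound in the job above):
    import os
    for path in resized_paths:
        assert os.path.isfile(path), 'missing output: ' + path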
Example #2
    def handle(self, *args, **options):
        with open(options['path']) as f:
            paths = [s.strip() for s in f.readlines()]

        with Database() as db:
            labeler, _ = Labeler.objects.get_or_create(
                name=options['bbox_labeler'])

            # Only keep videos that exist and are not yet labeled
            filtered = []
            for path in paths:
                try:
                    video = Video.objects.get(path=path)
                except Video.DoesNotExist:
                    continue
                if Face.objects.filter(person__frame__video=video,
                                       labeler=labeler).exists():
                    continue
                filtered.append(path)

            stride = 24

            # Run the detector via Scanner
            faces_c = pipelines.detect_faces(
                db, [db.table(path).column('frame') for path in filtered],
                db.sampler.strided(stride), 'tmp_faces')

            for path, video_faces_table in zip(filtered, faces_c):
                video = Video.objects.get(path=path)

                table = db.table(path)
                imgs = table.load(['frame'],
                                  rows=range(0, table.num_rows(), stride))
                video_faces = video_faces_table.load(
                    ['bboxes'],
                    lambda lst, db: parsers.bboxes(lst[0], db.protobufs))

                for (i, frame_faces), (_, img) in zip(video_faces, imgs):
                    frame = Frame.objects.get(video=video, number=i * stride)
                    for bbox in frame_faces:
                        if labeler.name == 'dummy' and random.randint(0,
                                                                      10) == 1:
                            # Generate dummy labels, sometimes.
                            # TODO: add boundary checks; shouldn't matter much though.
                            bbox.x1 += 50
                            bbox.x2 += 50
                            bbox.y1 += 50
                            bbox.y2 += 50

                        p = Person(frame=frame)
                        p.save()
                        f = Face(person=p)
                        f.bbox_x1 = bbox.x1 / video.width
                        f.bbox_x2 = bbox.x2 / video.width
                        f.bbox_y1 = bbox.y1 / video.height
                        f.bbox_y2 = bbox.y2 / video.height
                        f.bbox_score = bbox.score
                        f.labeler = labeler
                        f.save()
Example #3
def timeout_db():
    # Create new config
    (cfg_path, cfg) = make_config(master_port='5155', worker_port='5160')

    # Setup and ingest video
    master = 'localhost:5155'
    workers = ['localhost:{:04d}'.format(5160 + d) for d in range(4)]
    with Database(
            config_path=cfg_path,
            no_workers_timeout=120,
            master=master,
            workers=workers,
            enable_watchdog=False) as db:
        (vid1_path, vid2_path) = download_videos()

        db.ingest_videos([('test1', vid1_path), ('test2', vid2_path)])

        yield db

        for worker in workers:
            channel = grpc.insecure_channel(worker)
            worker_stub = db.protobufs.WorkerStub(channel)
            try:
                worker_stub.Shutdown(
                    db.protobufs.Empty(), timeout=db._grpc_timeout)
            except grpc.RpcError as e:
                pass

        # Tear down
        run([
            'rm', '-rf', cfg['storage']['db_path'], cfg_path, vid1_path,
            vid2_path
        ])
Example #4
def main():
    # Look at resize_op/resize_op.cpp to start this tutorial.

    db = Database()

    cwd = os.path.dirname(os.path.abspath(__file__))
    if not os.path.isfile(os.path.join(cwd,
                                       'resize_op/build/libresize_op.so')):
        print(
            'You need to build the custom op first: \n'
            '$ pushd {}/resize_op; mkdir build && cd build; cmake ..; make; popd'
            .format(cwd))
        exit()

    # To load a custom op into the Scanner runtime, we use db.load_op to open the
    # shared library we compiled. If the op takes arguments, it also optionally
    # takes a path to the generated python file for the arg protobuf.
    db.load_op(os.path.join(cwd, 'resize_op/build/libresize_op.so'),
               os.path.join(cwd, 'resize_op/build/resize_pb2.py'))

    frame = db.sources.FrameColumn()
    # Then we use our op just like in the other examples.
    resize = db.ops.MyResize(frame=frame, width=200, height=300)
    output_op = db.sinks.Column(columns={'resized_frame': resize})
    job = Job(op_args={
        frame: db.table('example').column('frame'),
        output_op: 'example_resized',
    })
    db.run(output_op, [job], force=True)
Example #5
def main():
    db = Database()

    frame = db.sources.FrameColumn()
    histogram = db.ops.Histogram(frame=frame)
    output_op = db.sinks.Column(columns={'hist': histogram})
    job = Job(
        op_args={
            frame: db.table('example').column('frame'),
            output_op: 'example_hist_profile'
        })
    [output_table] = db.run(output_op, [job], force=True)

    # The profiler contains information about how long different parts of your
    # computation take to run. We use Google Chrome's trace format, which you
    # can view by going to chrome://tracing in Chrome and clicking "load" in
    # the top left.
    output_table.profiler().write_trace('hist.trace')
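
    # Since the trace uses Chrome's JSON trace format, it can also be inspected
    # programmatically. A minimal sketch, assuming the file holds the standard
    # 'traceEvents' list (exact event names vary by Scanner version):
    import json
    from collections import defaultdict

    with open('hist.trace') as f:
        trace = json.load(f)

    events = trace['traceEvents'] if isinstance(trace, dict) else trace
    totals = defaultdict(float)
    for ev in events:
        # Complete events ('X') carry a duration in microseconds.
        if ev.get('ph') == 'X':
            totals[ev.get('name', 'unknown')] += ev.get('dur', 0)

    for name, us in sorted(totals.items(), key=lambda kv: -kv[1])[:10]:
        print('{}: {:.1f} ms'.format(name, us / 1e3))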
Example #6
def main():
    db = Database()
    frame = db.sources.FrameColumn()

    # You can tell Scanner which frames of the video (or which rows of a video
    # table) you want to sample. Here, we indicate that we want to stride
    # the frame column by 4 (select every 4th frame)
    strided_frame = db.streams.Stride(frame, 4)

    # We process the sampled frame same as before.
    hist = db.ops.Histogram(frame=strided_frame)
    output_op = db.sinks.Column(columns={'hist': hist})

    # For each job, you can specify how sampling should be performed for
    # a specific stream. In the same way we used the op_args argument to bind
    # a table to an input column, we can bind sampling arguments to strided_frame
    # to override the default striding of 4 we specified above
    job = Job(
        op_args={
            frame: db.table('example').column('frame'),
            # The "strided" sampling mode will run over every 8th frame,
            # i.e. frames [0, 8, 16, ...]
            strided_frame: 8,
            output_op: 'example_hist_strided'
        })
    output_tables = db.run(output_op, [job], force=True)

    # Loop over the column's rows. Each row is a tuple of the frame number and
    # value for that row.
    video_hists = output_tables[0].column('hist').load(readers.histograms)
    num_rows = 0
    for frame_hists in video_hists:
        assert len(frame_hists) == 3
        assert frame_hists[0].shape[0] == 16
        num_rows += 1
    assert num_rows == round(db.table('example').num_rows() / 8)

    # Here are some examples of other sampling modes.
    # Range takes a specific subset of a video. Here, it runs over all frames
    # from 0 to 100
    db.streams.Range(frame, 0, 100)

    # Gather takes an arbitrary list of frames from a video.
    db.streams.Gather(frame, [10, 17, 32])
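
    # Like the stride override above, Range and Gather arguments can be bound
    # per job through op_args. A minimal sketch; the (start, end) override form
    # for Range is an assumption modeled on the integer override for Stride:
    range_frame = db.streams.Range(frame, 0, 100)
    range_hist = db.ops.Histogram(frame=range_frame)
    range_output = db.sinks.Column(columns={'hist': range_hist})
    range_job = Job(
        op_args={
            frame: db.table('example').column('frame'),
            range_frame: (0, 50),  # assumed override form, not a verified API
            range_output: 'example_hist_range'
        })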
Example #7
def extract(frames):
    with Database() as db:
        frame = db.ops.FrameInput()
        gathered = frame.sample()
        # TODO(wcrichto): use GPU for resize if exists
        resized = db.ops.Resize(frame=gathered, width=640, preserve_aspect=True, device=DeviceType.CPU)
        compressed = db.ops.ImageEncoder(frame=resized)
        output = db.ops.Output(columns=[compressed])
        job = Job(op_args={
            frame: db.table(frames[0].video.path).column('frame'),
            gathered: db.sampler.gather([frame.number for frame in frames]),
            output: '_ignore'
        })

        start = now()
        [output] = db.run(BulkJob(output=output, jobs=[job]), force=True)
        _print('Extract: {:.3f}'.format(now() - start))

        start = now()
        jpgs = [(jpg[0], frame) for (_, jpg), frame in zip(output.load(['img']), frames)]
        _print('Loaded: {:.3f}'.format(now() - start))

        if ESPER_ENV == 'google':
            temp_dir = tempfile.mkdtemp()

            def write_jpg(jpg_and_frame):
                jpg, frame = jpg_and_frame
                with open('{}/frame_{}.jpg'.format(temp_dir, frame.id), 'wb') as f:
                    f.write(jpg)

            start = now()
            with ThreadPoolExecutor(max_workers=64) as executor:
                list(executor.map(write_jpg, jpgs))
            sp.check_call(
                shlex.split('gsutil -m mv "{}/*" gs://{}/{}/thumbnails/{}'.format(
                    temp_dir, BUCKET, DATA_PATH, DATASET)))
            _print('Write: {:.3f}'.format(now() - start))

        elif ESPER_ENV == 'local':

            try:
                os.makedirs('assets/thumbnails/' + DATASET)
            except OSError:
                pass

            def write_jpg(jpg_and_frame):
                jpg, frame = jpg_and_frame
                with open('assets/thumbnails/{}/frame_{}.jpg'.format(DATASET, frame.id), 'wb') as f:
                    f.write(jpg)

            start = now()
            with ThreadPoolExecutor(max_workers=64) as executor:
                list(executor.map(write_jpg, jpgs))
            _print('Write: {:.3f}'.format(now() - start))
        return jpgs
Example #8
def main():
    movie_path = util.download_video() if len(sys.argv) <= 1 else sys.argv[1]
    print('Detecting shots in movie {}'.format(movie_path))
    movie_name = os.path.basename(movie_path)

    # Use GPU kernels if we have a GPU
    if have_gpu():
        device = DeviceType.GPU
        scanner_montage = True
    else:
        device = DeviceType.CPU
        scanner_montage = False

    with Database() as db:
        print('Loading movie into Scanner database...')
        s = time.time()
        [movie_table], _ = db.ingest_videos([(movie_name, movie_path)],
                                            force=True)
        print('Time: {:.1f}s'.format(time.time() - s))

        s = time.time()
        print('Computing a color histogram for each frame...')
        frame = movie_table.as_op().all()
        histogram = db.ops.Histogram(frame=frame, device=device)
        job = Job(columns=[histogram], name=movie_name + '_hist')
        hists_table = db.run(job, force=True)
        print('\nTime: {:.1f}s'.format(time.time() - s))

        s = time.time()
        print('Computing shot boundaries...')
        # Read histograms from disk
        hists = [
            h for _, h in hists_table.load(['histogram'], parsers.histograms)
        ]
        boundaries = compute_shot_boundaries(hists)
        print('Time: {:.1f}s'.format(time.time() - s))

        s = time.time()
        print('Creating shot montage...')
        if scanner_montage:
            # Make montage in scanner
            montage_img = make_montage_scanner(db, movie_table, boundaries)
        else:
            # Make montage in python
            # Loading the frames for each shot boundary
            frames = movie_table.load(['frame'], rows=boundaries)
            montage_img = make_montage(len(boundaries), frames)

        print('')
        print('Time: {:.1f}s'.format(time.time() - s))

        cv2.imwrite('shots.jpg', montage_img)
        print('Successfully generated shots.jpg')
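
# compute_shot_boundaries is assumed by the example above. A minimal sketch of
# one common approach: flag frames whose color-histogram difference from the
# previous frame spikes above the mean by three standard deviations.
import numpy as np

def compute_shot_boundaries(hists):
    diffs = np.array([
        np.sum(np.abs(np.asarray(h2, dtype=np.int64) -
                      np.asarray(h1, dtype=np.int64)))
        for h1, h2 in zip(hists, hists[1:])
    ])
    threshold = diffs.mean() + 3 * diffs.std()
    return [i + 1 for i, d in enumerate(diffs) if d > threshold]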
Example #9
def fault_db():
    # Create new config
    #with tempfile.NamedTemporaryFile(delete=False) as f:
    with open('/tmp/config_test', 'w') as f:
        cfg = Config.default_config()
        cfg['storage']['db_path'] = tempfile.mkdtemp()
        cfg['network']['master'] = 'localhost'
        cfg['network']['master_port'] = '5010'
        cfg['network']['worker_port'] = '5011'
        f.write(toml.dumps(cfg))
        cfg_path = f.name

    # Setup and ingest video
    with Database(master='localhost:5010',
                  workers=[],
                  config_path=cfg_path, no_workers_timeout=120) as db:
        # Download video from GCS
        url = "https://storage.googleapis.com/scanner-data/test/short_video.mp4"
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as f:
            host = socket.gethostname()
            # HACK: special proxy case for Ocean cluster
            if host in ['ocean', 'crissy', 'pismo', 'stinson']:
                resp = requests.get(
                    url,
                    stream=True,
                    proxies={'https': 'http://proxy.pdl.cmu.edu:3128/'})
            else:
                resp = requests.get(url, stream=True)
            assert resp.ok
            for block in resp.iter_content(1024):
                f.write(block)
            vid1_path = f.name

        # Make a second one shorter than the first
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as f:
            vid2_path = f.name
        run([
            'ffmpeg', '-y', '-i', vid1_path, '-ss', '00:00:00', '-t',
            '00:00:10', '-c:v', 'libx264', '-strict', '-2', vid2_path
        ])

        db.ingest_videos([('test1', vid1_path), ('test2', vid2_path)])

        yield db

        # Tear down
        run([
            'rm', '-rf', cfg['storage']['db_path'], cfg_path, vid1_path,
            vid2_path
        ])
Example #10
def main():
    # Now we can use these new Ops in Scanner:
    db = Database()

    # Download an example video
    example_video_path = util.download_video()

    # Ingest it into the database
    [input_table], _ = db.ingest_videos([('example', example_video_path)],
                                        force=True)

    frame = db.sources.FrameColumn()

    resized_frame_fn = db.ops.resize_fn(frame=frame, width=640, height=480)

    resized_frame_class = db.ops.ResizeClass(frame=frame,
                                             width=320,
                                             height=240)

    output = db.sinks.FrameColumn(columns={
        'frame1': resized_frame_fn,
        'frame2': resized_frame_class
    })

    job = Job(op_args={
        frame: input_table.column('frame'),
        output: 'example_python_op'
    })

    [table] = db.run(output=output, jobs=[job], force=True)

    table.column('frame1').save_mp4('01_resized_fn')
    table.column('frame2').save_mp4('01_resized_class')

    print('Finished! Two videos were saved to the current directory: '
          '01_resized_fn.mp4, 01_resized_class.mp4')
Example #11
def no_workers_db():
    # Create new config
    (cfg_path, cfg) = make_config(master_port='5020', worker_port='5021')

    # Setup and ingest video
    with Database(debug=True, workers=[], config_path=cfg_path) as db:
        (vid1_path, vid2_path) = download_videos()

        db.ingest_videos([('test1', vid1_path), ('test2', vid2_path)])

        yield db

        # Tear down
        run([
            'rm', '-rf', cfg['storage']['db_path'], cfg_path, vid1_path,
            vid2_path
        ])
Example #12
def db():
    # Create new config
    (cfg_path, cfg) = make_config()

    # Setup and ingest video
    with Database(config_path=cfg_path, debug=True) as db:
        (vid1_path, vid2_path) = download_videos()

        db.ingest_videos([('test1', vid1_path), ('test2', vid2_path)])

        db.ingest_videos(
            [('test1_inplace', vid1_path), ('test2_inplace', vid2_path)],
            inplace=True)

        yield db

        # Tear down
        run([
            'rm', '-rf', cfg['storage']['db_path'], cfg_path, vid1_path,
            vid2_path
        ])
Example #13
    def create(cls, cluster=None, multiworker=False, **kwargs):
        if cluster is not None:
            db = cluster.database(**kwargs)

        else:
            workers = [
                'localhost:{}'.format(5002 + i)
                for i in range(mp.cpu_count() // 8)
            ] if multiworker else None
            # import scannerpy.libscanner as bindings
            # import scanner.metadata_pb2 as metadata_types
            # params = metadata_types.MachineParameters()
            # params.ParseFromString(bindings.default_machine_params())
            # params.num_load_workers = 2
            # params.num_save_workers = 2
            db = Database(
                #machine_params=params.SerializeToString(),
                workers=workers,
                **kwargs)

        return cls(db, cluster)
Example #14
def fault_db():
    # Create new config
    (cfg_path, cfg) = make_config(master_port='5010',
                                  worker_port='5011',
                                  path='/tmp/config_test')

    # Setup and ingest video
    with Database(master='localhost:5010',
                  workers=[],
                  config_path=cfg_path,
                  no_workers_timeout=120) as db:
        (vid1_path, vid2_path) = download_videos()

        db.ingest_videos([('test1', vid1_path), ('test2', vid2_path)])

        yield db

        # Tear down
        run([
            'rm', '-rf', cfg['storage']['db_path'], cfg_path, vid1_path,
            vid2_path
        ])
Example #15
def timeout_db():
    # Create new config
    (cfg_path, cfg) = make_config(master_port='5155', worker_port='5160')

    # Setup and ingest video
    master = 'localhost:5155'
    workers = ['localhost:{:04d}'.format(5160 + d) for d in range(4)]
    with Database(config_path=cfg_path,
                  no_workers_timeout=120,
                  master=master,
                  workers=workers) as db:
        (vid1_path, vid2_path) = download_videos()

        db.ingest_videos([('test1', vid1_path), ('test2', vid2_path)])

        yield db

        # Tear down
        run([
            'rm', '-rf', cfg['storage']['db_path'], cfg_path, vid1_path,
            vid2_path
        ])
Example #16
def main():
    with Database() as db:
        if not db.has_table('example'):
            print('Ingesting video')
            db.ingest_videos([('example', '/bigdata/wcrichto/videos/movies/fightClub.mp4')], force=True)

        if not db.has_table('example_jpg'):
            print('Ingesting images')
            num_rows = db.table('example').num_rows()
            rows = [[open('frames-max/{:06d}.jpg'.format(i + 1), 'rb').read()] for i in range(num_rows)]
            db.new_table('example_jpg', ['jpg'], rows, force=True)

        t = decode(db, db.table('example'))
        print('Video (CPU)', t)

        t = decode(db, db.table('example'), device=DeviceType.GPU)
        print('Video (GPU)', t)

        t = decode(db, db.table('example_jpg'), image=True)
        print('Images (CPU)', t)

        t = decode(db, db.table('example_jpg'), image=True, device=DeviceType.GPU)
        print('Images (GPU)', t)
Example #17
def db():
    # Create new config
    with tempfile.NamedTemporaryFile(delete=False) as f:
        cfg = Config.default_config()
        cfg['storage']['db_path'] = tempfile.mkdtemp()
        f.write(toml.dumps(cfg))
        cfg_path = f.name

    # Setup and ingest video
    with Database(config_path=cfg_path, debug=True) as db:
        url = "https://storage.googleapis.com/scanner-data/test/short_video.mp4"
        with tempfile.NamedTemporaryFile(delete=False) as f:
            resp = requests.get(url, stream=True)
            assert resp.ok
            for block in resp.iter_content(1024):
                f.write(block)
            vid_path = f.name
        db.ingest_videos([('test', vid_path)])

        yield db

        # Tear down
        subprocess.check_call(
            ['rm', '-rf', cfg['storage']['db_path'], cfg_path, vid_path])
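
# A minimal sketch of how a generator fixture like the one above is usually
# consumed (assumes pytest; 'test' is the table the fixture ingests):
import pytest

@pytest.fixture(scope='module')
def scanner_db():
    yield from db()

def test_ingested_video(scanner_db):
    assert scanner_db.table('test').num_rows() > 0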
Example #18
def main(num=1):
  
  movie_path = util.download_video(num)
  print('Detecting shots in movie {}'.format(movie_path))
  movie_name = 'shot_detect'

  # Use GPU kernels if we have a GPU
  if util.have_gpu():
    device = DeviceType.GPU
  else:
    device = DeviceType.CPU

  # Force CPU kernels for this run, overriding the GPU check above
  device = DeviceType.CPU

  with Database() as db:
    print('Loading movie into Scanner DB...')
    total_time = 0.0
    start = now()

    ############ ############ ############ ############
    # 0. Ingest the video into the database
    ############ ############ ############ ############
    [movie_table], _ = db.ingest_videos([(movie_name, movie_path)],
                      force=True)
    stop = now()
    total_time += stop - start
    print('Ingest time: {:.4f}s '.format(stop - start))
    print('Number of frames in movie: {:d}'.format(movie_table.num_rows()))

    start = now()
    ############ ############ ############ ############
    # 1. Run Histogram over the entire video in Scanner
    ############ ############ ############ ############
    frame = db.ops.FrameInput()
    histogram = db.ops.Histogram(
      frame = frame,
      device = device)
    output = db.ops.Output(columns=[histogram])
    job = Job(op_args={
      frame: movie_table.column('frame'),
      output: movie_name + '_hist'
    })
    bulk_job = BulkJob(output=output, jobs=[job])
    [hists_table] = db.run(bulk_job, force=True)

    stop = now()
    total_time += stop - start
    print('Compute histogram time: {:.4f}s, {:.1f} fps'.format(
      stop - start, movie_table.num_rows() / (stop - start)))

    hists_table.profiler().write_trace('shot_detect_hist.trace')

    start = now()
    ############ ############ ############ ############
    # 2. Load histograms and compute shot boundaries
    #  in python
    ############ ############ ############ ############
    # Read histograms from disk
    hists = [h for _, h in hists_table.load(['histogram'],
                        parsers.histograms)]
    boundaries = compute_shot_boundaries(hists)
    stop = now()
    total_time += stop - start
    print('Found {:d} shots.'.format(len(boundaries)))
    print('Find boundaries time: {:.4f}s'.format(stop - start))

    start = now()
    ############ ############ ############ ############
    # 3. Create montage in Scanner
    ############ ############ ############ ############

    row_length = 16
    rows_per_item = 1
    target_width = 256
    item_size = row_length * rows_per_item

    # Compute partial row montages that we will stack together
    # at the end
    frame = db.ops.FrameInput()
    gather_frame = frame.sample()
    sliced_frame = gather_frame.slice()
    montage = db.ops.Montage(
      frame = sliced_frame,
      num_frames = item_size,
      target_width = target_width,
      frames_per_row = row_length,
      device = device)
    sampled_montage = montage.sample()
    output = db.ops.Output(
      columns=[sampled_montage.unslice().lossless()])

    starts_remainder = len(boundaries) % item_size
    evenly_divisible = (starts_remainder == 0)
    if not evenly_divisible:
      boundaries = boundaries[0:len(boundaries) - starts_remainder]

    job = Job(op_args={
      frame: movie_table.column('frame'),
      gather_frame: db.sampler.gather(boundaries),
      sliced_frame: db.partitioner.all(item_size),
      sampled_montage: [db.sampler.gather([item_size - 1])
                for _ in range(len(boundaries) // item_size)],
      output: 'montage_image'
    })

    bulk_job = BulkJob(output=output, jobs=[job])

    [montage_table] = db.run(bulk_job, force=True)
    
    # Stack all partial montages together
    montage_img = np.zeros((1, target_width * row_length, 3), dtype=np.uint8)
    for idx, img in montage_table.column('montage').load():
      img = np.flip(img, 2)
      montage_img = np.vstack((montage_img, img))

    stop = now()
    total_time += stop - start
    print('Create Montage time: {:.4f}s'.format(stop - start))
    montage_table.profiler().write_trace('shot_detect_montage.trace')

    start = now()
    ############ ############ ############ ############
    # 4. Write montage to disk
    ############ ############ ############ ############
    cv2.imwrite('detected_shots.jpg', montage_img)
    stop = now()
    total_time += stop - start
    print('Successfully generated detected_shots.jpg')
    print('Write image time: {:.4f}s'.format(stop - start))
    print('Total time: {:.4f}s'.format(total_time))
Example #19
from scannerpy import Database, DeviceType, Job
import os
import os.path as osp
import numpy as np
import time
import sys

if len(sys.argv) <= 1:
    print('Usage: main.py <video_file>')
    exit(1)

video_path = sys.argv[1]
print('Performing optical flow on {}...'.format(video_path))
video_name = os.path.splitext(os.path.basename(video_path))[0]

db = Database()
if not db.has_table(video_name):
    db.ingest_videos([(video_name, video_path)])
input_table = db.table(video_name)

frame = db.sources.FrameColumn()
flow = db.ops.OpticalFlow(
    frame = frame,
    device=DeviceType.CPU)
sampled_flow = db.streams.Range(flow, 0, 60)
output = db.sinks.Column(columns={'flow': sampled_flow})

job = Job(op_args={
    frame: input_table.column('frame'),
    output: input_table.name() + '_flow'
})
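
# The snippet stops before the job is executed; a minimal sketch of running it,
# modeled on the other examples in this collection:
[output_table] = db.run(output, [job], force=True)

# Optical flow outputs are per-pixel (dx, dy) fields; loading them back is a
# sketch as well, since decoding depends on the column type:
flows = list(output_table.column('flow').load())
print('Computed flow for {} frames'.format(len(flows)))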
Example #20
def test_pymxnet(num = 3, fm_num = 1, out_dir = './', batch = 1):

  if num > 4:
    test_video_path = util.download_video2('http://web.stanford.edu/~jamesh93/video/wild480p.mkv')
  else:
    test_video_path = util.download_video1(num, fm_num)

  print('#{:d} video, #{:d} format, outdir: {}'.format(num, fm_num, out_dir))
  if util.have_gpu():
    device = DeviceType.GPU
  else:
    device = DeviceType.CPU

  script_dir = os.path.dirname(os.path.abspath(__file__))

  with Database() as db:
    # if not os.path.isfile('pymxnet_op/build/libpymxnet_op.so'):
    #   print('You need to build the custom op first: \n'
    #       '$ cd pymxnet_op; mkdir build && cd build; cmake ..; make')
    #   exit()

    # # To load a custom op into the Scanner runtime, we use db.load_op to open the
    # # shared library we compiled. If the op takes arguments, it also optionally
    # # takes a path to the generated python file for the arg protobuf.
    # db.load_op('pymxnet_op/build/libpymxnet_op.so', 'pymxnet_op/build/pymxnet_pb2.py')
    db.register_op('PyMxnet', [('frame', ColumnType.Video)], ['class'])
    kernel_path = script_dir + '/pymxnet_op/pymxnet_op.py'
    db.register_python_kernel('PyMxnet', DeviceType.CPU, kernel_path, batch=10)

    start = now()
    [input_table], failed = db.ingest_videos([ 
        ('test_pymxnet_raw', test_video_path)], force=True)
    stop = now()
    delta = stop - start
    print('Time to ingest videos: {:.4f}s, fps: {:.4f}'.format(
      delta, input_table.num_rows() / delta))
    num_rows = input_table.num_rows()
    print('Number of frames in movie: {:d}'.format(num_rows))
    
    if len(failed) > 0:
      print('Failures:', failed)

    # Start to analyze the movie
    start = now()
    frame = db.ops.FrameInput()
    # Then we use our op just like in the other examples.
    classes = db.ops.PyMxnet(frame = frame, batch = batch)
    output_op = db.ops.Output(columns=[classes])
    job = Job(
      op_args={
        frame: db.table('test_pymxnet_raw').column('frame'),
        output_op: 'test_pymxnet_out'
      }
    )
    bulk_job = BulkJob(output=output_op, jobs=[job])
    [output_table] = db.run(bulk_job, force=True, profiling=False, pipeline_instances_per_node=1, work_packet_size=WORK_PACKET_SIZE)

    stop = now()
    delta = stop - start
    print('Batch: {:d} Python MXNet time: {:.4f}s, {:.1f} fps\n'.format(
        batch, delta, input_table.num_rows() / delta))
Example #21
    jq '.items[0].spec.nodeName' -r | \
    xargs -I {} kubectl get nodes/{} -o json | \
    jq '.status.addresses[] | select(.type == "ExternalIP") | .address' -r
    ''',
                         shell=True).strip().decode('utf-8')

    port = sp.check_output('''
    kubectl get svc/scanner-master -o json | \
    jq '.spec.ports[0].nodePort' -r
    ''',
                           shell=True).strip().decode('utf-8')

    master = '{}:{}'.format(ip, port)
    print(master)
    db = Database(master=master,
                  start_cluster=False,
                  config_path='./config.toml',
                  grpc_timeout=60)
else:
    db = Database()

cwd = os.path.dirname(os.path.abspath(__file__))
if not os.path.isfile(os.path.join(cwd, 'segment_op/build/libsegment_op.so')):
    print(
        'You need to build the custom op first: \n'
        '$ pushd {}/segment_op; mkdir build && cd build; cmake ..; make; popd'.
        format(cwd))
    exit()

# To load a custom op into the Scanner runtime, we use db.load_op to open the
# shared library we compiled. If the op takes arguments, it also optionally
# takes a path to the generated python file for the arg protobuf.
Example #22
        jq '.items[0].spec.nodeName' -r | \
        xargs -I {} kubectl get nodes/{} -o json | \
        jq '.status.addresses[] | select(.type == "ExternalIP") | .address' -r
        ''',
                             shell=True).strip().decode('utf-8')

        port = sp.check_output('''
        kubectl get svc/scanner-master -o json | \
        jq '.spec.ports[0].nodePort' -r
        ''',
                               shell=True).strip().decode('utf-8')

        master = '{}:{}'.format(ip, port)
        print(master)
        db = Database(master=master,
                      start_cluster=False,
                      config_path='./config.toml',
                      grpc_timeout=60)
    else:
        db = Database()

    config = db.config.config['storage']
    params = {
        'bucket': opt.bucket,
        'storage_type': config['type'],
        'endpoint': 'storage.googleapis.com',
        'region': 'US'
    }

    encoded_image = db.sources.Files(**params)
    frame = db.ops.ImageDecoder(img=encoded_image)
Example #23
                if pose[i, 2] < 0.35: continue
                x = int(pose[i, 0] * frame.shape[1])
                y = int(pose[i, 1] * frame.shape[0])
                cv2.circle(frame, (x, y), 8, (255, 0, 0), 3)
        return frame


if len(sys.argv) <= 1:
    print('Usage: main.py <video_file>')
    exit(1)

movie_path = sys.argv[1]
print('Detecting poses in video {}'.format(movie_path))
movie_name = os.path.splitext(os.path.basename(movie_path))[0]

db = Database()
video_path = movie_path
if not db.has_table(movie_name):
    print('Ingesting video into Scanner ...')
    db.ingest_videos([(movie_name, video_path)], force=True)
input_table = db.table(movie_name)

sampler = db.streams.Range
sampler_args = {'start': 120, 'end': 480}

[poses_table] = pipelines.detect_poses(db, [input_table.column('frame')],
                                       sampler, sampler_args,
                                       '{:s}_poses'.format(movie_name))

print('Drawing on frames...')
frame = db.sources.FrameColumn()
Example #24
    p.add_argument(
        '--video-path',
        type=str,
        required=True,
        help=('Path to video to process.'))

    args = p.parse_args()

    weights_path = args.weights_path
    config_path = args.config_path
    movie_path = args.video_path

    print('Detecting objects in movie {}'.format(movie_path))
    movie_name = os.path.splitext(os.path.basename(movie_path))[0]

    db = Database()
    [input_table], failed = db.ingest_videos(
        [('example', movie_path)], force=True)

    frame = db.sources.FrameColumn()
    strided_frame = db.streams.Range(frame, 0, 60)

    # Call the newly created object detect op
    cls_boxes, cls_segms, cls_keyps = db.ops.Detectron(
        frame=strided_frame,
        config_path=config_path,
        weights_path=weights_path,
        device=DeviceType.GPU)

    objdet_frame = db.ops.DetectronVizualize(
       frame=strided_frame,
Example #25
        jq '.items[0].spec.nodeName' -r | \
        xargs -I {} kubectl get nodes/{} -o json | \
        jq '.status.addresses[] | select(.type == "ExternalIP") | .address' -r
        ''',
                             shell=True).strip().decode('utf-8')

        port = sp.check_output('''
        kubectl get svc/scanner-master -o json | \
        jq '.spec.ports[0].nodePort' -r
        ''',
                               shell=True).strip().decode('utf-8')

        master = '{}:{}'.format(ip, port)
        print(master)
        db = Database(master=master,
                      start_cluster=False,
                      config_path='./config.toml',
                      grpc_timeout=60)
        print('db was created.')
    else:
        db = Database()

    cwd = os.path.dirname(os.path.abspath(__file__))
    # cwd = '/home/krematas/code/scannerapps/soccer/instance_segmentation/'
    if not os.path.isfile(os.path.join(cwd, 'edges_op/build/libedges_op.so')):
        print(
            'You need to build the custom op first: \n'
            '$ pushd {}/edges_op; mkdir build && cd build; cmake ..; make; popd'
            .format(cwd))
        exit()

    # To load a custom op into the Scanner runtime, we use db.load_op to open the
Example #26
from scannerpy import Database, DeviceType, Job, ColumnType, FrameType
import pickle


def test_python_source(db):
    # Build some in-memory test data to pass through the pipeline
    py_data = [{'{:d}'.format(i): i} for i in range(4)]

    data = db.sources.Python()
    pass_data = db.ops.Pass(input=data)
    output_op = db.sinks.Column(columns={'dict': pass_data})
    job = Job(op_args={
        data: {
            'data': pickle.dumps(py_data)
        },
        output_op: 'test_python_source',
    })

    tables = db.run(output_op, [job], force=True, show_progress=False)

    num_rows = 0
    for i, buf in enumerate(tables[0].column('dict').load()):
        d = pickle.loads(buf)
        assert d['{:d}'.format(i)] == i
        num_rows += 1
    assert num_rows == 4


db = Database()
test_python_source(db)
Example #27
from scannerpy import Database, Job, DeviceType

################################################################################
# This tutorial shows how to look at profiling information for your job.       #
################################################################################

db = Database()

frame = db.sources.FrameColumn()
histogram = db.ops.Histogram(frame=frame)
output_op = db.sinks.Column(columns={'hist': histogram})
job = Job(op_args={
    frame: db.table('example').column('frame'),
    output_op: 'example_hist_profile'
})
[output_table] = db.run(output_op, [job], force=True)

# The profiler contains information about how long different parts of your
# computation take to run. We use Google Chrome's trace format, which you
# can view by going to chrome://tracing in Chrome and clicking "load" in
# the top left.
output_table.profiler().write_trace('hist.trace')

# Each row corresponds to a different part of the system, e.g. the thread
# loading bytes from disk or the thread running your kernels. If you have
# multiple pipelines or multiple nodes, you will see many of these evaluate
# threads.
Example #28
from scannerpy import Database, DeviceType, BulkJob, Job
from scannerpy.stdlib import parsers
import numpy as np
import cv2
import os.path
import util

with Database(master='localhost:8080', start_cluster=False) as db:
    print('Connected!')

    example_video_path = 'tvnews/segments/FOXNEWS_20121009_220000_Special_Report_With_Bret_Baier_segment.mp4'

    [input_table], failed = db.ingest_videos([('example2', example_video_path)],
                                             force=True)

    frame = db.ops.FrameInput()
    hist = db.ops.Histogram(frame=frame)
    output_op = db.ops.Output(columns=[hist])

    job = Job(op_args={
        frame: db.table('example2').column('frame'),
        output_op: '_ignore'
    })
    bulk_job = BulkJob(output=output_op, jobs=[job])

    output = db.run(bulk_job, force=True)
Example #29
from scannerpy import Database, Job, DeviceType

################################################################################
# This tutorial shows how to combine multiple operators into a computation     #
# graph and wire inputs/outputs.                                               #
################################################################################

with Database() as db:

    # Scanner can take a directed acyclic graph (DAG) of operators and pass data
    # between them. Each graph starts with data from an input table.
    frame, frame_info = db.table('example').as_op().all()

    blurred_frame, _ = db.ops.Blur(frame=frame,
                                   frame_info=frame_info,
                                   kernel_size=3,
                                   sigma=0.5)

    # Multiple operators can be hooked up in a computation by using the outputs
    # of one as the inputs of another.
    histogram = db.ops.Histogram(frame=blurred_frame, frame_info=frame_info)

    job = Job(columns=[histogram], name='output_table_name')

    db.run(job, force=True)
Example #30
from scannerpy import Database, Job, DeviceType

################################################################################
# This tutorial discusses how Scanner compresses output columns, how to        #
# control how and when this compression happens, and how to export compressed  #
# video files.                                                                 #
################################################################################

db = Database()


# Frames on disk can either be stored uncompressed (raw bits) or compressed
# (encoded using some form of image or video compression). When Scanner
# reads frames from a table, it automatically decodes the data if necessary.
# The Op DAG only sees the raw frames. For example, this table is stored
# as compressed video.
def make_blurred_frame():
    frame = db.sources.FrameColumn()
    blurred_frame = db.ops.Blur(frame=frame, kernel_size=3, sigma=0.5)
    sampled_frame = db.streams.Range(blurred_frame, 0, 30)
    return frame, sampled_frame


# By default, if an Op outputs a frame with 3 channels with type uint8,
# those frames will be compressed using video encoding. No other frame
# type is currently compressed.
frame, blurred_frame = make_blurred_frame()
output_op = db.sinks.Column(columns={'frame': blurred_frame})
job = Job(op_args={
    frame: db.table('example').column('frame'),
    output_op: 'output_table_name',
})
db.run(output_op, [job], force=True)
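
# To keep an output losslessly encoded instead of using the default lossy
# video encoding, these tutorials annotate the stream itself; Example #18
# above uses .lossless() the same way. A minimal sketch:
frame2, blurred2 = make_blurred_frame()
lossless_output = db.sinks.Column(columns={'frame': blurred2.lossless()})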