Example #1
def main():
    # set test video list
#     video_list = ['CNNW_20160107_180000_Wolf']

    videos = Video.objects.filter(threeyears_dataset=True).all()
    # Per-video metadata; 'addtional' (sic) matches the on-disk pickle filename.
    addtional_field = pickle.load(open('/app/data/addtional_field.pkl', 'rb'))
    videos = [video for video in videos if addtional_field[video.id]['valid_transcript']]
    videos = videos[20000:30000]

    # TODO: skip videos whose results have already been dumped (see the sketch after this example)
    
    # get audio length
#     pkl_path = '/app/data/audio_length_dict.pkl'
#     audio_length_dict = pickle.load(open(pkl_path, 'rb'))
#     audio_length = [audio_length_dict[video_name] for video_name in video_list]
    
    # load audios from videos
    audios = [audio.AudioSource(video.for_scannertools(), 
                                frame_size=SEG_LENGTH, 
                                duration=addtional_field[video.id]['audio_duration']) 
              for video in videos]
    
    # set up transcripts 
    captions = [audio.CaptionSource('tvnews/subs10/'+video.item_name(), 
                                    max_time=addtional_field[video.id]['audio_duration'], 
                                    window_size=SEG_LENGTH) 
                for video in videos]
    
    # set up run opts
    run_opts = {'pipeline_instances_per_node': 32, 'checkpoint_frequency': 5}
    
    # set up align opts
    align_opts = {'seg_length' : 60,
                  'max_misalign' : 10,
                  'num_thread' : 1,
                  'exhausted' : False,
                  'align_dir' : None,
                  'res_path' : None,
#                   'align_dir' : '/app/data/subs/orig/',
#                   'res_path' : '/app/result/test_align_3y.pkl',
    }
    
    '''local run'''
#     db = scannerpy.Database()
#     transcript_alignment.align_transcript(db, videos, audios, captions, run_opts, align_opts, cache=False) 
    
    '''kubernetes run'''
    cfg = cluster_config(
        num_workers=100,
        worker=worker_config('n1-standard-32'))
    
    with make_cluster(cfg, no_delete=True) as db_wrapper:
        db = db_wrapper.db
        transcript_alignment.align_transcript_pipeline(db=db, audio=audios, captions=captions, cache=False, 
                                                       run_opts=run_opts, align_opts=align_opts)
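
The TODO above is left unimplemented. A minimal sketch of the already-dumped filter, assuming one result pickle per video under a hypothetical results directory (both the directory and the per-video naming scheme are assumptions):

import os

# Hypothetical helper for the TODO above: drop videos whose result pickle
# already exists. The result_dir layout and naming scheme are assumptions.
def filter_undumped(videos, result_dir='/app/result/align/'):
    return [v for v in videos
            if not os.path.exists(os.path.join(result_dir, v.item_name() + '.pkl'))]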
Example #2
LABELER, _ = Labeler.objects.get_or_create(name='opticalflowhists')
LABELED_TAG, _ = Tag.objects.get_or_create(name='opticalflowhists:labeled')

# Videos known to be bad; none are excluded at the moment.
bad_movie_ids = set()

# The labeled-video filter is disabled: labeled_videos stays empty, so
# already-labeled videos are reprocessed rather than skipped.
#labeled_videos = set([videotag.video_id
#        for videotag in VideoTag.objects.filter(tag=LABELED_TAG).all()])
labeled_videos = set()
all_videos = set(
    [video.id for video in Video.objects.filter(ignore_film=False).all()])
video_ids = sorted(
    list(all_videos.difference(labeled_videos).difference(bad_movie_ids)))

videos = Video.objects.filter(id__in=video_ids).order_by('id')

cfg = cluster_config(num_workers=100, worker=worker_config('n1-standard-32'))
with make_cluster(cfg, no_delete=True) as db_wrapper:
    db = db_wrapper.db
    #if True:
    #    db_wrapper = ScannerWrapper.create()
    #    db = db_wrapper.db

    histograms = st.histograms.compute_flow_histograms(
        db,
        videos=[video.for_scannertools() for video in list(videos)],
        run_opts={
            'work_packet_size': 4,
            'pipeline_instances_per_node': 2,
            'io_packet_size': 2496,
            'checkpoint_frequency': 1,
            'tasks_in_queue_per_pu': 2
        })
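
Once the histograms come back, each processed video can be tagged so the (currently disabled) labeled-video filter skips it on the next run. A minimal sketch, assuming VideoTag has video and tag fields as the commented-out query above suggests:

# Hypothetical follow-up: mark each processed video with LABELED_TAG so a
# future run can skip it. VideoTag(video=..., tag=...) fields are assumed
# from the commented-out VideoTag query above.
for video in videos:
    VideoTag.objects.get_or_create(video=video, tag=LABELED_TAG)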
Example #3
compute_shot_boundaries = ShotBoundaryPipeline.make_runner()

# with Timer('Histogram'):
#     cfg = cluster_config(
#         num_workers=300,
#         worker=worker_config('n1-standard-16'))
#     with make_cluster(cfg, no_delete=True) as db_wrapper:

# Alternative video selections, kept for reference:
#videos = list(Video.objects.filter(id__gte=91250, id__lte=91350))
# videos = [Video.objects.get(id=63970)]
videos = videos  # no-op: use the externally defined video list as-is

with Timer('Shot boundaries'):
    cfg = cluster_config(num_workers=60,
                         worker=worker_config('n1-highmem-16'),
                         workers_per_node=2,
                         num_load_workers=1,
                         num_save_workers=2)
    with make_cluster(cfg, no_delete=True) as db_wrapper:

        # from esper.scannerutil import ScannerWrapper
        # if True:
        #     db_wrapper = ScannerWrapper.create()

        db = db_wrapper.db

        job_config = ScannerJobConfig(io_packet_size=10000,
                                      work_packet_size=400,
                                      batch=400)
        hists = run_pipeline(db,
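
For quick iteration without spinning up a cluster, the commented-out lines inside the with-block point at a local wrapper. A minimal local-run sketch, assuming esper.scannerutil exposes ScannerWrapper.create() as those comments suggest:

# Minimal local-run sketch, based on the commented-out ScannerWrapper lines
# above (assumes esper.scannerutil is importable in this environment).
from esper.scannerutil import ScannerWrapper

db_wrapper = ScannerWrapper.create()
db = db_wrapper.db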
Example #4
    def build_pipeline(self):
        bboxes = self._db.ops.BboxesFromJson(bboxes=self._sources['bboxes'].op)
        return {
            'bboxes':
            self._db.ops.PrepareClothingBbox(
                frame=self._sources['frame_sampled'].op, bboxes=bboxes)
        }


detect_clothing_bboxes = ClothingBboxesPipeline.make_runner()
detect_clothing = ClothingDetectionPipeline.make_runner()

videos = list(Video.objects.all().order_by('id'))

cfg = cluster_config(num_workers=100,
                     worker=worker_config('n1-standard-16', gpu=1),
                     pipelines=[clothing_detection.ClothingDetectionPipeline])

with make_cluster(cfg, sql_pool=2, no_delete=True) as db_wrapper:
    # if True:
    #     db_wrapper = ScannerWrapper.create()

    db = db_wrapper.db

    print('Fetching frames')
    frames = pcache.get('clothing_frames',
                        lambda: par_for(frames_for_video, videos, workers=8))
    videos, frames = unzip([(v, f) for (v, f) in zip(videos, frames)
                            if len(f) > 0])
    videos = list(videos)
    frames = list(frames)
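
After the frames are loaded, the runners built above still need to be invoked. Continuing inside the make_cluster block, a minimal sketch assuming the runners accept the same keyword convention as detect_faces in Example #5 (the exact signatures, and feeding the bbox output into the detector, are assumptions):

    # Hypothetical continuation: invoke the runners built above. The keyword
    # convention (videos=..., frames=...) is assumed from detect_faces in
    # Example #5, as is passing the bbox output on to the detector.
    bboxes = detect_clothing_bboxes(
        db,
        videos=[v.for_scannertools() for v in videos],
        frames=frames)
    clothing = detect_clothing(
        db,
        videos=[v.for_scannertools() for v in videos],
        frames=frames,
        bboxes=bboxes)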
Example #5
    def build_pipeline(self):
        output_ops = super(FaceDetectionPipeline, self).build_pipeline()
        output_ops['frame_ids'] = self._sources['frame_ids'].op
        return output_ops

detect_faces = FaceDetectionPipeline.make_runner()

videos = list(Video.objects.filter(threeyears_dataset=True).order_by('id'))

if False:  # flip to True to run the one-off benchmark
    with Timer('benchmark'):
        videos = videos[:50]
        def run_pipeline(db, videos, frames, **kwargs):
            return face_detection.detect_faces(db, videos=[v.for_scannertools() for v in videos], frames=frames, cache=False, **kwargs)

        cfg = cluster_config(num_workers=5, worker=worker_config('n1-standard-32'))
        configs = [(cfg, [ScannerJobConfig(io_packet_size=1000, work_packet_size=20, batch=1)])]
        frame_lists = [[f['number'] for f in Frame.objects.filter(video=v)
                        .values('number').order_by('number')]
                       for v in videos]
        bench('face', {'videos': videos, 'frames': frame_lists},
              run_pipeline, configs, no_delete=True, force=True)


with Timer('run'):

    print('Getting frames')
    def load_frames():
        return [[f['number'] for f in Frame.objects.filter(video=v, shot_boundary=False).values('number').order_by('number')]
                for v in tqdm(videos)]
    frames = pcache.get('face_frames', load_frames)

    cfg = cluster_config(
        num_workers=100,
Example #6
if False:  # flip to True to run the one-off benchmark
    with Timer('benchmark'):
        videos = videos[:30]
        def run_pipeline(db, videos, frames, **kwargs):
            return embed_faces(
                db,
                videos=[v.for_scannertools() for v in videos],
                frames=frames,
                faces=[ScannerSQLTable(Face, v)  # num_elements=len(f)
                       for v, f in zip(videos, frames)],
                cache=False,
                **kwargs)

        cfg = cluster_config(
            num_workers=5, worker=worker_config('n1-standard-32'), pipelines=[face_embedding.FaceEmbeddingPipeline])
        configs = [(cfg, [
            ScannerJobConfig(io_packet_size=500, work_packet_size=20, pipelines_per_worker=4),
            ScannerJobConfig(io_packet_size=1000, work_packet_size=20, pipelines_per_worker=4),
            ScannerJobConfig(io_packet_size=1000, work_packet_size=80, pipelines_per_worker=4),
            ScannerJobConfig(io_packet_size=1000, work_packet_size=20, pipelines_per_worker=8),
        ])]
        bench('embedding', {'videos': videos, 'frames': [frames_for_video(v) for v in videos]},
              run_pipeline, configs, no_delete=True, force=True)

    exit()

videos = list(Video.objects.filter(threeyears_dataset=True).order_by('id'))
def load_frames():
    return par_for(frames_for_video, videos, workers=8)
frames = pcache.get('emb_frames', load_frames, force=True)
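
frames_for_video is used throughout Examples #4 to #7 but never defined in these snippets. A hypothetical definition, modeled on the inline frame query in Example #5 (the shot_boundary=False filter is an assumption carried over from there):

# Hypothetical definition of frames_for_video, modeled on the inline frame
# query in Example #5; the shot_boundary=False filter is an assumption.
def frames_for_video(video):
    return [f['number']
            for f in Frame.objects.filter(video=video, shot_boundary=False)
                          .values('number')
                          .order_by('number')]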
Example #7
        def run_pipeline(db, videos, frames, **kwargs):
            return detect_genders(
                db,
                db_videos=videos,
                videos=[v.for_scannertools() for v in videos],
                frames=frames,
                faces=[
                    ScannerSQLTable(Face, v)  # num_elements=len(f)
                    for v, f in zip(videos, frames)
                ],
                cache=False,
                **kwargs)

        cfg = cluster_config(num_workers=5,
                             worker=worker_config('n1-standard-32'),
                             pipelines=[GenderDetectionPipeline])
        configs = [(cfg, [
            ScannerJobConfig(io_packet_size=1000,
                             work_packet_size=20,
                             pipelines_per_worker=4),
            ScannerJobConfig(io_packet_size=1000,
                             work_packet_size=20,
                             pipelines_per_worker=8),
            ScannerJobConfig(io_packet_size=1000,
                             work_packet_size=20,
                             pipelines_per_worker=16)
        ])]
        bench('gender', {
            'videos': videos,
            'frames': [frames_for_video(v) for v in videos]
        }, run_pipeline, configs, no_delete=True, force=True)
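
The sweep holds io_packet_size and work_packet_size fixed while doubling pipelines_per_worker (4, 8, 16), so the benchmark isolates per-worker pipeline parallelism from packetization; Example #6 runs the analogous sweep for face embeddings with packet sizes varied as well.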