Example #1
def main():
    # set test video list
#     video_list = ['CNNW_20160107_180000_Wolf']

    videos = Video.objects.filter(threeyears_dataset=True).all()
    addtional_field = pickle.load(open('/app/data/addtional_field.pkl', 'rb'))
    videos = [video for video in videos if addtional_field[video.id]['valid_transcript']]
    videos = videos[20000:30000]

    # TODO: remove videos whose result is already dumped
    
    # get audio length
#     pkl_path = '/app/data/audio_length_dict.pkl'
#     audio_length_dict = pickle.load(open(pkl_path, 'rb'))
#     audio_length = [audio_length_dict[video_name] for video_name in video_list]
    
    # load audios from videos
    audios = [audio.AudioSource(video.for_scannertools(), 
                                frame_size=SEG_LENGTH, 
                                duration=addtional_field[video.id]['audio_duration']) 
              for video in videos]
    
    # set up transcripts 
    captions = [audio.CaptionSource('tvnews/subs10/'+video.item_name(), 
                                    max_time=addtional_field[video.id]['audio_duration'], 
                                    window_size=SEG_LENGTH) 
                for video in videos]
    
    # set up run opts
    run_opts = {'pipeline_instances_per_node': 32, 'checkpoint_frequency': 5}
    
    # set up align opts
    align_opts = {'seg_length' : 60,
                  'max_misalign' : 10,
                  'num_thread' : 1,
                  'exhausted' : False,
                  'align_dir' : None,
                  'res_path' : None,
#                   'align_dir' : '/app/data/subs/orig/',
#                   'res_path' : '/app/result/test_align_3y.pkl',
    }
    
    '''local run'''
#     db = scannerpy.Database()
#     transcript_alignment.align_transcript(db, videos, audios, captions, run_opts, align_opts, cache=False) 
    
    '''kubernetes run'''
    cfg = cluster_config(
        num_workers=100,
        worker=worker_config('n1-standard-32'))
    
    with make_cluster(cfg, no_delete=True) as db_wrapper:
        db = db_wrapper.db
        transcript_alignment.align_transcript_pipeline(db=db, audio=audios, captions=captions, cache=False, 
                                                       run_opts=run_opts, align_opts=align_opts)
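
Example #1 defines main() but the snippet never invokes it; presumably the original script ended with the usual entry-point guard (not shown above, so this is an assumption):

# Assumed entry point; the guard itself is not part of the original snippet.
if __name__ == '__main__':
    main()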
Example #2
LABELER, _ = Labeler.objects.get_or_create(name='opticalflowhists')
LABELED_TAG, _ = Tag.objects.get_or_create(name='opticalflowhists:labeled')

bad_movie_ids = set([])

#labeled_videos = set([videotag.video_id
#        for videotag in VideoTag.objects.filter(tag=LABELED_TAG).all()])
labeled_videos = set()
all_videos = set(
    [video.id for video in Video.objects.filter(ignore_film=False).all()])
video_ids = sorted(
    list(all_videos.difference(labeled_videos).difference(bad_movie_ids)))

videos = Video.objects.filter(id__in=video_ids).order_by('id')

cfg = cluster_config(num_workers=100, worker=worker_config('n1-standard-32'))
with make_cluster(cfg, no_delete=True) as db_wrapper:
    db = db_wrapper.db
    #if True:
    #    db_wrapper = ScannerWrapper.create()
    #    db = db_wrapper.db

    histograms = st.histograms.compute_flow_histograms(
        db,
        videos=[video.for_scannertools() for video in list(videos)],
        run_opts={
            'work_packet_size': 4,
            'pipeline_instances_per_node': 2,
            'io_packet_size': 2496,
            'checkpoint_frequency': 1,
            'tasks_in_queue_per_pu': 2
        })
Example #3
# with Timer('Histogram'):
#     cfg = cluster_config(
#         num_workers=300,
#         worker=worker_config('n1-standard-16'))
#     with make_cluster(cfg, no_delete=True) as db_wrapper:

# videos = videos
#videos = list(Video.objects.filter(id__gte=91250, id__lte=91350))
# videos = [Video.objects.get(id=63970)]
videos = videos

with Timer('Shot boundaries'):
    cfg = cluster_config(num_workers=60,
                         worker=worker_config('n1-highmem-16'),
                         workers_per_node=2,
                         num_load_workers=1,
                         num_save_workers=2)
    with make_cluster(cfg, no_delete=True) as db_wrapper:

        # from esper.scannerutil import ScannerWrapper
        # if True:
        #     db_wrapper = ScannerWrapper.create()

        db = db_wrapper.db

        job_config = ScannerJobConfig(io_packet_size=10000,
                                      work_packet_size=400,
                                      batch=400)
        hists = run_pipeline(db, videos, job_config)
Example #4
videos = Video.objects.filter(id__in=video_ids).order_by('id').all()

print("Getting frames to compute on")
# Get frames that we computed faces on
frames = [
    [
        f.number
        for f in Frame.objects.filter(video_id=video,
            tags__name='face_computed').order_by('number')
    ]
    for video in tqdm(video_ids)
]

# Cluster parameters
cfg = cluster_config(num_workers=80, worker=worker_config('n1-standard-32'),
    pipelines=[st.gender_detection.GenderDetectionPipeline])
#with make_cluster(cfg, no_delete=True) as db_wrapper:
#    db = db_wrapper.db
if True:
    db = scannerpy.Database() 

    print("Loading faces from Scanner")
    # Load faces
    faces = st.face_detection.detect_faces(
        db,
        videos=[video.for_scannertools() for video in videos],
        frames=frames
    )

    print("Detecting genders")
Example #5
    def build_pipeline(self):
        bboxes = self._db.ops.BboxesFromJson(bboxes=self._sources['bboxes'].op)
        return {
            'bboxes':
            self._db.ops.PrepareClothingBbox(
                frame=self._sources['frame_sampled'].op, bboxes=bboxes)
        }


detect_clothing_bboxes = ClothingBboxesPipeline.make_runner()
detect_clothing = ClothingDetectionPipeline.make_runner()

videos = list(Video.objects.all().order_by('id'))

cfg = cluster_config(num_workers=100,
                     worker=worker_config('n1-standard-16', gpu=1),
                     pipelines=[clothing_detection.ClothingDetectionPipeline])

with make_cluster(cfg, sql_pool=2, no_delete=True) as db_wrapper:
    # if True:
    #     db_wrapper = ScannerWrapper.create()

    db = db_wrapper.db

    print('Fetching frames')
    frames = pcache.get('clothing_frames',
                        lambda: par_for(frames_for_video, videos, workers=8))
    videos, frames = unzip([(v, f) for (v, f) in zip(videos, frames)
                            if len(f) > 0])
    videos = list(videos)
    frames = list(frames)
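
The unzip helper used above (and again in Example #9) is never defined in these snippets. A minimal stand-in, assuming it simply inverts zip() over a sequence of pairs:

# Hypothetical stand-in for the undefined unzip helper; assumed to split a
# sequence of (video, frames) pairs back into two tuples.
def unzip(pairs):
    pairs = list(pairs)
    return tuple(zip(*pairs)) if pairs else ((), ())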
Example #6
            .values('number').order_by('number')]

if False:
    with Timer('benchmark'):
        videos = videos[:30]
        def run_pipeline(db, videos, frames, **kwargs):
            return embed_faces(
                db,
                videos=[v.for_scannertools() for v in videos],
                frames=frames,
                faces=[ScannerSQLTable(Face, v) #num_elements=len(f))
                       for v, f in zip(videos, frames)],
                cache=False,
                **kwargs)

        cfg = cluster_config(
            num_workers=5, worker=worker_config('n1-standard-32'), pipelines=[face_embedding.FaceEmbeddingPipeline])
        configs = [(cfg, [
            ScannerJobConfig(io_packet_size=500, work_packet_size=20, pipelines_per_worker=4),
            ScannerJobConfig(io_packet_size=1000, work_packet_size=20, pipelines_per_worker=4),
            ScannerJobConfig(io_packet_size=1000, work_packet_size=80, pipelines_per_worker=4),
            ScannerJobConfig(io_packet_size=1000, work_packet_size=20, pipelines_per_worker=8),
        ])]
        bench('embedding', {'videos': videos, 'frames': [frames_for_video(v) for v in videos]},
              run_pipeline, configs, no_delete=True, force=True)

    exit()

videos = list(Video.objects.filter(threeyears_dataset=True).order_by('id'))
def load_frames():
    return par_for(frames_for_video, videos, workers=8)
frames = pcache.get('emb_frames', load_frames, force=True)
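
Several examples (#5, #6, #8, #9) call a frames_for_video helper whose definition is never shown. A plausible sketch, assuming it returns the frame numbers to process for one video, mirroring the shot_boundary=False query in Example #7:

# Hypothetical helper, not part of the original snippets: return the frame
# numbers worth processing for one video, skipping shot-boundary frames as
# Example #7 does.
def frames_for_video(video):
    return [f['number']
            for f in Frame.objects.filter(video=video, shot_boundary=False)
                                  .values('number').order_by('number')]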
Example #7
    def build_pipeline(self):
        output_ops = super(FaceDetectionPipeline, self).build_pipeline()
        output_ops['frame_ids'] = self._sources['frame_ids'].op
        return output_ops

detect_faces = FaceDetectionPipeline.make_runner()

videos = list(Video.objects.filter(threeyears_dataset=True).order_by('id'))

if False:
    with Timer('benchmark'):
        videos = videos[:50]
        def run_pipeline(db, videos, frames, **kwargs):
            return face_detection.detect_faces(db, videos=[v.for_scannertools() for v in videos], frames=frames, cache=False, **kwargs)

        cfg = cluster_config(num_workers=5, worker=worker_config('n1-standard-32'))
        configs = [(cfg, [ScannerJobConfig(io_packet_size=1000, work_packet_size=20, batch=1)])]
        bench('face', {'videos': videos, 'frames': [[f['number'] for f in Frame.objects.filter(video=v).values('number').order_by('number')] for v in videos]},
              run_pipeline, configs, no_delete=True, force=True)


with Timer('run'):

    print('Getting frames')
    def load_frames():
        return [[f['number'] for f in Frame.objects.filter(video=v, shot_boundary=False).values('number').order_by('number')]
                for v in tqdm(videos)]
    frames = pcache.get('face_frames', load_frames)

    cfg = cluster_config(
        num_workers=100,
Example #8
        def run_pipeline(db, videos, frames, **kwargs):
            return detect_genders(
                db,
                db_videos=videos,
                videos=[v.for_scannertools() for v in videos],
                frames=frames,
                faces=[
                    ScannerSQLTable(Face, v)  #num_elements=len(f))
                    for v, f in zip(videos, frames)
                ],
                cache=False,
                **kwargs)

        cfg = cluster_config(num_workers=5,
                             worker=worker_config('n1-standard-32'),
                             pipelines=[GenderDetectionPipeline])
        configs = [(cfg, [
            ScannerJobConfig(io_packet_size=1000,
                             work_packet_size=20,
                             pipelines_per_worker=4),
            ScannerJobConfig(io_packet_size=1000,
                             work_packet_size=20,
                             pipelines_per_worker=8),
            ScannerJobConfig(io_packet_size=1000,
                             work_packet_size=20,
                             pipelines_per_worker=16)
        ])]
        bench('gender', {
            'videos': videos,
            'frames': [frames_for_video(v) for v in videos]
        }, run_pipeline, configs, no_delete=True, force=True)
Example #9
                        model_key='best_model',
                        adjust_bboxes=adjust_bboxes,
                        device=self._device),
            # 'bboxes': new_bbs
        }


detect_clothing = ClothingDetectionPipeline.make_runner()

from esper.fuckyou import cache_table

if __name__ == '__main__':
    videos = list(Video.objects.all().order_by('id'))

    cfg = cluster_config(
        num_workers=50,
        worker=worker_config('n1-standard-16', gpu=2),
        pipelines=[hairstyle_detection.HairStyleDetectionPipeline])
    #pipelines=[clothing_detection.ClothingDetectionPipeline])

    # with make_cluster(cfg, sql_pool=2, no_delete=True) as db_wrapper:
    if True:
        db_wrapper = ScannerWrapper.create()

        db = db_wrapper.db

        print('Fetching frames')
        frames = pcache.get(
            'clothing_frames',
            lambda: par_for(frames_for_video, videos, workers=8))
        videos, frames = unzip([(v, f) for (v, f) in zip(videos, frames)
                                if len(f) > 0])
Example #10

print(video_ids, len(labeled_videos), len(video_ids))

videos = Video.objects.filter(id__in=video_ids).order_by('id').all()

print("Getting frames to compute on")
# Get frames that we computed faces on
frames = [[
    f.number for f in Frame.objects.filter(
        video_id=video, tags__name='face_computed').order_by('number')
] for video in tqdm(video_ids)]

# Cluster parameters
cfg = cluster_config(num_workers=200,
                     worker=worker_config('n1-standard-32'),
                     pipelines=[st.object_detection.ObjectDetectionPipeline])
#with make_cluster(cfg, no_delete=True) as db_wrapper:
#    db = db_wrapper.db
if True:
    db = scannerpy.Database()

    print("Detecting objects")
    objects = st.object_detection.detect_objects(
        db,
        videos=[video.for_scannertools() for video in videos],
        frames=frames)

    print("Downloading label definitions")
    LABEL_URL = 'https://storage.googleapis.com/scanner-data/public/mscoco_label_map.pbtxt'
    label_path = download_temp_file(LABEL_URL)
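
download_temp_file is another helper these snippets assume. A minimal standard-library sketch, assuming it fetches a URL into a temporary file and returns the local path:

# Hypothetical stand-in for download_temp_file: fetch a URL into a temp
# file and return its path.
import tempfile
import urllib.request

def download_temp_file(url):
    with urllib.request.urlopen(url) as resp:
        with tempfile.NamedTemporaryFile(delete=False) as f:
            f.write(resp.read())
            return f.name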
Example #11
video_ids = sorted(list(all_videos.difference(ids_to_exclude)))

print(video_ids, len(labeled_videos), len(video_ids))

videos = Video.objects.filter(id__in=video_ids).order_by('id').all()

print("Getting frames to compute on")
# Get frames that we computed faces on
frames = [[
    f.number for f in Frame.objects.filter(
        video_id=video, tags__name='face_computed').order_by('number')
] for video in tqdm(video_ids)]

# Cluster parameters
cfg = cluster_config(
    num_workers=200,
    worker=worker_config('n1-standard-32'),
    pipelines=[st.face_landmark_detection.FaceLandmarkDetectionPipeline])
with make_cluster(cfg, no_delete=True) as db_wrapper:
    db = db_wrapper.db
    #if True:
    #    db = scannerpy.Database()

    #print("Loading faces from Scanner")
    ## Load faces
    #faces = st.face_detection.detect_faces(
    #    db,
    #    videos=[video.for_scannertools() for video in videos],
    #    frames=frames
    #)

    #print("Detecting face landmarks")
Example #12
    def build_pipeline(self):
        output_ops = super(FaceDetectionPipeline, self).build_pipeline()
        output_ops['frame_ids'] = self._sources['frame_ids'].op
        return output_ops

detect_faces = FaceDetectionPipeline.make_runner()

videos = list(Video.objects.filter(threeyears_dataset=False).order_by('id'))

if False:
    with Timer('benchmark'):
        videos = videos[:50]
        def run_pipeline(db, videos, frames, **kwargs):
            return face_detection.detect_faces(db, videos=[v.for_scannertools() for v in videos], frames=frames, cache=False, **kwargs)

        cfg = cluster_config(num_workers=5, worker=worker_config('n1-standard-32'))
        configs = [(cfg, [ScannerJobConfig(io_packet_size=1000, work_packet_size=20, batch=1)])]
        bench('face', {'videos': videos, 'frames': [[f['number'] for f in Frame.objects.filter(video=v).values('number').order_by('number')] for v in videos]},
              run_pipeline, configs, no_delete=True, force=True)

videos = videos

with Timer('run'):
    cfg = cluster_config(
        num_workers=80,
        worker=worker_config('n1-standard-32'),
        workers_per_node=8,
        num_load_workers=1,
        num_save_workers=1)
    with make_cluster(cfg, sql_pool=4, no_delete=True) as db_wrapper: