def download_images(images, paths):
    """Fetch one frame per (video id, timestamp) pair and store it on disk.

    ``images`` and ``paths`` are walked pairwise with ``zip``, so surplus
    entries in the longer of the two are ignored.

    Args:
        images: Iterable of (video id, timestamp) pairs.
        paths: Iterable of destination paths, one for each entry of
            ``images``.
    """
    for (video_id, timestamp), destination in zip(images, paths):
        Video.get(video_id).download_image(timestamp, destination)
def generate_info_files(s3_info_filename, info_filename, pos_dir, neg_dir):
    """Write the local info file and fetch any missing thumbnails from S3.

    Every line of ``s3_info_filename`` is split on single spaces and read as
    ``<ignored> <label> <video id> <timestamp>``. For each line whose video
    exists and whose timestamp is among ``video.s3_timestamps()``, the
    thumbnail is downloaded (unless the file is already on disk) into
    ``pos_dir`` when the label is > 0 and into ``neg_dir`` otherwise, and a
    line ``<local path> <label> <video id> <timestamp>`` is written to
    ``info_filename``. All other lines are skipped silently.

    Args:
        s3_info_filename: Path of the input listing.
        info_filename: Path of the info file to write (opened with 'w', so
            any previous content is replaced).
        pos_dir: Directory for images with a positive label; created if
            missing.
        neg_dir: Directory for all other images; created if missing.

    Raises:
        IndexError / ValueError: if a line has fewer than four fields or a
            non-integer label, video id or timestamp.
    """
    for directory in (pos_dir, neg_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)
    bucket = affine_config.s3_bucket()
    # Context managers guarantee both files are closed even when a line fails
    # to parse or a download raises part-way through the listing.
    with open(s3_info_filename, 'r') as listing, \
            open(info_filename, 'w') as info_file:
        for raw_line in listing:
            fields = raw_line.split(' ')
            image_label = int(fields[1])
            video_id = int(fields[2])
            time_stamp = int(fields[3])
            video = Video.get(video_id)
            if not video or time_stamp not in video.s3_timestamps():
                continue
            filename = '%012d_%012d.jpg' % (video_id, time_stamp)
            target_dir = pos_dir if image_label > 0 else neg_dir
            outfile = os.path.join(target_dir, filename)
            if not os.path.exists(outfile):
                # Parenthesised form prints the same text under Python 2 and
                # also parses under Python 3.
                print("downloading data from s3")
                img_path = 'thumbnail/%d/%d' % (video_id, time_stamp)
                s3client.download_from_s3(bucket, img_path, outfile)
            info_file.write('%s %i %i %i\n' % (outfile, image_label,
                                               video_id, time_stamp))
def get_false_positives(det, ratio, config2, inter_dir):
    """Collect negative examples (MTurk-rejected detections) for a detector.

    Selects MTurkImage rows for the detector's target label whose result is
    False, that have a matching ImageDetectorResult for the detector's
    classifier target, and whose ImageHit has no mturk_image_job_id. When
    fewer than MAX_NEG such images exist, the shortfall is filled with lines
    from ``get_negative_set_s3``.

    Args:
        det: Detector; ``det.clf_target`` and ``det.name`` are read.
        ratio: Split ratio forwarded to ``split_set``.
        config2: Config dict forwarded to ``get_negative_set_s3``. NOTE: it is
            modified in place (``neg_train_min_num_frames`` is set to 0) when
            extra negatives are needed.
        inter_dir: Working directory forwarded to ``get_negative_set_s3``.

    Returns:
        Tuple ``(neg_train, neg_test)`` as produced by ``split_set``; each
        example is ``[s3_path, -1, video_id, timestamp]``.

    Raises:
        Exception: if no negative example could be found at all.
    """
    clf_target = det.clf_target
    query = session.query(MTurkImage).filter_by(
        label_id=clf_target.target_label_id, result=False)
    # The two join conditions must be combined with `&` (SQL AND). Python's
    # `and` evaluates the truth value of the first expression and returns just
    # one operand, which silently dropped the timestamp condition.
    query = query.join(
        ImageDetectorResult,
        (MTurkImage.video_id == ImageDetectorResult.video_id) &
        (MTurkImage.timestamp == ImageDetectorResult.time))
    query = query.filter(ImageDetectorResult.clf_target_id == clf_target.id)
    query = query.join(ImageHit, MTurkImage.image_hit_id == ImageHit.id)
    # `== None` is intentional: the ORM renders it as IS NULL.
    query = query.filter(ImageHit.mturk_image_job_id == None)

    images = [(row.video_id, row.timestamp) for row in query]
    shortfall = MAX_NEG - len(images)
    if shortfall > 0:
        config2['train_detector_params']['neg_train_min_num_frames'] = 0
        other, _ = get_negative_set_s3(config2, inter_dir, shortfall, 1)
        for s3_line in other:
            # Line layout: <s3 path> <label> <video id> <timestamp>
            _s3, _label, video_id, timestamp = s3_line.split()
            images.append((int(video_id), int(timestamp)))

    if not images:
        raise Exception('No negative examples found in mturk for detector %s' %
                        det.name)
    neg_image_set = [
        [Video.construct_s3_image_url(vid, tm), -1, vid, tm]
        for vid, tm in images
    ]
    neg_train, neg_test = split_set(neg_image_set, ratio)
    return neg_train, neg_test
Ejemplo n.º 4
0
def save_boxes(l_id, video_id, boxes):
    """Store text boxes for a video and log a detector result for each one.

    Args:
        l_id: Target Label id.
        video_id: Video id.
        boxes: Dict mapping a timestamp to a list of rectangles, each given
            as a (height, width, y, x) tuple.

    Returns:
        List of the box ids returned by ``Box.get_or_create``, one per
        rectangle.

    Raises/Assertions:
        AssertionError if the video id does not exist.
        AssertionError if the label id does not exist.
    """
    video = Video.get(video_id)
    label = Label.get(l_id)
    assert video, "Video %s does not exist" % video_id
    assert label, "Label %s does not exist" % l_id
    target_query = ClassifierTarget.query.filter_by(target_label_id=label.id)
    # .one() requires exactly one text-detect classifier target for the label.
    clf_target = target_query.join(TextDetectClassifier).one()
    saved_ids = []
    for timestamp, rectangles in boxes.items():
        for box_h, box_w, box_y, box_x in rectangles:
            box_id = Box.get_or_create(x=box_x,
                                       y=box_y,
                                       width=box_w,
                                       height=box_h,
                                       video=video,
                                       timestamp=timestamp,
                                       box_type='Text')
            saved_ids.append(box_id)
            BoxDetectorResult.log_result(box_id, clf_target.id)
    return saved_ids
def sample_frames(videos, n_images, secs):
    """Sample frames from a list of videos.

    For each video, its S3 timestamps are thinned so that roughly one frame
    per ``secs`` seconds of video length remains. If more than ``n_images``
    frames are left, ``n_images`` of them are picked at random; otherwise all
    remaining frames are returned. Videos that do not exist or have no
    timestamps contribute nothing.

    Args:
        videos: a list of video ids
        n_images: maximum number of images (frames) to sample per video
        secs: sampling rate in seconds (int)

    Returns:
        set_videos: a list of tuples (video_id, timestamp)

    Assertions:
        AssertionError when n_images <= 0, when secs <= 0, or when secs is
        not an integer
    """
    assert n_images > 0, "Number of images to sample should be > 0"
    assert secs > 0, "Sampling rate should be an integer > 0"
    assert isinstance(secs, int), "Sampling rate should be an integer"
    set_videos = []
    for video_id in videos:
        v = Video.get(video_id)
        if not v:
            # Unknown video id: nothing to sample from.
            continue
        timestamps = v.s3_timestamps()
        if not timestamps:
            continue
        # Explicit floor division: the slice step must be an int, and `/`
        # would produce a float under true division.
        wanted_frames = (v.length // secs) or 1
        slice_factor = (len(timestamps) // wanted_frames) or 1
        frames = timestamps[::slice_factor]
        if len(frames) > n_images:
            # `frames` is a fresh slice, so shuffling leaves `timestamps`
            # untouched.
            np.random.shuffle(frames)
            frames = frames[:n_images]
        set_videos += [(video_id, timestamp) for timestamp in frames]
    return set_videos
Ejemplo n.º 6
0
def judge_video(video_id, video_path, model_dir, conf_th, accept_th):
    """
    Evaluate if the video contains the targeted content,
    e.g., a videogame play, or not.

    This method LOADS the classifier whose model files are given in model_dir
    every time it is called.

    Args:
        video_id: long, video id
        video_path: string, full path name of the video file
        model_dir: string, path to folder with classifier model files
            (must contain 'Configfile.cfg')
        conf_th, accept_th: acceptance thresholds for
            VideoMotionColorClassifier. Each one is applied only when it is
            <= 1; a larger value leaves the classifier's loaded setting
            untouched.
    Returns:
        A tuple ``({}, fired)`` where ``fired`` is a Boolean saying whether
        the detector fires (True) or not (False) for this video. ``fired`` is
        False when the video id is unknown.

    Raises/Assertions:
        Asserts if the video file passed is not there
    """
    # Function-scope import: only needed for the scratch directory cleanup.
    import shutil

    assert (os.path.exists(video_path)), \
        "Video %s does not exist" % (video_path)

    VIDEO_TARGET_LABEL = 0
    video_result = False

    v = Video.get(video_id)

    if v:
        # Scratch directory handed to the classifier for frames; removed once
        # the prediction is available so repeated calls do not pile up
        # temporary directories.
        frames_folder = tempfile.mkdtemp()
        try:
            test_videoObj = VideoInfoObject(video_id=v.id,
                                            videopath=video_path,
                                            framespath=frames_folder,
                                            length=v.length)

            config_file = os.path.join(model_dir, 'Configfile.cfg')
            v_classif = VideoMotionColorClassifier(config_file)
            v_classif.load_model(model_dir)

            if conf_th <= 1:
                v_classif.confidence_th = conf_th
            if accept_th <= 1:
                v_classif.accept_th = accept_th

            predictions = v_classif.classify_videos([test_videoObj])

            video_result = (predictions[0] == VIDEO_TARGET_LABEL)
        finally:
            shutil.rmtree(frames_folder, ignore_errors=True)

    return {}, video_result
def get_set_lines(bucket, min_neg_lines_to_use, train_pos, inter_dir,
                  s3_filename):
    """Download a listing of negatives from S3 and sample lines from it.

    The number of lines sampled is the larger of ``train_pos`` and
    ``min_neg_lines_to_use``, capped at the number of lines available. Only
    sampled lines whose video exists are returned, so the result may be
    shorter than the sample.

    Args:
        bucket: S3 bucket to download from.
        min_neg_lines_to_use: Lower bound on the number of lines to sample.
        train_pos: Number of positive training examples.
        inter_dir: Local directory the listing is downloaded into.
        s3_filename: Key of the listing in the bucket; also used as the local
            file name inside ``inter_dir``.

    Returns:
        List of sampled lines (strings without line terminators). Each line
        is expected to hold four whitespace-separated fields with the video
        id in third position.
    """
    n_neg_lines_to_use = max(train_pos, min_neg_lines_to_use)
    local_filename = os.path.join(inter_dir, s3_filename)
    s3client.download_from_s3(bucket, s3_filename, local_filename)
    with open(local_filename, 'r') as fo:
        all_neg = fo.read().splitlines()
    # The first line is never sampled (presumably a header -- TODO confirm).
    candidates = all_neg[1:]
    # random.sample raises ValueError when asked for more items than exist,
    # so cap the request at what the file actually provides.
    sample_size = min(n_neg_lines_to_use, len(candidates))
    neg = []
    for line in random.sample(candidates, sample_size):
        _s3, _label, video_id, _timestamp = line.split()
        if Video.get(video_id):
            neg.append(line)
    return neg
def get_positive_training(det, ratio):
    """Collect positive training images (MTurk-approved) for a detector.

    Queries up to MAX_POS MTurkImage rows with a True result whose image job
    is for the detector's target label, and splits them with ``split_set``.

    Args:
        det: Detector; ``det.clf_target.target_label_id`` and ``det.name``
            are read.
        ratio: Split ratio forwarded to ``split_set``.

    Returns:
        Tuple ``(pos_train, pos_test)``; each example is
        ``[s3_path, 1, video_id, timestamp]``.

    Raises:
        Exception: if the query matches no image.
    """
    query = session.query(MTurkImage).join(ImageHit).join(MTurkImageJob)
    query = query.filter(
        MTurkImage.image_hit_id == ImageHit.id,
        MTurkImage.result == True,
        ImageHit.mturk_image_job_id == MTurkImageJob.id,
        MTurkImageJob.label_id == det.clf_target.target_label_id)
    query = query.limit(MAX_POS)
    if not query.count():
        raise Exception('No positive examples found in mturk for detector %s' %
                        det.name)
    pos_image_set = [
        [Video.construct_s3_image_url(row.video_id, row.timestamp),
         1, row.video_id, row.timestamp]
        for row in query
    ]
    pos_train, pos_test = split_set(pos_image_set, ratio)
    return pos_train, pos_test
Ejemplo n.º 9
0
def judge_video(video_id, video_path, model_dir):
    """
    Evaluate if the video contains the targeted content,
    e.g., static photo, slideshow

    This method LOADS the classifier whose model files are given in model_dir
    every time it is called.

    Args:
        video_id: long, video id
        video_path: string, full path name of the video file
        model_dir: string, path to folder with classifier model files
    Returns:
        The ``result_dict`` of a ResultAggregator. A video result is added to
        it only when the flow classifies the video as Svcff.PHOTO or
        Svcff.SLIDESHOW; for an unknown video id the aggregator is returned
        without any result added.

    Raises/Assertions:
        Asserts if the video file passed is not there
    """
    # Function-scope import: only needed for the scratch directory cleanup.
    import shutil

    assert (os.path.exists(video_path)), \
        "Video %s does not exist" % (video_path)

    ra = ResultAggregator()

    v = Video.get(video_id)
    if v:
        # Scratch directory handed to the flow for frames; removed once the
        # flow has finished so repeated calls do not pile up temporary
        # directories.
        frames_folder = tempfile.mkdtemp()
        try:
            test_video_obj = VideoInfoObject(video_id=v.id,
                                             videopath=video_path,
                                             framespath=frames_folder,
                                             length=v.length)
            v_classif = Svcff.load_from_dir(model_dir)
            flow = v_classif.create_test_flow()

            video_result = flow.run_flow(video_obj=test_video_obj)
            if video_result in [Svcff.PHOTO, Svcff.SLIDESHOW]:
                ra.add_video_result(v_classif.target_labels[video_result])
        finally:
            shutil.rmtree(frames_folder, ignore_errors=True)

    return ra.result_dict
Ejemplo n.º 10
0
 def create_set_from_file(self, image_file, label, limit=None):
     """Download the images listed in a file into the label's data folder.

     Each line of ``image_file`` must hold a video id and a timestamp
     separated by a tab, both written as 12-character integers. Every image
     is saved as ``<video id>_<timestamp>.jpg`` (both zero-padded to 12
     characters) under ``<self.data_folder>/<label>``. For the 'pos' label a
     flipped copy is stored as well, recorded with the negated video id.

     On return ``self.dataset[label]`` has ``folder``, ``num_images`` and
     ``indices`` (array of [video id, timestamp] rows) filled in.

     Args:
         image_file: Path of the tab-separated listing.
         label: Dataset label (a str that is a key of ``self.labels``).
         limit: Optional cap on the number of recorded images, flipped
             copies included; ``None`` means no cap.

     Raises/Assertions:
         AssertionError if the label is not a string or not in the dataset,
         or if a listed field is empty or not 12 characters long.
     """
     assert isinstance(label, str), "Label should be a string"
     assert label in self.labels.keys(), "Label is not part of the dataset"
     with open(image_file, 'r') as fo:
         lines = fo.read().splitlines()
     subset = self.dataset[label]
     subset.folder = os.path.join(self.data_folder, label)
     if not os.path.exists(subset.folder):
         os.makedirs(subset.folder)

     data = []

     def limit_reached():
         return limit is not None and len(data) >= limit

     for line in lines:
         if limit_reached():
             break
         v, t = line.split('\t')
         for field in (v, t):
             assert field, "info file formating problem "
             assert len(field) == 12, "info file formating problem "
             assert isinstance(int(field), int), "info file formating problem "
         video_id, timestamp = int(v), int(t)
         localpath = os.path.join(subset.folder,
                                  "%012d_%012d.jpg" % (video_id, timestamp))
         Video.get(video_id).download_image(timestamp, localpath)
         data.append([video_id, timestamp])
         if label != 'pos':
             continue
         # Positives are augmented with a flipped copy, unless the original
         # image already filled the quota.
         if limit_reached():
             break
         localpath_flipped = os.path.join(
             subset.folder, "%012d_%012d.jpg" % (-video_id, timestamp))
         scene_functions.flip_image(localpath, localpath_flipped)
         data.append([-video_id, timestamp])

     subset.num_images = len(data)
     subset.indices = np.array(data)
Ejemplo n.º 11
0
def download_video(v_name, v_id):
    """
    Make sure the video with id v_id is available at path v_name, downloading
    it only when the file is not already there.

    Args:
        v_name: string, full path file name where to save the downloaded video
        v_id: long, video id of the video to be downloaded

    Returns:
        int, length of the video as stored on the Video record.
        If the video file exists but the video_id is not on the DB, it
        returns -1.
        If the file does not exist, and video_id is not on the DB or the
        video is not on s3, it returns 0.
    """
    v = Video.get(v_id)

    if os.path.exists(v_name):
        # File already present: nothing to download.
        return v.length if v else -1

    if not v:
        logger.error('Video file does not exist!')
        return 0
    if not v.s3_video:
        logger.error('Video is not on s3!')
        return 0

    video_length = v.length
    logger.info("Downloading %d.flv ..." % (v.id))
    v.download_video(v_name)
    return video_length
Ejemplo n.º 12
0
def recognize_judge_video(clf_dir, video_id, imagedir):
    """Run face recognition on every face box of a video and aggregate it.

    Each face box is cropped from its frame image into a temporary file and
    submitted to the data processor asynchronously. Once all requests are
    submitted, the results are collected: face info is recorded for every box
    that comes back with a confidence, a box result is added when the
    confidence exceeds FACE_MIN_CONFIDENCE and a label was predicted, and a
    video result is added for every label predicted on at least
    MIN_OCCURENCE_FOR_VIDEO boxes.

    Args:
        clf_dir: Classifier directory used to look up the model name.
        video_id: Id of the video to process.
        imagedir: Directory holding the video's frame images.

    Returns:
        The ``result_dict`` of the ResultAggregator.

    Raises/Assertions:
        AssertionError if the video does not exist, or if a predicted label
        id has no Label.
    """
    model_name = FaceRecognizeClassifierInjector.get_model_name(clf_dir)
    dp_client = DataProcessorClient(model_name)

    pending = {}
    ra = ResultAggregator()
    votes = defaultdict(int)
    video = Video.get(video_id)
    assert video
    for box in video.face_boxes:
        path = time_to_image_path(imagedir, box.timestamp)
        fd, cropped_path = mkstemp(suffix='.jpg')
        os.close(fd)
        try:
            rect = get_rect_to_recognize(box)
            crop_image(path, cropped_path, *rect)
            [bin_data] = convert_files_to_bin([cropped_path])
            # `async` is a reserved word from Python 3.7 on, so writing it as
            # a literal keyword argument is a SyntaxError there; unpacking a
            # dict passes the very same keyword on every version.
            pending[box.id] = dp_client.predict(bin_data,
                                                box.width,
                                                box.height,
                                                **{'async': True})
        finally:
            # The crop is only needed until its bytes have been read.
            os.remove(cropped_path)
    for box_id, result in pending.items():
        label_id, conf, parts = result.wait(timeout=FACE_CELERY_TIMEOUT)
        if conf is None:
            continue
        ra.add_face_info(box_id, conf, parts)
        if conf > FACE_MIN_CONFIDENCE and label_id is not None:
            assert Label.get(label_id)
            ra.add_box_result(box_id, label_id)
            votes[label_id] += 1
    for label_id, occur in votes.items():
        if occur >= MIN_OCCURENCE_FOR_VIDEO:
            ra.add_video_result(label_id)
    return ra.result_dict