def recognize_text_in_video(self, image_dir, box_ids):
    """Return the recognized word for each requested text box in a video.

    Frames are sampled from ``image_dir`` (at most ``self.max_imgs``), the
    requested boxes are grouped by frame timestamp, and the grouped boxes
    are passed to ``self.test`` together with the frames.

    Args:
        image_dir: string path to images
        box_ids: list of box ids

    Returns:
        dict mapping each kept box id to the entry of ``self.test``'s result
        found at that id's position in the sorted list of kept ids.
    """
    import logging

    images = sample_images(image_dir, self.max_imgs, 0)
    images = dict((image_path_to_time(im), im) for im in images)

    # NOTE(review): the tail of this query was lost in the source. The sixth
    # column (box id) and the filter on ``box_ids`` are reconstructed from
    # the six-way unpacking below and from the documented parameter --
    # confirm against the original module.
    boxes = session.query(
        Box.timestamp, Box.height, Box.width, Box.y, Box.x,
        Box.id).filter(Box.id.in_(box_ids))

    all_box_ids = []
    filtered_boxes = defaultdict(list)
    # The names follow the column order of the query (y before x); the values
    # appended are in the same positions as before, only the labels changed.
    for timestamp, h, w, y, x, b_id in boxes:
        if timestamp in images:
            filtered_boxes[timestamp].append([h, w, y, x])
            # NOTE(review): reconstructed -- ``all_box_ids`` is read below
            # but the statement filling it was missing from the source.
            all_box_ids.append(b_id)
        else:
            # NOTE(review): only the message arguments survived; the logger
            # object and level are assumptions.
            logging.getLogger(__name__).warning(
                'Image %s not in dir %s' % (timestamp, image_dir))

    words = self.test(images, filtered_boxes)
    # presumably ``words`` is ordered like the sorted box ids -- verify
    # against ``self.test``.
    return dict(
        (b_id, words[idx]) for idx, b_id in enumerate(sorted(all_box_ids)))
def judge_video(image_dir, model_dir):
    """Evaluate if the video sampled in image_dir contains the targeted content.

    This method LOADS the classifier whose model files are given in model_dir
    every time it is called.

    Args:
        image_dir: string, path to folder with video frames
        model_dir: string, path to folder with classifier model files

    Returns:
        The ``result_dict`` of the ResultAggregator filled by this call.
        NOTE(review): the original text listed ``image_results: dict with
        tuples (timestamp, boolean_result)`` and ``video_result: boolean
        result`` -- confirm how these map onto ``result_dict``.

    Raises:
        AssertionError: if the folder with images or folder with model files
            do not exist
    """
    # Raised explicitly rather than via ``assert`` so the checks still run
    # under ``python -O``; exception type and message are unchanged.
    if not os.path.exists(image_dir):
        raise AssertionError(
            "Frames folder %s does not exist" % (image_dir,))
    if not os.path.exists(model_dir):
        raise AssertionError(
            "Model files folder %s does not exist" % (model_dir,))

    ra = ResultAggregator()

    config_file = os.path.join(model_dir, 'Configfile.cfg')
    cnn_ff = CnnClassifierFlowFactory(config_file)

    max_num_frames = cnn_ff.max_frames_per_video
    f_predict = cnn_ff.get_process_video_flow()
    target_labels = cnn_ff.target_labels

    list_test_images = sample_images(image_dir, max_num_frames)
    num_frames = len(list_test_images)
    if not num_frames:
        return ra.result_dict

    # NOTE(review): no statement that runs ``f_predict`` on the sampled
    # frames is visible in the source; it looks like lines were lost before
    # this read of ``output`` -- restore the run step from the original.
    predicted_labels = f_predict.output
    timestamps = [image_path_to_time(x) for x in list_test_images]

    # Per-label count of frames classified as that target label.
    results = dict.fromkeys(target_labels, 0)
    for ts, clf_output in zip(timestamps, predicted_labels):
        clf_output = int(clf_output)
        if clf_output in target_labels:
            ra.add_image_result(ts, target_labels[clf_output])
            results[clf_output] += 1

    for clf_output, tl in target_labels.items():
        num_img_results = results[clf_output]
        frames_current_tl = num_img_results / float(num_frames)
        # A label is accepted for the video only when both the fraction of
        # matching frames and their absolute count reach the thresholds.
        video_result = (frames_current_tl >= cnn_ff.accept_th
                        and num_img_results >= cnn_ff.min_accept)
        if video_result:
            # NOTE(review): the body of this branch was missing from the
            # source; recording the label as a video-level result is a
            # reconstruction -- confirm the ResultAggregator method name.
            ra.add_video_result(tl)

    return ra.result_dict
def make_infofile(outfile, imagedir, default_images, min_images):
    """Write infofile for bovw that contains sampling of images from a directory.

    One line is written per sampled image, in the form
    ``<absolute path> 1 1 <timestamp>``.

    Args:
        outfile: path of the infofile to write
        imagedir: directory the images are sampled from
        default_images: forwarded to ``sample_images`` as its second argument
        min_images: forwarded to ``sample_images`` as its third argument

    Raises:
        NotEnoughImages: if ``sample_images`` returns nothing
    """
    images = sample_images(imagedir, default_images, min_images)
    if not images:
        raise NotEnoughImages
    # The file is opened only after the check above so a failed sampling
    # does not leave an empty infofile behind.
    with open(outfile, 'w') as infofile:
        for path in images:
            path = os.path.abspath(path)
            timestamp = image_path_to_time(path)
            infofile.write('%s 1 1 %s\n' % (path, timestamp))
# Example #4
def detect_judge_video(imagedir):
    """Detect faces in frames sampled from imagedir.

    Every box returned by ``FaceExtractor.extract_faces`` is added to a
    ResultAggregator under the label 'Face', keyed by the frame timestamp.

    Args:
        imagedir: directory the frames are sampled from

    Returns:
        The ``result_dict`` of the ResultAggregator; no boxes are added when
        sampling yields nothing.
    """
    # NOTE(review): this call was truncated in the source (a trailing
    # argument, presumably a minimum image count, was lost). It is closed
    # here with the two arguments that survived -- restore the third one
    # from the original module.
    face_images = sample_images(imagedir, FACE_IMAGES_PER_VIDEO)
    extractor = FaceExtractor()
    ra = ResultAggregator()
    # ``or []`` keeps a falsy sampling result (e.g. None) from breaking the
    # loop.
    for image in face_images or []:
        ts = image_path_to_time(image)
        for x, y, w, h in extractor.extract_faces(image):
            ra.add_new_box(x, y, w, h, ts, 'Face')
    return ra.result_dict
# Example #5
def judge_images(image_dir, model_dir, image_threshold):
    """Classify frames sampled from image_dir with a SpatialSceneClassifier.

    Args:
        image_dir: directory the frames are sampled from
        model_dir: directory passed to ``SpatialSceneClassifier.load_from_dir``
        image_threshold: forwarded to the classifier as ``svm_threshold``

    Returns:
        dict mapping each frame timestamp to True when the classifier's label
        for that frame equals ``POS_LABEL``, else False.

    Raises:
        NotEnoughImages: if ``sample_images`` returns None
    """
    # NOTE(review): this call was truncated in the source (a trailing
    # argument, presumably a minimum image count, was lost). It is closed
    # here with the two arguments that survived -- restore the third one
    # from the original module.
    image_paths = sample_images(image_dir, DEFAULT_IMAGES_PER_VIDEO)
    if image_paths is None:
        raise NotEnoughImages
    image_paths = [os.path.abspath(x) for x in image_paths]
    timestamps = [image_path_to_time(x) for x in image_paths]
    clf = SpatialSceneClassifier.load_from_dir(model_dir)
    labels = clf.test(image_paths, svm_threshold=image_threshold)
    # bool() keeps the values plain Python booleans, as before.
    results = [bool(label == POS_LABEL) for label in labels]
    return dict(zip(timestamps, results))
# Example #6
def judge_video(clf_model_dir, imagedir):
    """Run logo detection on frames sampled from imagedir.

    Samples ``SAMPLE_IMAGE_COUNT`` frames, keeps the first
    ``IMAGES_PER_VIDEO`` of them, asks a LogoClient for predictions and
    records every predicted box under the label 'Logo'.

    Args:
        clf_model_dir: directory used to look up the model name
        imagedir: directory the frames are sampled from

    Returns:
        The ``result_dict`` of the ResultAggregator holding all boxes.
    """
    sampled_paths = sample_images(imagedir, SAMPLE_IMAGE_COUNT)
    frame_paths = sampled_paths[:IMAGES_PER_VIDEO]

    client = LogoClient(get_model_name(clf_model_dir))
    predictions = client.predict(frame_paths)
    frame_times = [image_path_to_time(path) for path in frame_paths]

    aggregator = ResultAggregator()
    for frame_time, frame_boxes in zip(frame_times, predictions):
        # Each prediction row is ordered (height, width, y, x, label id).
        for box in frame_boxes:
            height, width, top, left, logo_label_id = box
            aggregator.add_new_box(
                left, top, width, height, frame_time, 'Logo',
                label_id=logo_label_id)
    return aggregator.result_dict
# Example #7
    def detect_text_in_video(self, image_dir):
        """Returns bounding rectangles for each frame in the video.  Only
        accepts a max number of images.  If there are too many images we only
        evaluate up to max_imgs.

        The sampled frames are stored on ``self.img_files`` before
        ``self.test()`` is called.

        Args:
            image_dir: directory the frames are sampled from.
                NOTE(review): the original text documented this argument as
                "frames: list of imgs from video, assuming they end in
                '.jpg'" -- confirm which description is current.

        Returns:
            img_files: list of img files
            bounding_rects: list of bounding rectangles
        """
        self.img_files = sample_images(image_dir, self.max_imgs, 0)

        bounding_rects = self.test()
        return self.img_files, bounding_rects