def recognize_text_in_video(self, image_dir, box_ids):
    """Map text box ids to the words recognised inside them.

    Frames are sampled from ``image_dir`` through ``sample_images`` (called
    with ``self.max_imgs`` and ``0``) and keyed by the timestamp derived
    from each path. Boxes whose ids are in ``box_ids`` are loaded from the
    database in ascending id order; a box is kept only when its timestamp
    matches one of the sampled frames, otherwise an info message is logged.

    Args:
        image_dir: string path to images
        box_ids: list of box ids

    Returns:
        dict mapping each kept box id to the entry of ``self.test``'s
        output found at that id's position in ascending id order.
    """
    sampled = sample_images(image_dir, self.max_imgs, 0)
    images = {image_path_to_time(path): path for path in sampled}

    query = session.query(
        Box.timestamp, Box.height, Box.width, Box.y, Box.x, Box.id)
    query = query.filter(Box.id.in_(box_ids)).order_by(Box.id.asc())
    rows = query.all()

    all_box_ids = []
    filtered_boxes = defaultdict(list)
    for row in rows:
        # Unpack in the order the columns were selected: y comes before x.
        timestamp, height, width, box_y, box_x, box_id = row
        if timestamp not in images:
            logger.info(
                'Image %s not in dir %s' % (timestamp, image_dir))
            continue
        # Each box is handed over as [height, width, y, x].
        filtered_boxes[timestamp].append([height, width, box_y, box_x])
        all_box_ids.append(box_id)

    words = self.test(images, filtered_boxes)

    # NOTE(review): this pairing assumes self.test returns one entry per
    # kept box, ordered by ascending box id - confirm against self.test.
    return {
        box_id: words[position]
        for position, box_id in enumerate(sorted(all_box_ids))
    }
def judge_video(image_dir, model_dir):
    """Decide whether the frames in image_dir show the targeted content.

    The classifier is LOADED from model_dir on every call. Frames are
    sampled from image_dir (at most ``max_frames_per_video`` is requested),
    run through the prediction flow, and every prediction that is one of
    the classifier's target labels is recorded as an image result. A target
    label is then recorded as a video result when both its share of the
    sampled frames reaches ``accept_th`` and its absolute frame count
    reaches ``min_accept``.

    Args:
        image_dir: string, path to folder with video frames
        model_dir: string, path to folder with classifier model files

    Returns:
        The ``result_dict`` of the ResultAggregator that collected the
        image and video results (nothing is added to it when no frames
        were sampled).

    Raises:
        AssertionError: if the folder with images or the folder with model
            files does not exist.
    """
    assert os.path.exists(image_dir), \
        "Frames folder %s does not exist" % (image_dir)
    assert os.path.exists(model_dir), \
        "Model files folder %s does not exist" % (model_dir)

    ra = ResultAggregator()

    cnn_ff = CnnClassifierFlowFactory(
        os.path.join(model_dir, 'Configfile.cfg'))
    cnn_ff.load_model(model_dir)
    frame_limit = cnn_ff.max_frames_per_video
    f_predict = cnn_ff.get_process_video_flow()
    target_labels = cnn_ff.target_labels

    frame_paths = sample_images(image_dir, frame_limit)
    num_frames = len(frame_paths)
    if not num_frames:
        # Nothing to classify; also avoids dividing by zero below.
        return ra.result_dict

    f_predict.run_flow(image_paths=frame_paths)
    predictions = f_predict.output
    timestamps = [image_path_to_time(path) for path in frame_paths]

    # Number of frames predicted as each target label.
    hits = {label_id: 0 for label_id in target_labels.keys()}
    for ts, raw_prediction in zip(timestamps, predictions):
        label_id = int(raw_prediction)
        if label_id not in target_labels:
            continue
        ra.add_image_result(ts, target_labels[label_id])
        hits[label_id] += 1

    for label_id, label in target_labels.items():
        # np.sum over a scalar count; kept for parity with existing
        # behaviour.
        count = np.sum(hits[label_id])
        share = count / float(num_frames)
        if share >= cnn_ff.accept_th and count >= cnn_ff.min_accept:
            ra.add_video_result(label)

    return ra.result_dict
def make_infofile(outfile, imagedir, default_images, min_images):
    """Write a bovw infofile listing images sampled from a directory.

    One line is written per sampled image, in the form
    ``<absolute path> 1 1 <timestamp>``.

    Args:
        outfile: object with a ``write`` method that receives the lines
        imagedir: directory the images are sampled from
        default_images: forwarded to ``sample_images``
        min_images: forwarded to ``sample_images``

    Raises:
        NotEnoughImages: if ``sample_images`` returns nothing (None or an
            empty collection).
    """
    sampled = sample_images(imagedir, default_images, min_images)
    if not sampled:
        raise NotEnoughImages

    for image_path in sampled:
        abs_path = os.path.abspath(image_path)
        # The timestamp is derived from the absolute path.
        stamp = image_path_to_time(abs_path)
        outfile.write('%s 1 1 %s\n' % (abs_path, stamp))
def detect_judge_video(imagedir):
    """Collect the face boxes found in frames sampled from imagedir.

    Args:
        imagedir: directory the frames are sampled from

    Returns:
        The ``result_dict`` of a ResultAggregator holding one 'Face' box
        per extracted face; no boxes are added when sampling yields
        nothing.
    """
    sampled = sample_images(imagedir, FACE_IMAGES_PER_VIDEO,
                            FACE_MIN_IMAGES_PER_VIDEO)
    extractor = FaceExtractor()
    ra = ResultAggregator()

    # A falsy sampling result (None or empty) simply means no frames.
    if sampled:
        for frame_path in sampled:
            frame_time = image_path_to_time(frame_path)
            for x, y, w, h in extractor.extract_faces(frame_path):
                ra.add_new_box(x, y, w, h, frame_time, 'Face')

    return ra.result_dict
def judge_images(image_dir, model_dir, image_threshold):
    """Classify frames sampled from image_dir, keyed by timestamp.

    Args:
        image_dir: directory the frames are sampled from
        model_dir: directory the SpatialSceneClassifier is loaded from
        image_threshold: passed to the classifier as ``svm_threshold``

    Returns:
        dict mapping each frame timestamp to True when the classifier
        label for that frame equals POS_LABEL, False otherwise.

    Raises:
        NotEnoughImages: if ``sample_images`` returns None.
    """
    sampled = sample_images(image_dir, DEFAULT_IMAGES_PER_VIDEO,
                            MIN_IMAGES_PER_VIDEO)
    if sampled is None:
        raise NotEnoughImages

    abs_paths = []
    for path in sampled:
        abs_paths.append(os.path.abspath(path))
    timestamps = [image_path_to_time(path) for path in abs_paths]

    classifier = SpatialSceneClassifier.load_from_dir(model_dir)
    labels = classifier.test(abs_paths, svm_threshold=image_threshold)

    # Collapse every label into a plain boolean verdict.
    verdicts = [bool(label == POS_LABEL) for label in labels]
    return dict(zip(timestamps, verdicts))
def judge_video(clf_model_dir, imagedir):
    """Collect logo boxes predicted on a random subset of sampled frames.

    SAMPLE_IMAGE_COUNT is passed to ``sample_images``; the sampled frames
    are shuffled in place and the first IMAGES_PER_VIDEO of them are sent
    to the LogoClient for the model named by ``clf_model_dir``.

    Args:
        clf_model_dir: model directory used to look up the model name
        imagedir: directory the frames are sampled from

    Returns:
        The ``result_dict`` of a ResultAggregator holding one 'Logo' box
        per predicted box, tagged with its target label id.
    """
    candidates = sample_images(imagedir, SAMPLE_IMAGE_COUNT)
    random.shuffle(candidates)
    chosen = candidates[:IMAGES_PER_VIDEO]

    logo_client = LogoClient(get_model_name(clf_model_dir))
    predictions = logo_client.predict(chosen)
    frame_times = map(image_path_to_time, chosen)

    ra = ResultAggregator()
    for frame_time, labeled_boxes in zip(frame_times, predictions):
        # Predicted boxes arrive as (h, w, y, x, label id); the aggregator
        # takes them as (x, y, w, h).
        for h, w, y, x, target_label_id in labeled_boxes:
            ra.add_new_box(x, y, w, h, frame_time, 'Logo',
                           label_id=target_label_id)
    return ra.result_dict
def detect_text_in_video(self, image_dir):
    """Return the sampled frames of a video and their bounding rectangles.

    The frames are chosen by ``sample_images``, called with
    ``self.max_imgs`` and ``0``, and stored on ``self.img_files`` before
    ``self.test()`` is run.

    Args:
        image_dir: string path to the folder with the video frames

    Returns:
        img_files: the value of ``self.img_files`` after ``self.test()``
            has run
        bounding_rects: whatever ``self.test()`` returned
    """
    self.img_files = sample_images(image_dir, self.max_imgs, 0)
    # Run detection before reading self.img_files back for the result.
    rects = self.test()
    return self.img_files, rects