def __init__(self, path_video_file, frame_width, frame_height,
             use_saved=True):
    """Initialize the video segmentation module with a video

    Arguments:
        path_video_file {str} -- path of the video to segment
        frame_width {int} -- frame width of the video
        frame_height {int} -- frame height of the video

    Keyword Arguments:
        use_saved {bool} -- whether to use a saved feature matrix file
            (default: {True})
    """
    self.video_reader = VideoIO(path_video_file, frame_width, frame_height)
    feature_matrix_path = VideoSegment.get_feature_matrix_path(
        path_video_file)
    if use_saved and os.path.exists(feature_matrix_path):
        self.feature_matrix_A = np.load(feature_matrix_path)
    else:
        self.feature_matrix_A = VideoSegment.get_feature_matrix(
            self.video_reader)
        np.save(feature_matrix_path, self.feature_matrix_A)
    # Decompose A (features x frames); the right singular vectors give a
    # low-rank embedding of each frame that _segment() works on
    self.u, self.s, self.vh = np.linalg.svd(self.feature_matrix_A,
                                            full_matrices=False)
    self.shot_boundaries = self._segment()
    logger.d('shot_boundaries', self.shot_boundaries)
    self.content_shots, self.ads_shots = self._tag_content_ads()
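# _segment() is not shown in this excerpt. A minimal sketch of one plausible
# approach, assuming it uses the SVD computed above: take a rank-k embedding
# of each frame (the columns of diag(s) @ vh) and declare a shot boundary
# wherever the cosine similarity of consecutive frames drops below a
# threshold. Both k and sim_min are illustrative values, not from the source.
import numpy as np

def segment_by_svd(s, vh, k=10, sim_min=0.9):
    """Return frame indices at which a new shot is assumed to start."""
    frames = s[:k, None] * vh[:k, :]     # rank-k embedding, one column per frame
    norms = np.linalg.norm(frames, axis=0)
    # cosine similarity between each pair of consecutive frame embeddings
    sims = np.sum(frames[:, :-1] * frames[:, 1:], axis=0) / (
        norms[:-1] * norms[1:])
    return np.where(sims < sim_min)[0] + 1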
def start(self):
    logger.d("start")
    if self.state == 0:
        self.state = 1
        try:
            _thread.start_new_thread(self.videoplayer, ())
            _thread.start_new_thread(self.audioplayer, ())
        except Exception:
            logger.e("Error: unable to start thread")
def stop(self):
    logger.d("stop")
    self.state = 0
    self.old = -1
    self.index = 0
    self.logo_index = 0
    # Reset the display to the first frame and rewind the audio
    self.pilImage = self.video_io.read_frame(self.index)
    self.tkImage = ImageTk.PhotoImage(image=self.pilImage)
    self.label2.configure(image=self.tkImage)
    self.root.update_idletasks()
    self.read_wav()
def seek(self, frame_index):
    """Move the current pointer in the target file to a given frame index

    Arguments:
        frame_index {int} -- the frame index to set the current pointer to
    """
    if self.file.closed:
        self.file = open(self.file_path,
                         'rb' if self.mode == 'r' else 'wb')
    # Each raw RGB frame occupies width * height * 3 bytes
    offset = self.width * self.height * 3 * frame_index
    self.file.seek(offset)
    logger.d('file current position', self.file.tell())
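# Quick arithmetic check of the offset formula above, for an illustrative
# 480x270 RGB24 stream: each frame occupies width * height * 3 bytes, so
# frame 100 starts at byte 100 * 480 * 270 * 3.
width, height, frame_index = 480, 270, 100
offset = width * height * 3 * frame_index
print(offset)  # 38880000 -> seek() lands exactly at the start of frame 100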
def remove_ads(dataset_idx):
    dataset = DATASETS[dataset_idx]
    video_input = dataset['video']
    audio_input = dataset['audio']
    width = dataset['width']
    height = dataset['height']
    outputs = OUTPUTS[dataset_idx]['no_ads']
    video_output = outputs['video']
    audio_output = outputs['audio']
    video_segment = VideoSegment(video_input, width, height)
    content, ads = video_segment.get_content_ads_shots()
    logger.d('content', content)
    logger.d('ads', ads)
    video_segment.save_content(video_output, audio_input, audio_output)
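# DATASETS and OUTPUTS are defined elsewhere in the repo. From the keys read
# above (and 'brands_to_detect' used in the detection script), each entry
# plausibly looks like this sketch; all paths and dimensions here are made
# up for illustration.
DATASETS = [
    {
        'video': 'dataset/Videos/data_test1.rgb',   # raw RGB frame stream
        'audio': 'dataset/Videos/data_test1.wav',
        'width': 480,
        'height': 270,
        'brands_to_detect': ['starbucks', 'subway'],
    },
]
OUTPUTS = [
    {
        'no_ads': {
            'video': 'output/data_test1_no_ads.rgb',
            'audio': 'output/data_test1_no_ads.wav',
        },
    },
]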
def get_feature_matrix(video_io):
    feature_matrix_A = []
    num_frames = video_io.get_num_frames()
    logger.d('Total # of frames', num_frames)
    for i in range(num_frames):
        logger.i('Getting feature matrix for frame #%d...' % i)
        frame = video_io.read_frame()
        if frame is None:
            break
        feature = VideoSegment.create_binned_histograms(frame)
        feature_matrix_A.append(feature)
    # Stack one feature vector per frame, then transpose so that each
    # column of A corresponds to a frame
    feature_matrix_A = np.transpose(feature_matrix_A)
    logger.d('shape(A)', feature_matrix_A.shape)
    return feature_matrix_A
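# create_binned_histograms is referenced above but not defined in this
# excerpt. A minimal sketch of one common choice -- a coarse 3D color
# histogram per frame -- follows; the bin count and the RGB input format
# are assumptions, not taken from the source.
import numpy as np

def create_binned_histograms(frame, bins_per_channel=4):
    """Return a flat feature vector: a bins^3 color histogram of the frame."""
    pixels = np.asarray(frame).reshape(-1, 3)          # (n_pixels, RGB)
    hist, _ = np.histogramdd(pixels,
                             bins=(bins_per_channel,) * 3,
                             range=((0, 256),) * 3)
    return hist.ravel() / pixels.shape[0]              # normalize by pixel count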
def _logo_data_with_ads(self):
    """Generate new logo data (frame indices and polygon areas of logos)
    in the video with ads inserted, using the old logo data from the
    no-ads video

    Returns:
        list(tuple) -- new logo data
    """
    ads_to_insert = self.logo_first_occurences
    pos_ads_length = []
    for ad_name in ads_to_insert:
        ad = self.brands_to_detect[ad_name]['ad']
        ad_n_frames = VideoIO(
            ad['video'], self.video_reader.width,
            self.video_reader.height).get_num_frames()
        pos_ads_length.append((ads_to_insert[ad_name], ad_n_frames))
    pos_ads_length = np.array(sorted(pos_ads_length, key=lambda t: t[0]))
    logger.d('pos_ads_length', pos_ads_length)
    logo_data_with_ads = self.logo_data_in_video.copy()
    logger.d('logo_data_in_video',
             np.array(self.logo_data_in_video, dtype='object'))
    # Every logo occurrence shifts forward by the total length (in frames)
    # of all ads inserted before it
    for t in logo_data_with_ads:
        prev_positions = np.where(pos_ads_length[:, 0] < t[0])[0]
        t[0] += sum(pos_ads_length[prev_positions][:, 1])
    logger.d('logo_data_with_ads',
             np.array(logo_data_with_ads, dtype='object'))
    return logo_data_with_ads
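# Worked example of the shift above (illustrative numbers): two ads of 150
# and 240 frames inserted at frames 1000 and 5000 of the no-ads video.
import numpy as np

pos_ads_length = np.array([[1000, 150], [5000, 240]])
for old_idx in (3000, 6200):
    prev = np.where(pos_ads_length[:, 0] < old_idx)[0]
    print(old_idx, '->', old_idx + pos_ads_length[prev][:, 1].sum())
# 3000 -> 3150 (shifted past the first ad only)
# 6200 -> 6590 (shifted past both ads)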
def _tag_content_ads(self, threshold=SHOT_SIM_MIN):
    """Tag each shot as content or ad

    Keyword Arguments:
        threshold {float} -- a parameter for tagging
            (default: {SHOT_SIM_MIN})

    Returns:
        tuple(list, list) -- indices of content shots and ad shots
    """
    # We assume the longest shot in a video is not an ad
    _longest_shot_idx = self._get_longest_shot_idx()
    logger.d('_longest_shot_idx', _longest_shot_idx)
    _similarity = np.array(
        self._calc_shots_differences()[_longest_shot_idx])
    logger.d('_similarity', _similarity)
    # Split shots by similarity to the longest shot; whichever class has
    # the greater total duration is taken to be content
    _one_class = np.where(_similarity < threshold)[0]
    _other_class = np.where(_similarity >= threshold)[0]
    if self._get_shot_set_duration(
            _one_class) > self._get_shot_set_duration(_other_class):
        return _one_class, _other_class
    return _other_class, _one_class
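# Tiny worked example of the split above, with illustrative numbers: given
# threshold 0.5 and similarities [0.9, 0.2, 0.8, 0.1] to the longest shot,
# shots {1, 3} fall below the threshold and shots {0, 2} land at or above
# it; whichever group has the greater total duration is returned as content.
import numpy as np

similarity = np.array([0.9, 0.2, 0.8, 0.1])
threshold = 0.5
one_class = np.where(similarity < threshold)[0]     # [1, 3]
other_class = np.where(similarity >= threshold)[0]  # [0, 2]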
def _detect(self):
    """Detect logos in all frames"""
    while (self.video_reader.get_next_frame_idx()
           < self.video_reader.get_num_frames()):
        frame_idx = self.video_reader.get_next_frame_idx()
        logger.d('Detecting logo in %d' % frame_idx)
        pil_image = self.video_reader.read_frame()
        brand_areas = self.logo_detector.detect(pil_image)
        if len(brand_areas) == 0:
            logger.i('Frame[%d]: no logo detected' % frame_idx)
        else:
            for logo_name, logo_poly in brand_areas:
                logger.i('Frame[%d]: logo [%s] at area %s' % (
                    frame_idx, logo_name, logo_poly.tolist()))
                self.logo_data_in_video.append(
                    [frame_idx, logo_name, logo_poly.tolist()])
                if logo_name not in self.logo_first_occurences:
                    self.logo_first_occurences[logo_name] = frame_idx
        # Sample every SKIP_FRAME-th frame to trade recall for speed
        self.video_reader.skip_frame(VideoLogoDetect.SKIP_FRAME)
    logo_data_path = path_util.get_video_logo_data_path(self.video_path)
    with open(logo_data_path, 'wb') as f:
        pickle.dump(self.logo_data_in_video, f)
    logger.i('Logo detection data saved to %s' % logo_data_path)
def _sift_match(self, logo, frame_img):
    """Use SIFT to match a logo to a given frame image

    Arguments:
        logo {dict} -- dict of logo with name, image, key points, and
            descriptor
        frame_img {cv Image} -- an OpenCV image

    Returns:
        numpy.ndarray or None -- a polygon representing the homography if
            there are valid matches, else None
    """
    logo_name, logo_img = logo['name'], logo['img']
    kp_logo, des_logo = logo['keypoints'], logo['descriptor']
    logger.d('logo_name', logo_name)
    kp_frame, des_frame = self.sift.detectAndCompute(frame_img, None)
    FLANN_INDEX_KDTREE = 0
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)
    flann = cv.FlannBasedMatcher(index_params, search_params)
    matches = flann.knnMatch(des_logo, des_frame, k=2)
    # Lowe's ratio test to filter ambiguous matches
    good = [m for m, n in matches if m.distance < 0.75 * n.distance]
    if len(good) > LogoDetector.MIN_MATCH_COUNT:
        src_pts = np.float32([kp_logo[m.queryIdx].pt
                              for m in good]).reshape(-1, 1, 2)
        dst_pts = np.float32([kp_frame[m.trainIdx].pt
                              for m in good]).reshape(-1, 1, 2)
        M, mask = cv.findHomography(src_pts, dst_pts, cv.RANSAC, 1.0)
        matchesMask = mask.ravel().tolist()
        len_mask = np.count_nonzero(matchesMask)
        if len_mask > LogoDetector.MIN_RANSAC_MATCH_COUNT:
            logger.d('np.count_nonzero(matchesMask)', len_mask)
            h, w = logo_img.shape  # logo image is grayscale
            pts = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1],
                              [w - 1, 0]]).reshape(-1, 1, 2)
            # Project the logo's corners into the frame
            dst = cv.perspectiveTransform(pts, M)
            logger.d('dst', dst)
            poly = np.int32(dst).reshape(4, 2)
            if util.valid_poly(poly):
                return poly
            logger.d("Polygon not valid")
        else:
            logger.d("Not enough matches after RANSAC: %d/%d" % (
                len_mask, LogoDetector.MIN_RANSAC_MATCH_COUNT))
    else:
        logger.d("Not enough matches are found: %d/%d" % (
            len(good), LogoDetector.MIN_MATCH_COUNT))
    return None
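# util.valid_poly is not shown in this excerpt. Judging from the checks the
# standalone prototype below logs (util.if_all_clockwise, util.if_not_slim),
# it plausibly rejects degenerate homographies whose projected quadrilateral
# self-intersects or is needle-thin. A rough sketch under that assumption,
# with an illustrative aspect-ratio threshold:
import numpy as np

def valid_poly(poly, min_aspect=0.1):
    """Accept a 4x2 quad with consistent corner winding and a bounding box
    that is not extremely slim."""
    # z-components of cross products of consecutive edges must share a sign
    # (convex quad with consistent winding)
    edges = np.roll(poly, -1, axis=0) - poly
    crosses = np.cross(edges, np.roll(edges, -1, axis=0))
    if not (np.all(crosses > 0) or np.all(crosses < 0)):
        return False
    w, h = poly.max(axis=0) - poly.min(axis=0)
    return min(w, h) > 0 and min(w, h) / max(w, h) >= min_aspect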
def pause(self):
    logger.d("pause")
    self.state = 0
from data import BRANDS
from logger import logger

logger.set_level('i')

# initializing
dataset_idx = 0
dataset = DATASETS[dataset_idx]
brands_to_detect = {k: BRANDS[k]['logo']
                    for k in dataset['brands_to_detect']}
video_io = VideoIO(dataset['video'], dataset['width'], dataset['height'])

# detect
logo_detector = LogoDetector(brands_to_detect)
logo_data_in_video = []
while video_io.get_next_frame_idx() < video_io.get_num_frames():
    frame_idx = video_io.get_next_frame_idx()
    logger.d('Detecting logo in %d' % frame_idx)
    pil_image = video_io.read_frame()
    brand_areas = logo_detector.detect(pil_image)
    if len(brand_areas) == 0:
        logger.i('Frame[%d]: no logo detected' % frame_idx)
    else:
        for logo_name, logo_poly in brand_areas:
            logger.i('Frame[%d]: logo [%s] at area %s' % (
                frame_idx, logo_name, logo_poly.tolist()))
            logo_data_in_video.append(
                (frame_idx, logo_name, logo_poly.tolist()))
    video_io.skip_frame(1)

logo_data_path = path_util.get_video_logo_data_path(dataset['video'])
with open(logo_data_path, 'wb') as f:
    pickle.dump(logo_data_in_video, f)
FLANN_INDEX_KDTREE = 0  # matches the value used in LogoDetector._sift_match
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
search_params = dict(checks=50)
flann = cv.FlannBasedMatcher(index_params, search_params)
matches = flann.knnMatch(des1, des2, k=2)

# Lowe's ratio test to filter ambiguous matches
good = []
for m, n in matches:
    if m.distance < 0.75 * n.distance:
        good.append(m)

if len(good) > MIN_MATCH_COUNT:
    src_pts = np.float32([kp1[m.queryIdx].pt
                          for m in good]).reshape(-1, 1, 2)
    dst_pts = np.float32([kp2[m.trainIdx].pt
                          for m in good]).reshape(-1, 1, 2)
    M, mask = cv.findHomography(src_pts, dst_pts, cv.RANSAC, 1.0)
    matchesMask = mask.ravel().tolist()
    len_mask = np.count_nonzero(matchesMask)
    if len_mask > MIN_RANSAC_MATCH_COUNT:
        logger.d('np.count_nonzero(matchesMask)', len_mask)
        h, w = img1.shape
        pts = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1],
                          [w - 1, 0]]).reshape(-1, 1, 2)
        dst = cv.perspectiveTransform(pts, M)
        logger.d('dst', np.int32(dst).reshape(4, 2).tolist())
        logger.d('if_all_clockwise(dst)', util.if_all_clockwise(dst))
        logger.d('if_not_slim(dst)', util.if_not_slim(dst))
        img2 = cv.polylines(img2, [np.int32(dst)], True, 255, 3,
                            cv.LINE_AA)
        draw_params = dict(
            matchColor=(0, 255, 0),   # draw matches in green color
            singlePointColor=None,
            matchesMask=matchesMask,  # draw only inliers
            flags=2)
        img3 = cv.drawMatches(img1, kp1, img2, kp2, good, None,
                              **draw_params)
        plt.imshow(img3, 'gray'), plt.show()
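# The prototype above assumes img1/img2 and their SIFT keypoints/descriptors
# already exist. A minimal setup it presumes might look like this; the file
# names and thresholds are placeholders, and SIFT_create requires
# OpenCV >= 4.4 (earlier versions ship SIFT in opencv-contrib).
import cv2 as cv

MIN_MATCH_COUNT = 10
MIN_RANSAC_MATCH_COUNT = 8

img1 = cv.imread('logo.png', cv.IMREAD_GRAYSCALE)   # query logo
img2 = cv.imread('frame.png', cv.IMREAD_GRAYSCALE)  # target frame
sift = cv.SIFT_create()
kp1, des1 = sift.detectAndCompute(img1, None)
kp2, des2 = sift.detectAndCompute(img2, None)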
def in_shots(s, shots):
    # NOTE: this helper appears truncated in the source; the signature is
    # reconstructed from the calls below, and the base case that compares s
    # against a single shot is not shown in this excerpt.
    result = False
    for shot in shots:
        result = result or in_shots(s, shot)
    return result


def get_expected_indices(segment_shots, expected_contents):
    content_indices = []
    ads_indices = []
    for i, shot in enumerate(segment_shots):
        if in_shots(shot, expected_contents):
            content_indices.append(i)
        else:
            ads_indices.append(i)
    return np.array(content_indices), np.array(ads_indices)


dataset_idx = 0
dataset = DATASETS[dataset_idx]
video_segment = VideoSegment(dataset['video'], dataset['width'],
                             dataset['height'])
content, ads = video_segment.get_content_ads_shots()
content_expected, ads_expected = get_expected_indices(
    video_segment.get_all_shots(), EXPECTED[dataset_idx]['content_shots'])
logger.d('shots', np.array(video_segment.get_all_shots()))
logger.d('content', video_segment.content_shots)
logger.d('content_expected', content_expected)
logger.d('ads', video_segment.ads_shots)
logger.d('ads_expected', ads_expected)