Esempio n. 1
0
 def __init__(self,
              path_video_file,
              frame_width,
              frame_height,
              use_saved=True):
     """Set up video segmentation for one video and run it immediately

     Construction opens the video, obtains its feature matrix (from the
     cached file when allowed and present, otherwise by computing and then
     caching it), factorizes the matrix with a reduced SVD, and finally
     computes the shot boundaries and the content/ad tagging.

     Arguments:
         path_video_file {str} -- path of video to segment
         frame_width {int} -- frame width of the video
         frame_height {int} -- frame height of the video

     Keyword Arguments:
         use_saved {bool} -- whether to use saved feature matrix file or not (default: {True})
     """
     self.video_reader = VideoIO(path_video_file, frame_width, frame_height)
     cache_path = VideoSegment.get_feature_matrix_path(path_video_file)
     cache_usable = use_saved and os.path.exists(cache_path)
     if not cache_usable:
         # No usable cache: compute the matrix from the video, then persist it.
         # NOTE(review): np.save appends ".npy" when the path lacks that
         # suffix -- confirm get_feature_matrix_path already includes it,
         # otherwise the existence check above can never succeed.
         self.feature_matrix_A = VideoSegment.get_feature_matrix(
             self.video_reader)
         np.save(cache_path, self.feature_matrix_A)
     else:
         self.feature_matrix_A = np.load(cache_path)
     # Reduced SVD (full_matrices=False) of the feature matrix.
     decomposition = np.linalg.svd(self.feature_matrix_A,
                                   full_matrices=False)
     self.u, self.s, self.vh = decomposition
     self.shot_boundaries = self._segment()
     logger.d('shot_boundaries', self.shot_boundaries)
     tagged_shots = self._tag_content_ads()
     self.content_shots, self.ads_shots = tagged_shots
Esempio n. 2
0
 def start(self):
     """Start playback by launching the video and audio player threads

     Does nothing (apart from logging) unless ``self.state`` is 0. Otherwise
     sets ``self.state`` to 1 and starts ``self.videoplayer`` and
     ``self.audioplayer`` on separate threads. A failure to start a thread
     is logged rather than raised.
     """
     logger.d("start")
     if self.state != 0:
         return
     self.state = 1
     try:
         _thread.start_new_thread(self.videoplayer, ())
         _thread.start_new_thread(self.audioplayer, ())
     except Exception:
         # Deliberately best-effort, but only for ordinary errors: a bare
         # ``except`` would also swallow KeyboardInterrupt and SystemExit.
         logger.e("Error: unable to start thread")
Esempio n. 3
0
 def stop(self):
     """Stop playback and rewind the player to the first frame

     Resets the playback bookkeeping (``state``, ``old``, ``index``,
     ``logo_index``), reads frame ``self.index`` (now 0) through
     ``self.video_io``, shows it on ``self.label2`` and then calls
     ``self.read_wav()``.
     """
     logger.d("stop")
     self.state = 0
     self.old = -1
     self.index = 0
     self.logo_index = 0
     self.pilImage = self.video_io.read_frame(self.index)
     # The PhotoImage is stored on self so a reference to it outlives
     # this call.
     self.tkImage = ImageTk.PhotoImage(image=self.pilImage)
     # Use the full option name; the previous ``imag=`` spelling only worked
     # if the widget accepts abbreviated option names.
     self.label2.configure(image=self.tkImage)
     self.root.update_idletasks()
     self.read_wav()
Esempio n. 4
0
 def seek(self, frame_index):
     """Position the underlying file at the start of a given frame

     If the file has been closed it is reopened first, in binary read mode
     when ``self.mode`` is ``'r'`` and in binary write mode otherwise.

     Arguments:
         frame_index {int} -- the frame index position to set the current pointer
     """
     if self.file.closed:
         if self.mode == 'r':
             reopen_mode = 'rb'
         else:
             # NOTE(review): reopening with 'wb' truncates an existing
             # file -- confirm this is intended for write mode.
             reopen_mode = 'wb'
         self.file = open(self.file_path, reopen_mode)
     # A frame occupies width * height * 3 bytes (presumably 3 bytes per
     # pixel -- confirm against the file format).
     bytes_per_frame = self.width * self.height * 3
     self.file.seek(bytes_per_frame * frame_index)
     logger.d('file current position', self.file.tell())
Esempio n. 5
0
def remove_ads(dataset_idx):
    """Segment one dataset's video and save its content without the ads.

    Looks up the input video/audio paths and frame size in ``DATASETS`` and
    the 'no_ads' output paths in ``OUPUTS``, runs ``VideoSegment`` on the
    input video, logs the detected content and ad shots, and asks the
    segmenter to save the content.

    Arguments:
        dataset_idx -- index into ``DATASETS`` and ``OUPUTS``
    """
    source = DATASETS[dataset_idx]
    src_video, src_audio = source['video'], source['audio']
    frame_width, frame_height = source['width'], source['height']

    destination = OUPUTS[dataset_idx]['no_ads']
    dst_video, dst_audio = destination['video'], destination['audio']

    segmenter = VideoSegment(src_video, frame_width, frame_height)
    content_shots, ad_shots = segmenter.get_content_ads_shots()
    logger.d('content', content_shots)
    logger.d('ads', ad_shots)
    segmenter.save_content(dst_video, src_audio, dst_audio)
Esempio n. 6
0
 def get_feature_matrix(video_io):
     """Build the feature matrix of a video from per-frame binned histograms

     Reads up to ``video_io.get_num_frames()`` frames sequentially, stopping
     early if a read returns ``None``, and stacks one feature per frame.
     The stacked features are transposed before being returned (so,
     assuming each feature is a 1-D vector, each column is one frame).

     Arguments:
         video_io -- reader exposing ``get_num_frames()`` and ``read_frame()``

     Returns:
         numpy.ndarray -- the transposed stack of per-frame features
     """
     total_frames = video_io.get_num_frames()
     logger.d('Total # of frames', total_frames)
     per_frame_features = []
     for frame_no in range(total_frames):
         logger.i('Getting feature matrix for frame #%d...' % frame_no)
         frame = video_io.read_frame()
         if frame is None:
             # The reader ran out of frames before the reported count.
             break
         per_frame_features.append(
             VideoSegment.create_binned_histograms(frame))
     transposed = np.transpose(per_frame_features)
     logger.d('shape(A)', transposed.shape)
     return transposed
Esempio n. 7
0
 def _logo_data_with_ads(self):
     """Generate new logo data (frame indices and polygon areas of logos) in
     the video with ads inserted, using the old logo data in the no-ads video

     The position of each ad is read from ``self.logo_first_occurences`` and
     its length is the frame count of the ad video. Every logo entry whose
     frame index lies strictly after an ad position is shifted forward by
     that ad's length.

     The entries of ``self.logo_data_in_video`` are left untouched: new
     entries are built instead of shifting the originals in place, so the
     method can be called repeatedly without accumulating offsets.

     Returns:
         list(list) -- new logo data, one ``[frame_idx, ...]`` entry per
         original entry, with ``frame_idx`` as a plain ``int``
     """
     ads_to_insert = self.logo_first_occurences
     pos_ads_length = []
     for ad_name in ads_to_insert:
         ad = self.brands_to_detect[ad_name]['ad']
         ad_n_frames = VideoIO(
             ad['video'],
             self.video_reader.width,
             self.video_reader.height).get_num_frames()
         pos_ads_length.append((ads_to_insert[ad_name], ad_n_frames))
     # reshape(-1, 2) keeps the array two-dimensional even when there is no
     # ad at all, so the column indexing below cannot raise IndexError.
     pos_ads_length = np.array(
         sorted(pos_ads_length, key=lambda t: t[0])).reshape(-1, 2)
     logger.d('pos_ads_length', pos_ads_length)
     logger.d('logo_data_in_video', np.array(self.logo_data_in_video, dtype='object'))
     ad_positions = pos_ads_length[:, 0]
     ad_lengths = pos_ads_length[:, 1]
     logo_data_with_ads = []
     for entry in self.logo_data_in_video:
         # Total length of all ads positioned strictly before this frame.
         shift = int(ad_lengths[ad_positions < entry[0]].sum())
         # Build a fresh entry: a shallow list copy would share the inner
         # entries and shifting them would corrupt logo_data_in_video.
         logo_data_with_ads.append([entry[0] + shift] + list(entry[1:]))
     logger.d('logo_data_with_ads', np.array(logo_data_with_ads, dtype='object'))
     return logo_data_with_ads
Esempio n. 8
0
 def _tag_content_ads(self, threshold=SHOT_SIM_MIN):
     """Split the shots into content shots and ad shots

     The longest shot is assumed to be content. Its row of shot
     differences is split at ``threshold`` into two groups, and the
     group with the larger total duration is reported as content.

     Keyword Arguments:
         threshold {float} -- a parameter for tagging (default: {SHOT_SIM_MIN})

     Returns:
         tuple(numpy.ndarray, numpy.ndarray) -- indices of content shots and ad shots
     """
     # we assume the longest shot in a video is not ad
     anchor_idx = self._get_longest_shot_idx()
     logger.d('_longest_shot_idx', anchor_idx)
     scores = np.array(self._calc_shots_differences()[anchor_idx])
     logger.d('_similarity', scores)
     below = np.where(scores < threshold)[0]
     at_or_above = np.where(scores >= threshold)[0]
     below_duration = self._get_shot_set_duration(below)
     at_or_above_duration = self._get_shot_set_duration(at_or_above)
     if below_duration > at_or_above_duration:
         return below, at_or_above
     return at_or_above, below
Esempio n. 9
0
 def _detect(self):
     """Detect logos in all frames

     Walks through the video, running the logo detector on the frame read
     at each step and then skipping ``VideoLogoDetect.SKIP_FRAME`` frames.
     Each detection is appended to ``self.logo_data_in_video`` as
     ``[frame_idx, logo_name, polygon]`` and the first frame in which each
     logo appears is recorded in ``self.logo_first_occurences``. When the
     video is exhausted the collected data is pickled to the logo data path
     derived from ``self.video_path``.
     """
     while self.video_reader.get_next_frame_idx() < self.video_reader.get_num_frames():
         frame_idx = self.video_reader.get_next_frame_idx()
         logger.d('Detecting logo in %d' % frame_idx)
         pil_image = self.video_reader.read_frame()
         brand_areas = self.logo_detector.detect(pil_image)
         if len(brand_areas) == 0:
             logger.i('Frame[%d]: no logo detected' % frame_idx)
         else:
             for logo_name, logo_poly in brand_areas:
                 # Convert once and reuse for both the log and the record.
                 poly_points = logo_poly.tolist()
                 logger.i('Frame[%d]: logo [%s] at area %s' % (
                     frame_idx, logo_name, poly_points))
                 self.logo_data_in_video.append(
                     [frame_idx, logo_name, poly_points])
                 if logo_name not in self.logo_first_occurences:
                     self.logo_first_occurences[logo_name] = frame_idx
         self.video_reader.skip_frame(VideoLogoDetect.SKIP_FRAME)
     logo_data_path = path_util.get_video_logo_data_path(self.video_path)
     # Context manager guarantees the pickle file is flushed and closed,
     # even if dumping raises.
     with open(logo_data_path, 'wb') as logo_data_file:
         pickle.dump(self.logo_data_in_video, logo_data_file)
     logger.i('Logo detection data saved to %s' % logo_data_path)
Esempio n. 10
0
 def _sift_match(self, logo, frame_img):
     """Use SIFT to match a logo to a given frame image

     Descriptors of the logo are matched against descriptors of the frame
     with a FLANN matcher (two nearest neighbours each), filtered with a
     0.75 distance-ratio test, and the surviving matches are used to
     estimate a RANSAC homography. The logo's corner points are projected
     through that homography to obtain the logo outline in the frame.

     Arguments:
         logo {dict} -- dict of logo with name, image, key points, and descriptor
         frame_img {cv Image} -- an OpenCV image

     Returns:
         numpy.ndarray or None -- a 4x2 int32 polygon representing the homography if there are valid matches else None
     """
     logo_name, logo_img = logo['name'], logo['img']
     kp_logo, des_logo = logo['keypoints'], logo['descriptor']
     logger.d('logo_name', logo_name)
     kp_frame, des_frame = self.sift.detectAndCompute(frame_img, None)
     # A featureless frame yields no descriptors (None), and two-neighbour
     # matching cannot form pairs from fewer than two frame descriptors;
     # passing either to the matcher would raise instead of "no match".
     if des_logo is None or des_frame is None or len(des_frame) < 2:
         logger.d("Not enough matches are found: %d/%d" %
                  (0, LogoDetector.MIN_MATCH_COUNT))
         return None
     # NOTE(review): OpenCV's FLANN enum uses 1 for the KD-tree index and 0
     # for linear search -- confirm which one is intended. The value is
     # kept as-is because changing it could alter the matches.
     FLANN_INDEX_KDTREE = 0
     index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
     search_params = dict(checks=50)
     flann = cv.FlannBasedMatcher(index_params, search_params)
     matches = flann.knnMatch(des_logo, des_frame, k=2)
     # Ratio test; skip entries where the matcher returned fewer than two
     # neighbours, which would otherwise break the pair unpacking.
     good = [
         pair[0] for pair in matches
         if len(pair) == 2 and pair[0].distance < 0.75 * pair[1].distance
     ]
     if len(good) <= LogoDetector.MIN_MATCH_COUNT:
         logger.d("Not enough matches are found: %d/%d" %
                  (len(good), LogoDetector.MIN_MATCH_COUNT))
         return None
     src_pts = np.float32([kp_logo[m.queryIdx].pt
                           for m in good]).reshape(-1, 1, 2)
     dst_pts = np.float32([kp_frame[m.trainIdx].pt
                           for m in good]).reshape(-1, 1, 2)
     M, mask = cv.findHomography(src_pts, dst_pts, cv.RANSAC, 1.0)
     # findHomography gives back no matrix/mask when it cannot estimate a
     # homography; treat that as zero RANSAC inliers.
     if M is None or mask is None:
         len_mask = 0
     else:
         len_mask = int(np.count_nonzero(mask))
     if len_mask <= LogoDetector.MIN_RANSAC_MATCH_COUNT:
         logger.d("Not enough matches after ransac: %d/%d" %
                  (len_mask, LogoDetector.MIN_RANSAC_MATCH_COUNT))
         return None
     logger.d('np.count_nonzero(matchesMask)', len_mask)
     # Two-value unpacking: the logo image is expected to be single-channel.
     h, w = logo_img.shape
     pts = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1],
                       [w - 1, 0]]).reshape(-1, 1, 2)
     dst = cv.perspectiveTransform(pts, M)
     logger.d('dst', dst)
     poly = np.int32(dst).reshape(4, 2)
     if not util.valid_poly(poly):
         logger.d("Polygon not valid")
         return None
     return poly
Esempio n. 11
0
 def pause(self):
     """Pause playback by setting ``self.state`` to 0

     Only the state flag is changed; no other attribute is reset here.
     """
     logger.d("pause")
     self.state = 0
from data import BRANDS
from logger import logger

logger.set_level('i')

# initializing
dataset_idx = 0
dataset = DATASETS[dataset_idx]
# Map each brand to detect in this dataset to its logo entry in BRANDS.
brands_to_detect = {k: BRANDS[k]['logo'] for k in dataset['brands_to_detect']}
video_io = VideoIO(dataset['video'], dataset['width'], dataset['height'])

# detect
logo_detector = LogoDetector(brands_to_detect)
# Collected detections as (frame_idx, logo_name, polygon) tuples.
logo_data_in_video = []
while video_io.get_next_frame_idx() < video_io.get_num_frames():
    frame_idx = video_io.get_next_frame_idx()
    logger.d('Detecting logo in %d' % frame_idx)
    pil_image = video_io.read_frame()
    brand_areas = logo_detector.detect(pil_image)
    if len(brand_areas) == 0:
        logger.i('Frame[%d]: no logo detected' % frame_idx)
    else:
        for logo_name, logo_poly in brand_areas:
            logger.i('Frame[%d]: logo [%s] at area %s' % (
                frame_idx, logo_name, logo_poly.tolist()))
            logo_data_in_video.append(
                (frame_idx, logo_name, logo_poly.tolist()))
    video_io.skip_frame(1)
logo_data_path = path_util.get_video_logo_data_path(dataset['video'])
# Use a context manager so the pickle file is flushed and closed
# deterministically instead of leaking the handle.
with open(logo_data_path, 'wb') as logo_data_file:
    pickle.dump(logo_data_in_video, logo_data_file)
Esempio n. 13
0
# Demo fragment: match descriptors des1 against des2 with FLANN, estimate a
# RANSAC homography from the good matches, and plot the result.
# NOTE(review): FLANN_INDEX_KDTREE, des1/des2, kp1/kp2, img1/img2 and the
# MIN_* thresholds are defined outside this fragment.
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
search_params = dict(checks=50)
flann = cv.FlannBasedMatcher(index_params, search_params)
# Two nearest neighbours per query descriptor, needed for the ratio test.
matches = flann.knnMatch(des1, des2, k=2)
good = []
for m, n in matches:
    # Ratio test: keep a match only if it is clearly better than the
    # second-best candidate.
    if m.distance < 0.75 * n.distance:
        good.append(m)
if len(good) > MIN_MATCH_COUNT:
    src_pts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
    dst_pts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
    M, mask = cv.findHomography(src_pts, dst_pts, cv.RANSAC, 1.0)
    # NOTE(review): findHomography can fail to produce a mask; this
    # fragment does not guard against that -- confirm inputs make it safe.
    matchesMask = mask.ravel().tolist()
    # Number of non-zero mask entries, i.e. matches kept by RANSAC.
    len_mask = np.count_nonzero(matchesMask)
    if len_mask > MIN_RANSAC_MATCH_COUNT:
        logger.d('np.count_nonzero(matchesMask)', len_mask)
        # Two-value unpacking: img1 is expected to be single-channel.
        h, w = img1.shape
        # Corner points of img1, projected into img2 through the homography.
        pts = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1],
                          [w - 1, 0]]).reshape(-1, 1, 2)
        dst = cv.perspectiveTransform(pts, M)
        logger.d('dst', np.int32(dst).reshape(4, 2).tolist())
        logger.d('if_all_clockwise(dst)', util.if_all_clockwise(dst))
        logger.d('if_not_slim(dst)', util.if_not_slim(dst))
        # Draw the projected outline on img2, then the matches side by side.
        img2 = cv.polylines(img2, [np.int32(dst)], True, 255, 3, cv.LINE_AA)
        draw_params = dict(
            matchColor=(0, 255, 0),  # draw matches in green color
            singlePointColor=None,
            matchesMask=matchesMask,  # draw only inliers
            flags=2)
        img3 = cv.drawMatches(img1, kp1, img2, kp2, good, None, **draw_params)
        plt.imshow(img3, 'gray'), plt.show()
        result = False
        for shot in shots:
            result = result or in_shots(s, shot)
        return result


def get_expected_indices(segment_shots, expected_contents):
    """Partition shot indices into expected content and expected ads.

    A shot's index is classified as content when ``in_shots`` reports the
    shot as belonging to ``expected_contents``; every other index is
    classified as an ad.

    Arguments:
        segment_shots -- iterable of shots produced by the segmentation
        expected_contents -- the expected content shots to compare against

    Returns:
        tuple(numpy.ndarray, numpy.ndarray) -- content indices, ad indices
    """
    # Evaluate the membership test once per shot, in order.
    membership = [in_shots(shot, expected_contents) for shot in segment_shots]
    expected_content = [i for i, hit in enumerate(membership) if hit]
    expected_ads = [i for i, hit in enumerate(membership) if not hit]
    return np.array(expected_content), np.array(expected_ads)


# Run the segmentation on the first dataset and log the detected
# content/ad shot indices next to the expected ones for comparison.
dataset_idx = 0
dataset = DATASETS[dataset_idx]
video_segment = VideoSegment(dataset['video'], dataset['width'],
                             dataset['height'])
content, ads = video_segment.get_content_ads_shots()

# Expected indices are derived from the segmenter's own shot list and the
# expected content shots recorded for this dataset.
content_expected, ads_expected = get_expected_indices(
    video_segment.get_all_shots(), EXPECTED[dataset_idx]['content_shots'])
logger.d('shots', np.array(video_segment.get_all_shots()))
logger.d('content', video_segment.content_shots)
logger.d('content_expected', content_expected)
logger.d('ads', video_segment.ads_shots)
logger.d('ads_expected', ads_expected)