# Likely third-party imports for the variants below; project-local names such
# as VideoInfo, Subtitles, preprocess_russian_text_with_morph and the scene
# detector self.sdm are assumed to be defined elsewhere in the repository.
import numpy as np
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from sklearn.neural_network import MLPClassifier
from keras.models import Sequential
from keras.layers import Bidirectional, Dense, LSTM, TimeDistributed


def find_ads(self, video_ids):
    if not self._fitted:
        raise Exception("Train or load model before inference")
    if not isinstance(video_ids, list):
        video_ids = [video_ids]
    result = []
    for video_id in video_ids:
        info = VideoInfo(video_id)
        subs = Subtitles(video_id, preprocess_russian_text_with_morph)
        scenes = self.sdm.detect_scenes(video_id)
        r = 0
        ads = []
        for l in range(len(scenes)):
            # Grow the scene window [l, r] until it spans at least
            # subs_part_len seconds.
            while (r + 1 < len(scenes)
                   and scenes[r][1] - scenes[l][0] < self.subs_part_len):
                r += 1
            words = list(
                map(str.strip,
                    subs.fulltext(scenes[l][0], scenes[r][1]).split()))
            # infer_vector returns a numpy array; convert it to a list so the
            # relative-position feature can be appended.
            x = list(self.vectorizer.infer_vector(words))
            x.append(scenes[l][0] / info.duration)
            y = self.subs_classifier.predict([x])[0]
            if y == 1:
                ads.append((scenes[l][0], scenes[r][1]))
        # Merge detections that are less than 10 seconds apart.
        merged_ads = []
        for ad in ads:
            if len(merged_ads) == 0 or ad[0] - merged_ads[-1][1] > 10:
                merged_ads.append(ad)
            else:
                merged_ads.append((merged_ads.pop()[0], ad[1]))
        result.append(merged_ads)
    return result if len(result) > 1 else result[0]

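# Hypothetical usage (the detector object and video ids are placeholders;
# assumes train() was called or a saved model was loaded first):
#
#   detector.find_ads('abc123')              # -> [(ad_start, ad_end), ...]
#   detector.find_ads(['abc123', 'def456'])  # -> one interval list per video
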
def find_ads(self, video_ids):
    if not self._fitted:
        raise Exception("Train or load model before inference")
    if not isinstance(video_ids, list):
        video_ids = [video_ids]
    result = []
    for video_id in video_ids:
        info = VideoInfo(video_id)
        subs = Subtitles(video_id, preprocess_russian_text_with_morph)
        scenes = self.sdm.detect_scenes(video_id)
        r = 0
        segments = []
        xs = []
        for l in range(len(scenes)):
            while (r + 1 < len(scenes)
                   and scenes[r][1] - scenes[l][0] < self.subs_part_len):
                r += 1
            # Keep only known buzz words and map them to their normal forms.
            words = map(str.strip,
                        subs.fulltext(scenes[l][0], scenes[r][1]).split())
            words = filter(lambda word: word in self.buzz_words, words)
            words = map(lambda word: self.buzz_words[word], words)
            subs_part = ' '.join(words)
            x = list(self.vectorizer.transform([subs_part]).toarray()[0])
            x.append(scenes[l][0] / info.duration)
            xs.append(x)
            segments.append((scenes[l][0], scenes[r][1]))
        # Zero-pad the features so every segment gets a full window of
        # context; segments itself stays unpadded so that index i below
        # addresses the i-th original segment.
        features = len(xs[0])
        xs = ([[0] * features] * (self.window // 2) + xs +
              [[0] * features] * (self.window // 2))
        X = []
        for i in range(self.window // 2, len(xs) - self.window // 2):
            X.append(xs[i - self.window // 2:i + self.window // 2 + 1])
        X = np.array(X)
        Y = self.subs_classifier.predict(X) > 0.5
        ads = []
        # Each segment is predicted by `window` overlapping windows; take a
        # majority vote over those predictions. For every offset d,
        # Y[i + d][window // 2 - d] is the prediction for segment i.
        for i in range(self.window // 2, len(X) - self.window // 2):
            cnt = 0
            for d in range(-(self.window // 2), self.window // 2 + 1):
                cnt += Y[i + d][self.window // 2 - d][0]
            if cnt > self.window // 2:
                ads.append(segments[i])
        # Merge detections that are less than 10 seconds apart.
        merged_ads = []
        for ad in ads:
            if len(merged_ads) == 0 or ad[0] - merged_ads[-1][1] > 10:
                merged_ads.append(ad)
            else:
                merged_ads.append((merged_ads.pop()[0], ad[1]))
        result.append(merged_ads)
    return result if len(result) > 1 else result[0]

def train(self, markups):
    video_ids = list(markups.keys())
    X = []
    Y = []
    print('start model training')
    for idx, video_id in enumerate(video_ids):
        print('\rprocessing video {} ({}/{})'.format(video_id, idx + 1,
                                                     len(video_ids)),
              end='')
        subs = Subtitles(video_id, preprocess_russian_text_with_morph)
        scenes = self.sdm.detect_scenes(video_id)
        info = VideoInfo(video_id)
        r = 0
        xs = []
        ys = []
        for l in range(len(scenes)):
            while (r + 1 < len(scenes)
                   and scenes[r][1] - scenes[l][0] < self.subs_part_len):
                r += 1
            # Keep only known buzz words and map them to their normal forms.
            words = map(str.strip,
                        subs.fulltext(scenes[l][0], scenes[r][1]).split())
            words = filter(lambda word: word in self.buzz_words, words)
            words = map(lambda word: self.buzz_words[word], words)
            subs_part = ' '.join(words)
            x = list(self.vectorizer.transform([subs_part]).toarray()[0])
            x.append(scenes[l][0] / info.duration)
            seg = (scenes[l][0], scenes[r][1])
            xs.append(x)
            # Soft labels: 0.5 for segments that cross an ad boundary,
            # 1 for segments fully inside an ad, 0 otherwise.
            if self._intersect_ad(seg, markups[video_id]):
                ys.append(0.5)
            elif self._inside_ad(seg, markups[video_id]):
                ys.append(1)
            else:
                ys.append(0)
        # Zero-pad both ends so every segment gets a full window of context.
        features = len(xs[0])
        xs = ([[0] * features] * (self.window // 2) + xs +
              [[0] * features] * (self.window // 2))
        ys = [0] * (self.window // 2) + ys + [0] * (self.window // 2)
        for i in range(self.window // 2, len(xs) - self.window // 2):
            X.append(xs[i - self.window // 2:i + self.window // 2 + 1])
            Y.append(ys[i - self.window // 2:i + self.window // 2 + 1])
    X = np.array(X)
    Y = np.array(Y)
    Y = Y.reshape((Y.shape[0], Y.shape[1], 1))
    print(X.shape, Y.shape)
    model = Sequential()
    model.add(
        Bidirectional(LSTM(100, return_sequences=True),
                      input_shape=(self.window, X.shape[2])))
    model.add(TimeDistributed(Dense(1, activation='sigmoid')))
    model.compile(optimizer='adam', loss='binary_crossentropy')
    np.random.seed(0)
    model.fit(X, Y, batch_size=len(X), shuffle=True, epochs=300)
    self.subs_classifier = model
    self._fitted = True

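# Illustrative shape sketch for the BiLSTM above (hypothetical helper with
# made-up numbers, not part of the original code): with window = 5 and 20
# features per segment, the model maps (batch, 5, 20) inputs to (batch, 5, 1)
# per-segment ad probabilities.
def _sketch_window_model(window=5, features=20):
    model = Sequential()
    model.add(
        Bidirectional(LSTM(100, return_sequences=True),
                      input_shape=(window, features)))
    model.add(TimeDistributed(Dense(1, activation='sigmoid')))
    model.compile(optimizer='adam', loss='binary_crossentropy')
    # One sigmoid output per window position.
    assert model.output_shape == (None, window, 1)
    return model
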
def find_ads(self, video_ids):
    if not self._fitted:
        raise Exception("Train or load model before inference")
    if not isinstance(video_ids, list):
        video_ids = [video_ids]
    result = []
    for video_id in video_ids:
        info = VideoInfo(video_id)
        subs = Subtitles(video_id, preprocess_russian_text_with_morph)
        scenes = self.sdm.detect_scenes(video_id)
        r = 0
        xs = []
        segments = []
        for l in range(len(scenes)):
            while (r + 1 < len(scenes)
                   and scenes[r][1] - scenes[l][0] < self.subs_part_len):
                r += 1
            words = list(
                map(str.strip,
                    subs.fulltext(scenes[l][0], scenes[r][1]).split()))
            x = list(self.vectorizer.infer_vector(words))
            # Relative position of the segment midpoint in the video.
            x.append((scenes[l][0] + scenes[r][1]) / 2 / info.duration)
            xs.append(x)
            segments.append((scenes[l][0], scenes[r][1]))
        # Zero-pad the features so every segment gets a full window of
        # context; segments itself stays unpadded so that index i below
        # addresses the i-th original segment.
        features = len(xs[0])
        xs = ([[0] * features] * (self.window // 2) + xs +
              [[0] * features] * (self.window // 2))
        X = []
        for i in range(self.window // 2, len(xs) - self.window // 2):
            X.append(xs[i - self.window // 2:i + self.window // 2 + 1])
        X = np.array(X)
        Y = self.subs_classifier.predict(X) > 0.5
        ads = []
        # Majority vote over the `window` overlapping windows that predict
        # each segment; Y[i + d][window // 2 - d] is the vote for segment i.
        for i in range(self.window // 2, len(X) - self.window // 2):
            cnt = 0
            for d in range(-(self.window // 2), self.window // 2 + 1):
                cnt += Y[i + d][self.window // 2 - d][0]
            if cnt > self.window // 2:
                ads.append(segments[i])
        # Merge detections that are less than 10 seconds apart.
        merged_ads = []
        for ad in ads:
            if len(merged_ads) == 0 or ad[0] - merged_ads[-1][1] > 10:
                merged_ads.append(ad)
            else:
                merged_ads.append((merged_ads.pop()[0], ad[1]))
        result.append(merged_ads)
    return result if len(result) > 1 else result[0]

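# Illustrative check of the index algebra behind the majority vote above
# (hypothetical helper, not part of the original code): for every offset d,
# window i + d predicts padded position (i + d) + (window // 2 - d), which is
# always padded index i + window // 2, i.e. original segment i, so
# segments[i] is exactly the interval being voted on.
def _check_vote_alignment(window=5, n=12):
    h = window // 2
    for i in range(h, n - h):
        # Subtract h to convert the padded index back to an original index.
        voted = {(i + d) + (h - d) - h for d in range(-h, h + 1)}
        assert voted == {i}
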
def train(self, markups):
    video_ids = list(markups.keys())
    Y = []
    print('start model training')
    tagged_data = []
    subs_parts = []
    for idx, video_id in enumerate(video_ids):
        print('\rprocessing video {} ({}/{})'.format(video_id, idx + 1,
                                                     len(video_ids)),
              end='')
        subs = Subtitles(video_id, preprocess_russian_text_with_morph)
        scenes = self.sdm.detect_scenes(video_id)
        r = 0
        for l in range(len(scenes)):
            while (r + 1 < len(scenes)
                   and scenes[r][1] - scenes[l][0] < self.subs_part_len):
                r += 1
            seg = (scenes[l][0], scenes[r][1])
            # Drop ambiguous segments that only partially overlap an ad.
            if self._intersect_ad(seg, markups[video_id]):
                continue
            words = list(
                map(str.strip,
                    subs.fulltext(scenes[l][0], scenes[r][1]).split()))
            subs_parts.append(words)
            tagged_data.append(
                TaggedDocument(words=words, tags=[str(len(tagged_data))]))
            if self._inside_ad(seg, markups[video_id]):
                Y.append(1)
            else:
                Y.append(0)
    # note: gensim >= 4 renamed the `size` argument to `vector_size`
    self.vectorizer = Doc2Vec(size=50, alpha=0.025, dm=0)
    self.vectorizer.build_vocab(tagged_data)
    self.vectorizer.train(tagged_data,
                          total_examples=self.vectorizer.corpus_count,
                          epochs=500)
    X = np.array(
        [self.vectorizer.infer_vector(words) for words in subs_parts])
    Y = np.array(Y)
    print('X shape: ', X.shape)
    print('Y shape: ', Y.shape)
    self.subs_classifier = MLPClassifier(hidden_layer_sizes=(50, 10),
                                         solver='adam',
                                         random_state=0,
                                         learning_rate='adaptive',
                                         max_iter=1000)
    self.subs_classifier.fit(X, Y)
    self._fitted = True

def train(self, markups):
    video_ids = list(markups.keys())
    X = []
    Y = []
    print('start model training')
    for idx, video_id in enumerate(video_ids):
        print('\rprocessing video {} ({}/{})'.format(video_id, idx + 1,
                                                     len(video_ids)),
              end='')
        subs = Subtitles(video_id, preprocess_russian_text_with_morph)
        scenes = self.sdm.detect_scenes(video_id)
        info = VideoInfo(video_id)
        r = 0
        for l in range(len(scenes)):
            while (r + 1 < len(scenes)
                   and scenes[r][1] - scenes[l][0] < self.subs_part_len):
                r += 1
            seg = (scenes[l][0], scenes[r][1])
            # Drop ambiguous segments that only partially overlap an ad.
            if self._intersect_ad(seg, markups[video_id]):
                continue
            # Keep only known buzz words and map them to their normal forms.
            words = map(str.strip,
                        subs.fulltext(scenes[l][0], scenes[r][1]).split())
            words = filter(lambda word: word in self.buzz_words, words)
            words = map(lambda word: self.buzz_words[word], words)
            subs_part = ' '.join(words)
            x = list(self.vectorizer.transform([subs_part]).toarray()[0])
            x.append(scenes[l][0] / info.duration)
            X.append(x)
            if self._inside_ad(seg, markups[video_id]):
                Y.append(1)
            else:
                Y.append(0)
    X = np.array(X)
    Y = np.array(Y)
    self.subs_classifier = MLPClassifier(hidden_layer_sizes=(15, 5),
                                         solver='adam',
                                         random_state=0,
                                         learning_rate='adaptive',
                                         max_iter=1000)
    self.subs_classifier.fit(X, Y)
    self._fitted = True

def train(self, markups):
    video_ids = list(markups.keys())
    print('start model training')
    tagged_data = []
    subs_parts = []
    Ys = []
    ps = []
    for idx, video_id in enumerate(video_ids):
        print('\rprocessing video {} ({}/{})'.format(video_id, idx + 1,
                                                     len(video_ids)),
              end='')
        subs = Subtitles(video_id, preprocess_russian_text_with_morph)
        scenes = self.sdm.detect_scenes(video_id)
        r = 0
        sp = []
        ys = []
        p = []
        info = VideoInfo(video_id)
        for l in range(len(scenes)):
            while (r + 1 < len(scenes)
                   and scenes[r][1] - scenes[l][0] < self.subs_part_len):
                r += 1
            seg = (scenes[l][0], scenes[r][1])
            words = list(
                map(str.strip,
                    subs.fulltext(scenes[l][0], scenes[r][1]).split()))
            sp.append(words)
            tagged_data.append(
                TaggedDocument(words=words, tags=[str(len(tagged_data))]))
            # Relative position of the segment midpoint in the video.
            p.append((seg[0] + seg[1]) / 2 / info.duration)
            # Soft labels: 1 inside an ad, 0.5 across an ad boundary,
            # 0 otherwise.
            if self._inside_ad(seg, markups[video_id]):
                ys.append(1)
            elif self._intersect_ad(seg, markups[video_id]):
                ys.append(0.5)
            else:
                ys.append(0)
        subs_parts.append(sp)
        Ys.append(ys)
        ps.append(p)
    # note: gensim >= 4 renamed the `size` argument to `vector_size`
    self.vectorizer = Doc2Vec(size=50, alpha=0.025, dm=0)
    self.vectorizer.build_vocab(tagged_data)
    self.vectorizer.train(tagged_data,
                          total_examples=self.vectorizer.corpus_count,
                          epochs=500)
    X = []
    Y = []
    for idx, video_id in enumerate(video_ids):
        print('\rvectorizing video {} ({}/{})'.format(video_id, idx + 1,
                                                      len(video_ids)),
              end='')
        xs = [
            self.vectorizer.infer_vector(words) for words in subs_parts[idx]
        ]
        # Append the relative-position feature to every segment vector.
        xs = np.hstack((xs, np.reshape(ps[idx], (len(ps[idx]), 1)))).tolist()
        ys = Ys[idx]
        # Zero-pad both ends so every segment gets a full window of context.
        features = len(xs[0])
        xs = ([[0] * features] * (self.window // 2) + xs +
              [[0] * features] * (self.window // 2))
        ys = [0] * (self.window // 2) + ys + [0] * (self.window // 2)
        for i in range(self.window // 2, len(xs) - self.window // 2):
            X.append(xs[i - self.window // 2:i + self.window // 2 + 1])
            Y.append(ys[i - self.window // 2:i + self.window // 2 + 1])
    X = np.array(X)
    Y = np.array(Y)
    Y = Y.reshape((Y.shape[0], Y.shape[1], 1))
    print(X.shape, Y.shape)
    model = Sequential()
    model.add(
        Bidirectional(LSTM(100, return_sequences=True),
                      input_shape=(self.window, X.shape[2])))
    model.add(TimeDistributed(Dense(1, activation='sigmoid')))
    model.compile(optimizer='adam', loss='binary_crossentropy')
    np.random.seed(0)
    model.fit(X, Y, batch_size=len(X), shuffle=True, epochs=500)
    self.subs_classifier = model
    self._fitted = True

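# The methods above call two markup helpers that are not defined in this
# section. A minimal sketch of plausible implementations, assuming
# markups[video_id] is a list of (ad_start, ad_end) intervals in seconds, and
# that _intersect_ad means "overlaps an ad but is not fully inside one"
# (otherwise the labeling above could never assign the positive class):

def _inside_ad(self, seg, ads):
    # The segment lies entirely within some annotated ad interval.
    return any(ad[0] <= seg[0] and seg[1] <= ad[1] for ad in ads)


def _intersect_ad(self, seg, ads):
    # The segment crosses an ad boundary: it overlaps at least one ad
    # interval but is not completely contained in any of them.
    overlaps = any(seg[0] < ad[1] and ad[0] < seg[1] for ad in ads)
    return overlaps and not self._inside_ad(seg, ads)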