def _merge_m3u8_by_tar_time(self, station_num: str, vod: typing.Dict,
                            tar_time_range: typing.List):
    """Download the ``.ts`` segments inside the target ranges and merge them.

    :param station_num: names the working directory below ``self.VOD_PATH``
        and the merged output
    :param vod: VOD description handed to ``self._parse_m3u8``
    :param tar_time_range: ``(start, end)`` pairs of time strings understood
        by ``Duration.set_time``; both ends are included
    """
    path = Path(self.VOD_PATH, str(station_num))
    os.makedirs(path, exist_ok=True)

    self.log.info(f'[{self.bj_id}:{station_num}] get vod m3u8 info')
    tar_video = self._parse_m3u8(vod)
    self.log.info(
        f'[{self.bj_id}:{station_num}] get vod m3u8 info success')

    pool = Pool(20)
    for min_range, max_range in tar_time_range:
        first = Duration.set_time(min_range).to_duration()
        last = Duration.set_time(max_range).to_duration()
        for key in range(first, last + 1):
            if key not in tar_video:
                continue
            ts_path = path.joinpath(f'{key}.ts')
            # A file above 500 KiB is treated as already downloaded.
            already_downloaded = (os.path.isfile(ts_path)
                                  and ts_path.stat().st_size > 1024 * 500)
            if not already_downloaded:
                pool.add(
                    gevent.spawn(self.down, url=tar_video[key], path=ts_path))
    pool.join()

    self.log.info(f'[{self.bj_id}:{station_num}] download ts success')
    self._ts2mp4(path, output_name=station_num)
def get_duration_range(raw_duration: typing.List):
    """Group detected durations into continuous ``(start, end)`` ranges.

    Two neighbouring values (in sorted order) belong to the same run when they
    are exactly ``config.THUMBNAIL_SIZE.DURATION_SEC`` apart.

    :param raw_duration: numeric durations in any order (presumably seconds,
        as produced by ``Duration.to_duration()`` -- confirm with the caller)
    :return: ``None`` when there are no more than
        ``TAR_SEC // DURATION_SEC`` values; otherwise a list of
        ``(start, end)`` strings from ``Duration.to_str()``, one per run that
        holds more than ``SMALL_RANGE_SEC // DURATION_SEC`` values
    """
    step = config.THUMBNAIL_SIZE.DURATION_SEC
    range_long = TAR_SEC // step
    if len(raw_duration) <= range_long:
        return None

    min_run_len = SMALL_RANGE_SEC // step
    tar_duration = sorted(raw_duration)
    result, run = [], []

    def close_run(last_value):
        # ``run`` holds every member except the last one; add it, then keep
        # the run only when it is long enough.
        run.append(last_value)
        if len(run) > min_run_len:
            start_time = Duration.set_duration(run[0]).to_str()
            end_time = Duration.set_duration(run[-1]).to_str()
            result.append((start_time, end_time))

    for prev, cur in zip(tar_duration, tar_duration[1:]):
        if cur - prev == step:
            run.append(prev)
        elif run:
            # Gap after ``prev``: it is the last member of the current run.
            close_run(prev)
            run = []

    # Data stayed continuous up to the end.  The original compared
    # tar_duration[i - 1] with run[-1] here, which are the same element, so
    # the final value was never added and the run came out one sample short.
    if run:
        close_run(tar_duration[-1])
    return result
def row_sma(row):
    """Attach the raw maximum behind one smoothed peak to ``row``.

    Looks at the rows of ``cnt`` whose ``index`` lies in
    ``[row['start_index'], row['index']]``, takes the one with the largest
    ``value`` and stores its ``index``/``value`` plus a time range of
    ``MIN_SEC * self.range_factor`` on either side of it.

    :param row: one row of the peak frame (needs ``start_index`` and ``index``)
    :return: the same row with ``ori_index``, ``ori_value`` and
        ``ori_range_duration`` filled in
    """
    in_window = (cnt['index'] >= row['start_index']) & (cnt['index'] <= row['index'])
    window = cnt.where(in_window).dropna()
    peak = cnt.loc[window['value'].idxmax()]

    row['ori_index'] = peak['index']
    row['ori_value'] = peak['value']

    center = peak['index'] * per_index
    half_width = MIN_SEC * self.range_factor
    row['ori_range_duration'] = (
        Duration.set_duration(center - half_width).to_str(),
        Duration.set_duration(center + half_width).to_str(),
    )
    return row
def local_run(self, dir_name: str, tar_sec: int = 60) -> typing.List:
    """Run the restored model over the images of ``dir_name``.

    :param dir_name: forwarded to ``self._gen_valid_img`` to pick the images
    :param tar_sec: target duration (total video duration, duration of a
        continuous time segment)
    :return: whatever ``self._get_duration_range`` returns; when that call
        raises, the error is logged and ``None`` is returned implicitly
    """
    y_predict = self.model()
    saver = tf.compat.v1.train.Saver()
    with tf.compat.v1.Session() as sess:
        saver.restore(sess, self.model_name)
        predict_y = tf.argmax(y_predict, -1)

        # Durations of every image the model labels as class 1.
        raw_result = []
        for img_path, img in self._gen_valid_img(dir_name):
            feed = {self.X: [img], self.keep_prob: 1.0}
            predict_value = sess.run(predict_y, feed_dict=feed)[0]
            # The stem must have exactly three '_'-separated parts; the
            # middle one is the time string.
            _, vod_time, _ = img_path.stem.split('_')
            if predict_value != 1:
                continue
            raw_result.append(Duration.set_time(vod_time).to_duration())
        self.log.info(
            f'[{self.user_id}/{dir_name} local run raw_result] {raw_result}')

        # Turn the raw hits into continuous ranges.
        try:
            result = self._get_duration_range(tar_sec, raw_result)
        except Exception as e:
            self.log.error(f'[{self.user_id}/{dir_name} local run] {str(e)}')
        else:
            self.log.info(f'[{self.user_id}/{dir_name} local run] {result}')
            return result
def test_img(self, img_name: str):
    """Print the thumbnail request URL for one image file name.

    :param img_name: name shaped like ``<station>_<h>:<m>:<s>.jpg``
    """
    matched = re.search(r'(.*?)_(.*?):(.*?):(.*)\.jpg', img_name)
    station_num, hours, minutes, seconds = matched.groups()

    station_id = int(station_num)
    offset = Duration.delta(int(hours), int(minutes), int(seconds)).to_duration()
    param = self._get_thumbnail_param(station_id, offset)

    query = util.join_params(**param)
    print(self.THUMBNAIL_URL + query)
def _trans_set2result(self, vod_set: dict) -> typing.Dict:
    """Convert per-station index ranges into ``(start, end)`` time strings.

    :param vod_set: ``{station_num: {start_key: [(min_r, max_r), ...]}}``;
        the text after the last ``'_'`` of ``start_key`` is parsed with
        ``Duration.set_time`` and used as the base time
    :return: ``{station_num: [(min_str, max_str), ...]}`` where each bound is
        the base time shifted by ``bound * config.THUMBNAIL_SIZE.DURATION_SEC``
    """

    def shifted(base: str, count) -> str:
        # A fresh Duration is built for every bound, as in the original code.
        offset = duration_delta(s=count * config.THUMBNAIL_SIZE.DURATION_SEC)
        return (Duration.set_time(base) + offset).to_str()

    result = {}
    for station_num, vod_range in vod_set.items():
        ranges = result[station_num] = []
        for start, s_range in vod_range.items():
            base = start.split('_')[-1]
            ranges.extend(
                (shifted(base, min_r), shifted(base, max_r))
                for min_r, max_r in s_range)
    return result
def merge_m3u8(tar_video: dict):
    """Fetch the segments inside every range of the outer ``tar`` and merge them.

    Segments are written to the fixed directory ``video/47859255``.  The end
    of each range is excluded.

    :param tar_video: maps a duration key to the URL of its ``.ts`` segment
    """
    path = 'video/47859255'
    os.makedirs(path, exist_ok=True)

    for min_range, max_range in tar:
        min_d = Duration.set_time(min_range).to_duration()
        max_d = Duration.set_time(max_range).to_duration()
        for key in range(min_d, max_d):
            if key not in tar_video:
                continue
            content = util.post_content(tar_video[key])
            if content is None:
                # Nothing came back for this segment; skip it.
                continue
            with open(os.path.join(path, f'{key}.ts'), 'wb') as f:
                f.write(content)

    merge_ts2mp4(path)
def sub_path(path: str, num, label: int = 0) -> str:
    """Build the path of a sub-image below the ``img`` directory.

    :param path: source path whose stem is ``<station>_<time>``
    :param num: seconds added to the time taken from the stem
    :param label: label appended to the file name
    :return: ``img/<station>_<shifted time>_<label><suffix>`` as a string
    """
    source = Path(path)
    station_str, raw_duration = source.stem.split('_')
    shifted = Duration.set_time(raw_duration) + duration_delta(s=num)
    duration = shifted.to_str()
    file_name = f'{station_str}_{duration}_{label}{source.suffix}'
    return str(Path('img', file_name))
def test_duration():
    """Check ``Duration`` parsing, formatting and ``duration_delta`` subtraction."""
    # Parsing a time string and reading it back both ways.
    parsed = Duration.set_time('2:5:0')
    assert parsed.to_str() == '2:5:0'
    parsed = Duration.set_time('2:5:0')
    assert parsed.to_duration() == 7500

    # Starting from a number of seconds instead.
    from_seconds = Duration.set_duration(7500)
    assert from_seconds.to_duration() == 7500
    from_seconds = Duration.set_duration(7500)
    assert from_seconds.to_str() == '2:5:0'

    # Subtracting minutes and seconds.
    minus_five_min = Duration.set_duration(7500) - duration_delta(m=5)
    assert minus_five_min.to_str() == '2:0:0'
    minus_five_sec = Duration.set_duration(7500) - duration_delta(s=5)
    assert minus_five_sec.to_str() == '2:4:55'
def _find_top(self, raw_data: typing.List,
              time_duration: int) -> typing.Optional[typing.List]:
    """Find the time ranges around the strongest peaks of ``raw_data``.

    The values from ``perfect_start`` onwards are smoothed with a simple
    moving average, local maxima of the smoothed curve above the mean of all
    maxima are kept, and each one is mapped back to the largest raw value
    inside its averaging window.

    :param raw_data: rows of ``(index, value)``
        (assumes ``index`` matches the row position -- TODO confirm)
    :param time_duration: total duration covered by ``raw_data``
    :return: ``None`` when either argument is empty/zero; otherwise a list of
        unique ``(start, end)`` strings, empty when no peak qualifies
    """
    if not raw_data or not time_duration:
        return None

    cnt = pd.DataFrame(raw_data, columns=['index', 'value'])
    perfect_duration = (
        Duration.set_duration(time_duration) -
        duration_delta(m=self.perfect_start_min)).to_duration()
    # Duration represented by one row of ``raw_data``.
    per_index = time_duration // len(cnt)
    diff_duration = time_duration - perfect_duration
    # Number of leading rows left out of the smoothing.
    perfect_start = diff_duration // per_index

    y = pd.DataFrame(raw_data[perfect_start:], columns=['index', 'value'])
    sma_period = perfect_start * self.smooth_factor
    Y = ta.SMA(y['value'].values.astype('float64'),
               timeperiod=sma_period).tolist()

    # Local maxima of the smoothed curve, stored with their position in ``cnt``.
    top = []
    for i, d in enumerate(Y):
        if d > 0 and i < len(Y) - 1:
            if (Y[i - 1] <= d and d >= Y[i + 1]) or (i == 0 and d >= Y[i + 1]):
                top.append((i + perfect_start, d))
    if not top:
        return []

    def row_sma(row):
        # Largest raw value inside the averaging window of this smoothed peak.
        cond = (row['start_index'] <= cnt['index']) & (cnt['index'] <= row['index'])
        max_id = cnt.where(cond).dropna()['value'].idxmax()
        result = cnt.loc[max_id]
        row['ori_index'] = result['index']
        row['ori_value'] = result['value']
        ori_duration = result['index'] * per_index
        start_duration = Duration.set_duration(
            ori_duration - MIN_SEC * self.range_factor).to_str()
        end_duration = Duration.set_duration(
            ori_duration + MIN_SEC * self.range_factor).to_str()
        row['ori_range_duration'] = (start_duration, end_duration)
        return row

    top_df = pd.DataFrame(top, columns=['index', 'value'])
    # Assign the result back instead of ``where(..., inplace=True)`` on the
    # column selection: under pandas Copy-on-Write the in-place form only
    # changes a temporary and leaves ``top_df`` untouched.
    top_df['value'] = top_df['value'].where(
        top_df['value'] > top_df['value'].mean())
    top_df = top_df.where(top_df['value'] > 0).dropna()
    if top_df.empty:
        # E.g. a single peak is never above its own mean.  ``apply`` on an
        # empty frame would not create 'ori_range_duration' and the lookup
        # below would raise KeyError.
        return []

    top_df['start_index'] = top_df['index'] - sma_period + 1
    target = top_df.apply(row_sma, axis=1)
    return target['ori_range_duration'].drop_duplicates().to_list()
def _get_duration_range(self, tar_sec: int,
                        raw_duration: typing.List) -> typing.List:
    """Group detected durations into continuous ``(start, end)`` ranges.

    Two neighbouring values (in sorted order) belong to the same run when they
    are exactly ``config.THUMBNAIL_SIZE.DURATION_SEC`` apart.

    :param tar_sec: target duration; a run is reported only when it holds
        more than ``tar_sec // DURATION_SEC`` values
    :param raw_duration: numeric durations in any order
    :return: list of ``(start, end)`` strings from ``Duration.to_str()``
    :raises Exception: when fewer than ``tar_sec // DURATION_SEC`` values
        are given
    """
    step = config.THUMBNAIL_SIZE.DURATION_SEC
    range_long = tar_sec // step
    if len(raw_duration) < range_long:
        raise Exception(f'tar vod should logger than {tar_sec}')

    tar_duration = sorted(raw_duration)
    result, run = [], []

    def close_run(last_value):
        # ``run`` holds every member except the last one; add it, then keep
        # the run only when it is long enough.
        run.append(last_value)
        if len(run) > range_long:
            start_time = Duration.set_duration(run[0]).to_str()
            end_time = Duration.set_duration(run[-1]).to_str()
            result.append((start_time, end_time))

    for prev, cur in zip(tar_duration, tar_duration[1:]):
        if cur - prev == step:
            # Still inside the same run: collect the earlier value.
            run.append(prev)
        elif run:
            # Gap after ``prev``: it is the last member of the current run.
            close_run(prev)
            run = []

    # Data stayed continuous up to the end without a gap.  The original
    # compared tar_duration[i - 1] with run[-1] here, which are the same
    # element, so the final value was never added and the run came out one
    # sample short.
    if run:
        close_run(tar_duration[-1])
    return result
def sub_path(
        self,
        path,
        file_dir: str,
        num,
        label: int = 0,
) -> str:
    """Build the path of a sub-image inside ``file_dir``, creating the directory.

    :param path: source path whose stem is ``<station>_<time>``
    :param file_dir: directory that receives the sub-image
    :param num: seconds added to the time taken from the stem
    :param label: label appended to the file name
    :return: ``<file_dir>/<station>_<shifted time>_<label><suffix>`` as a string
    """
    source = Path(path)
    station_str, raw_duration = source.stem.split('_')
    shifted = Duration.set_time(raw_duration) + duration_delta(s=num)
    duration = shifted.to_str()

    sub_img_path = Path(file_dir,
                        f'{station_str}_{duration}_{label}{source.suffix}')
    parent = sub_img_path.parent
    if not parent.exists():
        parent.mkdir(parents=True)
    return str(sub_img_path)
def _get_duration_key(self, vod_time: str):
    """Return the start of the ``TOTAL_DURATION`` bucket that holds ``vod_time``.

    One is subtracted first, so a time lying exactly on a bucket boundary is
    assigned to the bucket before it.

    :param vod_time: time string understood by ``Duration.set_time``
    :return: bucket start formatted by ``Duration.to_str()``
    """
    shifted = Duration.set_time(vod_time).to_duration() - 1
    bucket_index = shifted // TOTAL_DURATION
    return Duration.set_duration(bucket_index * TOTAL_DURATION).to_str()
def pl(raw_data, time_duration, station_num):
    """Plot the raw values, their moving average and the detected peaks.

    Does nothing when ``raw_data`` or ``time_duration`` is empty/zero.

    :param raw_data: rows of ``(index, value)``
        (assumes ``index`` matches the row position -- TODO confirm)
    :param time_duration: total duration covered by ``raw_data``
    :param station_num: used as the plot title
    """
    if not raw_data or not time_duration:
        return

    cnt = pd.DataFrame(raw_data, columns=['index', 'value'])
    x = list(range(0, len(cnt)))
    perfect_duration = (Duration.set_duration(time_duration) -
                        duration_delta(m=5)).to_duration()
    # Duration represented by one row of ``raw_data``.
    per_index = time_duration // len(cnt)
    diff_duration = time_duration - perfect_duration
    # Number of leading rows left out of the smoothing.
    perfect_start = diff_duration // per_index

    y = pd.DataFrame(raw_data[perfect_start:], columns=['index', 'value'])
    sma_period = perfect_start * 2
    Y = ta.SMA(y['value'].values.astype('float64'),
               timeperiod=sma_period).tolist()

    # Local maxima of the smoothed curve, stored with their position in ``cnt``.
    top = []
    for i, d in enumerate(Y):
        if d > 0 and i < len(Y) - 1:
            if (Y[i - 1] <= d and d >= Y[i + 1]) or (i == 0 and d >= Y[i + 1]):
                top.append((i + perfect_start, d))

    def row_sma(row):
        # Largest raw value inside the averaging window of this smoothed peak.
        cond = (row['start_index'] <= cnt['index']) & (cnt['index'] <= row['index'])
        max_id = cnt.where(cond).dropna()['value'].idxmax()
        result = cnt.loc[max_id]
        row['ori_index'] = result['index']
        row['ori_value'] = result['value']
        row['ori_duration'] = result['index'] * per_index
        row['ori_range_duration'] = (row['ori_duration'] - MIN_SEC * 3,
                                     row['ori_duration'] + MIN_SEC * 3)
        return row

    top_df = pd.DataFrame(top, columns=['index', 'value'])
    # Assign the result back instead of ``where(..., inplace=True)`` on the
    # column selection: under pandas Copy-on-Write the in-place form only
    # changes a temporary and leaves ``top_df`` untouched.
    top_df['value'] = top_df['value'].where(
        top_df['value'] > top_df['value'].mean())
    top_df = top_df.where(top_df['value'] > 0).dropna()
    top_df['start_index'] = top_df['index'] - sma_period + 1
    # With no surviving peak ``apply`` would not create the 'ori_*' columns.
    a = top_df.apply(row_sma, axis=1) if not top_df.empty else None

    plt.plot(x, cnt['value'].values, 'r', linewidth=1, label='ori')
    # ``Y`` starts at row ``perfect_start``; the previous fixed ``x[5:]`` only
    # had a matching length when ``perfect_start`` happened to be 5.
    plt.plot(x[perfect_start:], Y, 'b', linewidth=1, label=f'sma-{sma_period}')
    plt.scatter(top_df['index'], top_df['value'], 100, marker='^', label='max')
    if a is not None:
        plt.scatter(a['ori_index'], a['ori_value'], 100, marker='v',
                    label='ori_max')
    plt.title(station_num)
    plt.legend()
    plt.show()
def test_d():
    """Print a bucket size and the time '5:9:57' rounded down to that size."""
    d = Duration.set_time('5:9:57').to_duration()
    size = config.THUMBNAIL_SIZE
    # NOTE(review): DURATION_SEC is multiplied twice here; other code in this
    # project uses DURATION_SEC * ROW_COUNT * COLUMN_COUNT -- confirm intent.
    total_duration = size.DURATION_SEC * size.DURATION_SEC * size.COLUMN_COUNT
    print(total_duration)

    remainder = d % total_duration
    bucket_start = Duration.set_duration(d - remainder)
    print(bucket_start.to_str())
def test_c():
    """Print a third of the span between '0:55:00' and '0:59:33'."""
    later = Duration.set_time('0:59:33').to_duration()
    earlier = Duration.set_time('0:55:00').to_duration()
    span = later - earlier
    print(span / 3)
def valid_run(self, dir_name: str, small_range_sec=15):
    """Run the restored model over the validation images and report its quality.

    Images are read from ``<DATA_PATH>/<user_id>/valid_data/<dir_name>*``; the
    stem of every file must be ``<name>_<time>_<label>``.  Accuracy per time
    bucket, precision/recall/F-score and the detected continuous ranges are
    logged and pretty-printed.

    :param dir_name: file-name prefix of the validation images
    :param small_range_sec: a continuous range is reported only when it holds
        more than ``small_range_sec // DURATION_SEC`` images
    """
    step = config.THUMBNAIL_SIZE.DURATION_SEC
    # Total duration covered by the small images (one step per grid cell).
    total_duration = (step * config.THUMBNAIL_SIZE.ROW_COUNT *
                      config.THUMBNAIL_SIZE.COLUMN_COUNT)
    # Target duration.
    TAR_SEC = 3 * 60
    # Duration of a small range.
    SMALL_RANGE_SEC = small_range_sec

    def gen_valid_img():
        data_path = Path(config.DATA.DATA_PATH, self.user_id, 'valid_data')
        for img_path in data_path.glob(dir_name + '*'):
            img = self._prepare_img_by_path(str(img_path))
            yield img_path, img

    def get_duration_key(vod_time: str):
        # Minus one so a time exactly on a bucket boundary falls into the
        # bucket before it.
        vod_time_duration = Duration.set_time(vod_time).to_duration() - 1
        set_time_key = vod_time_duration // total_duration * total_duration
        return Duration.set_duration(set_time_key).to_str()

    def get_duration_range(raw_duration: typing.List):
        range_long = TAR_SEC // step
        if len(raw_duration) <= range_long:
            return None
        min_run_len = SMALL_RANGE_SEC // step
        tar_duration = sorted(raw_duration)
        result, run = [], []

        def close_run(last_value):
            # ``run`` holds every member except the last one; add it, then
            # keep the run only when it is long enough.
            run.append(last_value)
            if len(run) > min_run_len:
                start_time = Duration.set_duration(run[0]).to_str()
                end_time = Duration.set_duration(run[-1]).to_str()
                result.append((start_time, end_time))

        for prev, cur in zip(tar_duration, tar_duration[1:]):
            if cur - prev == step:
                run.append(prev)
            elif run:
                close_run(prev)
                run = []
        # Data stayed continuous up to the end.  The original compared
        # tar_duration[i - 1] with run[-1] here (the same element), so the
        # final value was never added.
        if run:
            close_run(tar_duration[-1])
        return result

    def ratio(numerator, denominator):
        # A metric with an empty denominator is reported as 0.0 instead of
        # raising ZeroDivisionError after the whole run has finished.
        return numerator / denominator if denominator else 0.0

    def precition(TP, FP: int):
        return ratio(TP, TP + FP)

    def recall(TP, FN: int):
        return ratio(TP, TP + FN)

    def f_score(TP, FP, FN: int):
        return ratio(2 * TP, 2 * TP + FP + FN)

    y_predict = self.model()
    # Same v1 API family as the Session below; ``tf.train.Saver`` is not
    # available in TensorFlow 2.
    saver = tf.compat.v1.train.Saver()

    # Per-bucket statistics and the confusion matrix counters.
    stat_dict = defaultdict(dict)
    TP, TN, FP, FN = 0, 0, 0, 0
    # Durations of every image predicted as class 1.
    raw_result = []

    with tf.compat.v1.Session() as sess:
        saver.restore(sess, self.model_name)
        predict_y = tf.argmax(y_predict, -1)

        for img_path, img in gen_valid_img():
            predict = sess.run(predict_y,
                               feed_dict={
                                   self.X: [img],
                                   self.keep_prob: 1.0
                               })
            predict_value = predict[0]
            _, vod_time, label = img_path.stem.split('_')
            vod_time_key = get_duration_key(vod_time)
            if predict_value == 1:
                raw_result.append(Duration.set_time(vod_time).to_duration())

            # Accuracy per time bucket.
            bucket = stat_dict[vod_time_key]
            bucket.setdefault('right', 0)
            bucket.setdefault('sum', 0)
            bucket.setdefault('error_list', [])
            bucket['sum'] += 1
            expected = int(label)
            if expected == predict_value:
                bucket['right'] += 1
            else:
                bucket['error_list'].append(img_path.stem.replace(':', '/'))

            # Confusion matrix.
            if expected == predict_value == 1:
                TP += 1
            elif expected == predict_value == 0:
                TN += 1
            elif expected == 1 and predict_value == 0:
                FN += 1
            elif expected == 0 and predict_value == 1:
                FP += 1

    # Continuous ranges built from the positive predictions.
    result = get_duration_range(raw_result)
    self.log.info(f'[{self.user_id}/{dir_name}] {result}')

    if not stat_dict:
        # No image matched; ``statistics.mean`` would raise on an empty list.
        self.log.info(f'[{self.user_id}/{dir_name} valid] no images evaluated')
        return

    # Statistics.
    rate = []
    for k, v in stat_dict.items():
        v['rate'] = v.pop('right') / v.pop('sum')
        rate.append(v['rate'])
    pprint(stat_dict)
    acc_msg = f'acc {statistics.mean(rate):.4%}'
    confusion_matrix_msg = f'precition {precition(TP, FP):.4} recall {recall(TP, FN):.4} f_score {f_score(TP, FP, FN):.4}'
    self.log.info(
        f'[{self.user_id}/{dir_name} valid] {acc_msg} {confusion_matrix_msg}'
    )
    pprint(acc_msg)
    pprint(confusion_matrix_msg)
    pprint(result)
def get_duration_key(vod_time: str):
    """Return the start of the ``total_duration`` bucket that holds ``vod_time``.

    One is subtracted first, so a time lying exactly on a bucket boundary is
    assigned to the bucket before it.

    :param vod_time: time string understood by ``Duration.set_time``
    :return: bucket start formatted by ``Duration.to_str()``
    """
    shifted = Duration.set_time(vod_time).to_duration() - 1
    bucket_index = shifted // total_duration
    return Duration.set_duration(bucket_index * total_duration).to_str()