Code example #1
File: base_m3u8.py  Project: Monardes/AfreecaDance
    def _merge_m3u8_by_tar_time(self, station_num: str, vod: typing.Dict,
                                tar_time_range: typing.List):
        path = Path(self.VOD_PATH, str(station_num))
        os.makedirs(path, exist_ok=True)
        self.log.info(f'[{self.bj_id}:{station_num}] get vod m3u8 info')
        tar_video = self._parse_m3u8(vod)
        self.log.info(
            f'[{self.bj_id}:{station_num}] get vod m3u8 info success')
        pool = Pool(20)  # gevent pool: up to 20 concurrent segment downloads
        for t in tar_time_range:
            min_range, max_range = t
            min_d = Duration.set_time(min_range).to_duration()
            max_d = Duration.set_time(max_range).to_duration()
            for i in range(min_d, max_d + 1):
                if i in tar_video:
                    ts_path = path.joinpath(f'{i}.ts')
                    # skip segments already downloaded (larger than 500 KB)
                    if os.path.isfile(ts_path) and ts_path.stat().st_size > 1024 * 500:
                        continue
                    pool.add(gevent.spawn(self.down, url=tar_video[i], path=ts_path))

        pool.join()
        self.log.info(f'[{self.bj_id}:{station_num}] download ts success')
        self._ts2mp4(path, output_name=station_num)
Code example #2
File: test_timeutil.py  Project: weakeng/AfreecaDance
    def get_duration_range(raw_duration: typing.List):
        """Group consecutive thumbnail offsets (step = DURATION_SEC) into
        (start, end) time-string ranges longer than SMALL_RANGE_SEC."""

        range_long = TAR_SEC // config.THUMBNAIL_SIZE.DURATION_SEC
        if len(raw_duration) <= range_long:
            return None

        tar_duration = sorted(raw_duration)
        result, tmp = [], []

        for i in range(1, len(raw_duration)):
            if tar_duration[i] - tar_duration[i - 1] == config.THUMBNAIL_SIZE.DURATION_SEC:
                tmp.append(tar_duration[i - 1])
            elif tmp:
                if tar_duration[i - 1] - tmp[-1] == config.THUMBNAIL_SIZE.DURATION_SEC:
                    tmp.append(tar_duration[i - 1])
                if len(tmp) > SMALL_RANGE_SEC // config.THUMBNAIL_SIZE.DURATION_SEC:
                    start_time = Duration.set_duration(tmp[0]).to_str()
                    end_time = Duration.set_duration(tmp[-1]).to_str()
                    result.append((start_time, end_time))
                tmp = []

        # the sequence stayed consecutive to the end
        if tmp:
            if tar_duration[i - 1] - tmp[-1] == config.THUMBNAIL_SIZE.DURATION_SEC:
                tmp.append(tar_duration[i - 1])
            if len(tmp) > SMALL_RANGE_SEC // config.THUMBNAIL_SIZE.DURATION_SEC:
                start_time = Duration.set_duration(tmp[0]).to_str()
                end_time = Duration.set_duration(tmp[-1]).to_str()
                result.append((start_time, end_time))

        return result
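
The function above relies on the project's Duration helper and config values. The following is a minimal, self-contained sketch of the same grouping idea using plain integers; `step` and `min_len` are illustrative stand-ins for config.THUMBNAIL_SIZE.DURATION_SEC and the SMALL_RANGE_SEC threshold, not names from the project.

def group_consecutive(durations, step=3, min_len=5):
    """Group sorted second offsets into (start, end) runs whose gaps equal `step`,
    keeping only runs with more than `min_len` entries (illustrative sketch)."""
    durations = sorted(durations)
    result, run = [], []
    for prev, cur in zip(durations, durations[1:]):
        if cur - prev == step:
            run.append(prev)
        elif run:
            if prev - run[-1] == step:
                run.append(prev)
            if len(run) > min_len:
                result.append((run[0], run[-1]))
            run = []
    # the sequence stayed consecutive to the end: close the run on the last offset
    if run:
        if durations[-1] - run[-1] == step:
            run.append(durations[-1])
        if len(run) > min_len:
            result.append((run[0], run[-1]))
    return result

# group_consecutive([0, 3, 6, 9, 12, 15, 18, 30]) -> [(0, 18)]
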
Code example #3
File: base_spider.py  Project: Monardes/AfreecaDance
 def row_sma(row):
     # restrict the raw series to the SMA window [start_index, index]
     # and locate the raw maximum inside it
     cond = (row['start_index'] <= cnt['index']) & (cnt['index'] <= row['index'])
     max_id = cnt.where(cond).dropna()['value'].idxmax()
     result = cnt.loc[max_id]
     row['ori_index'] = result['index']
     row['ori_value'] = result['value']
     ori_duration = result['index'] * per_index
     start_duration = Duration.set_duration(ori_duration - MIN_SEC * self.range_factor).to_str()
     end_duration = Duration.set_duration(ori_duration + MIN_SEC * self.range_factor).to_str()
     row['ori_range_duration'] = (start_duration, end_duration)
     return row
Code example #4
File: base_model.py  Project: weakeng/AfreecaDance
    def local_run(self, dir_name: str, tar_sec: int = 60) -> typing.List:
        """
        :param dir_name:
        :param tar_sec: 目标时长 (视频总时长,连续时间段时长)
        :return:
        """
        y_predict = self.model()
        saver = tf.compat.v1.train.Saver()
        with tf.compat.v1.Session() as sess:
            saver.restore(sess, self.model_name)
            predict_y = tf.argmax(y_predict, -1)
            # result
            raw_result = []

            for img_path, img in self._gen_valid_img(dir_name):
                predict = sess.run(predict_y,
                                   feed_dict={
                                       self.X: [img],
                                       self.keep_prob: 1.0
                                   })
                predict_value = predict[0]
                _, vod_time, _ = img_path.stem.split('_')
                if predict_value == 1:
                    raw_result.append(
                        Duration.set_time(vod_time).to_duration())

        self.log.info(
            f'[{self.user_id}/{dir_name} local run raw_result] {raw_result}')
        # res
        try:
            result = self._get_duration_range(tar_sec, raw_result)
            self.log.info(f'[{self.user_id}/{dir_name} local run] {result}')
            return result
        except Exception as e:
            self.log.error(f'[{self.user_id}/{dir_name} local run] {str(e)}')
Code example #5
 def test_img(self, img_name: str):
     station_num, h, m, s = re.search(r'(.*?)_(.*?):(.*?):(.*)\.jpg',
                                      img_name).groups()
     param = self._get_thumbnail_param(
         int(station_num),
         Duration.delta(int(h), int(m), int(s)).to_duration())
     print(self.THUMBNAIL_URL + util.join_params(**param))
Code example #6
File: base_m3u8.py  Project: Monardes/AfreecaDance
    def _trans_set2result(self, vod_set: dict) -> typing.Dict:
        result = {}
        for station_num, vod_range in vod_set.items():
            result[station_num] = []
            for start, s_range in vod_range.items():
                s = start.split('_')[-1]
                for min_r, max_r in s_range:
                    min_r_str = (Duration.set_time(s) + duration_delta(
                        s=min_r * config.THUMBNAIL_SIZE.DURATION_SEC)).to_str()
                    max_r_str = (Duration.set_time(s) + duration_delta(
                        s=max_r * config.THUMBNAIL_SIZE.DURATION_SEC)).to_str()
                    result[station_num].append((min_r_str, max_r_str))

        return result
Code example #7
File: test_m3u8.py  Project: weakeng/AfreecaDance
def merge_m3u8(tar_video: dict):
    path = 'video/47859255'
    os.makedirs(path, exist_ok=True)

    for t in tar:  # `tar` (target (start, end) time ranges) is defined outside this snippet
        min_range, max_range = t
        min_d = Duration.set_time(min_range).to_duration()
        max_d = Duration.set_time(max_range).to_duration()

        for i in range(min_d, max_d):
            if i in tar_video:
                content = util.post_content(tar_video[i])
                if content is not None:
                    with open(os.path.join(path, f'{i}.ts'), 'wb') as f:
                        f.write(content)

    merge_ts2mp4(path)
Code example #8
def sub_path(path: str, num, label: int = 0) -> str:
    p = Path(path)
    station_str, raw_duration = p.stem.split('_')
    duration = (Duration.set_time(raw_duration) +
                duration_delta(s=num)).to_str()
    file_name = f'{station_str}_{duration}_{label}{p.suffix}'
    file_dir = 'img'
    sub_img_path = Path(file_dir, file_name)
    return str(sub_img_path)
Code example #9
File: test_timeutil.py  Project: weakeng/AfreecaDance
def test_duration():
    assert Duration.set_time('2:5:0').to_str() == '2:5:0'
    assert Duration.set_time('2:5:0').to_duration() == 7500
    assert Duration.set_duration(7500).to_duration() == 7500
    assert Duration.set_duration(7500).to_str() == '2:5:0'
    assert (Duration.set_duration(7500) - duration_delta(m=5)).to_str() == '2:0:0'
    assert (Duration.set_duration(7500) - duration_delta(s=5)).to_str() == '2:4:55'
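
The Duration and duration_delta helpers used throughout these examples come from the project's time utilities, which are not shown on this page. Purely as a hypothetical sketch, a minimal implementation consistent with the assertions in test_duration() above could look like this (not the project's actual code):

import datetime


def duration_delta(h: int = 0, m: int = 0, s: int = 0) -> datetime.timedelta:
    return datetime.timedelta(hours=h, minutes=m, seconds=s)


class Duration:
    """Seconds-based duration with 'h:m:s' parsing and rendering (no zero padding)."""

    def __init__(self, seconds: int):
        self._seconds = int(seconds)

    @classmethod
    def set_time(cls, time_str: str) -> 'Duration':
        h, m, s = (int(x) for x in time_str.split(':'))
        return cls(h * 3600 + m * 60 + s)

    @classmethod
    def set_duration(cls, seconds: int) -> 'Duration':
        return cls(seconds)

    def to_duration(self) -> int:
        return self._seconds

    def to_str(self) -> str:
        h, rest = divmod(self._seconds, 3600)
        m, s = divmod(rest, 60)
        return f'{h}:{m}:{s}'

    def __add__(self, delta: datetime.timedelta) -> 'Duration':
        return Duration(self._seconds + int(delta.total_seconds()))

    def __sub__(self, delta: datetime.timedelta) -> 'Duration':
        return Duration(self._seconds - int(delta.total_seconds()))
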
Code example #10
File: base_spider.py  Project: Monardes/AfreecaDance
    def _find_top(self, raw_data: typing.List,
                  time_duration: int) -> typing.Optional[typing.List]:
        if not raw_data or not time_duration:
            return

        cnt = pd.DataFrame(raw_data, columns=['index', 'value'])

        perfect_duration = (
            Duration.set_duration(time_duration) -
            duration_delta(m=self.perfect_start_min)).to_duration()
        per_index = time_duration // len(cnt)
        diff_duration = time_duration - perfect_duration
        perfect_start = diff_duration // per_index

        y = pd.DataFrame(raw_data[perfect_start:], columns=['index', 'value'])
        sma_period = perfect_start * self.smooth_factor
        Y = ta.SMA(y['value'].values.astype('float64'),
                   timeperiod=sma_period).tolist()

        top = []
        for i, d in enumerate(Y):
            if d > 0 and i < len(Y) - 1:
                # local maximum of the smoothed series (the NaN head of the SMA fails d > 0)
                if d >= Y[i + 1] and (i == 0 or Y[i - 1] <= d):
                    top.append((i + perfect_start, d))

        def row_sma(row):
            cond = (row['start_index'] <= cnt['index']) & (cnt['index'] <= row['index'])
            max_id = cnt.where(cond).dropna()['value'].idxmax()
            result = cnt.loc[max_id]
            row['ori_index'] = result['index']
            row['ori_value'] = result['value']
            ori_duration = result['index'] * per_index
            start_duration = Duration.set_duration(ori_duration - MIN_SEC * self.range_factor).to_str()
            end_duration = Duration.set_duration(ori_duration + MIN_SEC * self.range_factor).to_str()
            row['ori_range_duration'] = (start_duration, end_duration)
            return row

        top_df = pd.DataFrame(top, columns=['index', 'value'])
        top_df['value'].where(top_df['value'] > top_df['value'].mean(), inplace=True)
        top_df = top_df.where(top_df['value'] > 0).dropna()
        top_df['start_index'] = top_df['index'] - sma_period + 1
        top_df['index'] = top_df['index']
        target = top_df.apply(row_sma, axis=1)

        return target['ori_range_duration'].drop_duplicates().to_list()
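
To see the peak-picking step of _find_top in isolation, here is a toy, self-contained illustration with made-up values; pandas' rolling mean stands in for ta.SMA (both leave NaN at the head of the series, which the `d > 0` guard skips).

import pandas as pd

values = [1.0, 2.0, 5.0, 3.0, 2.0, 6.0, 9.0, 4.0, 2.0, 1.0, 3.0, 8.0, 7.0, 2.0]
Y = pd.Series(values).rolling(window=3).mean().tolist()

top = []
for i, d in enumerate(Y):
    if d > 0 and i < len(Y) - 1:
        # same local-maximum test as above
        if d >= Y[i + 1] and (i == 0 or Y[i - 1] <= d):
            top.append((i, d))

print(top)  # indices and SMA values of the local maxima of the smoothed series
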
Code example #11
File: base_model.py  Project: weakeng/AfreecaDance
    def _get_duration_range(self, tar_sec: int,
                            raw_duration: typing.List) -> typing.List:
        range_long = tar_sec // config.THUMBNAIL_SIZE.DURATION_SEC
        if len(raw_duration) < range_long:
            raise Exception(f'target vod should be longer than {tar_sec}s')

        tar_duration = sorted(raw_duration)
        result, tmp = [], []

        for i in range(1, len(raw_duration)):
            if tar_duration[i] - tar_duration[i - 1] == config.THUMBNAIL_SIZE.DURATION_SEC:
                # still inside the same consecutive run; buffer the previous offset
                tmp.append(tar_duration[i - 1])
            elif tmp:
                if tar_duration[i - 1] - tmp[-1] == config.THUMBNAIL_SIZE.DURATION_SEC:
                    # a gap starts here; push the last offset of the run first
                    tmp.append(tar_duration[i - 1])
                if len(tmp) > tar_sec // config.THUMBNAIL_SIZE.DURATION_SEC:
                    # the run is long enough to report
                    start_time = Duration.set_duration(tmp[0]).to_str()
                    end_time = Duration.set_duration(tmp[-1]).to_str()
                    result.append((start_time, end_time))
                tmp = []

        # the sequence stayed consecutive to the end with no gap
        if tmp:
            if tar_duration[i - 1] - tmp[-1] == config.THUMBNAIL_SIZE.DURATION_SEC:
                tmp.append(tar_duration[i - 1])
            if len(tmp) > tar_sec // config.THUMBNAIL_SIZE.DURATION_SEC:
                start_time = Duration.set_duration(tmp[0]).to_str()
                end_time = Duration.set_duration(tmp[-1]).to_str()
                result.append((start_time, end_time))

        return result
Code example #12
 def sub_path(self, path, file_dir: str, num, label: int = 0) -> str:
     p = Path(path)
     station_str, raw_duration = p.stem.split('_')
     duration = (Duration.set_time(raw_duration) +
                 duration_delta(s=num)).to_str()
     file_name = f'{duration}_{label}{p.suffix}'
     # self.log.info(f'[{self.user_id}:{station_str}] {file_name}')
     # sub_img_path = Path(file_dir, station_str, file_name)
     sub_img_path = Path(file_dir, f'{station_str}_{file_name}')
     if not sub_img_path.parent.exists():
         sub_img_path.parent.mkdir(parents=True)
     return str(sub_img_path)
Code example #13
File: base_model.py  Project: weakeng/AfreecaDance
 def _get_duration_key(self, vod_time: str):
     vod_time_duration = Duration.set_time(vod_time).to_duration() - 1
     set_time_key = vod_time_duration // TOTAL_DURATION * TOTAL_DURATION
     return Duration.set_duration(set_time_key).to_str()
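
In plain integer arithmetic, the key computed above is the start of the thumbnail block that contains a playback time. A quick worked example with a stand-in value for TOTAL_DURATION (the real value is derived from config.THUMBNAIL_SIZE):

TOTAL_DURATION = 3600  # assumed stand-in: one thumbnail block spans an hour here


def duration_key(vod_seconds: int) -> int:
    # subtract 1 so an exact boundary falls into the preceding block
    return (vod_seconds - 1) // TOTAL_DURATION * TOTAL_DURATION


assert duration_key(7500) == 7200  # '2:5:0' maps to the block starting at '2:0:0'
assert duration_key(3600) == 0     # an exact boundary stays in the first block
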
Code example #14
def pl(raw_data, time_duration, station_num):
    if raw_data and time_duration:
        cnt = pd.DataFrame(raw_data, columns=['index', 'value'])
        x = list(range(0, len(cnt)))

        perfect_duration = (Duration.set_duration(time_duration) -
                            duration_delta(m=5)).to_duration()
        per_index = time_duration // len(cnt)
        diff_duration = time_duration - perfect_duration
        perfect_start = diff_duration // per_index

        y = pd.DataFrame(raw_data[perfect_start:], columns=['index', 'value'])

        sma_period = perfect_start * 2
        Y = ta.SMA(y['value'].values.astype('float64'),
                   timeperiod=sma_period).tolist()

        top = []

        for i, d in enumerate(Y):
            if d > 0 and i < len(Y) - 1:
                if d >= Y[i + 1] and (i == 0 or Y[i - 1] <= d):
                    top.append((i + perfect_start, d))
                # elif Y[i - 1] >= d and d <= Y[i + 1]:
                #     bottom.append((i, d))

        def row_sma(row):
            cond = (row['start_index'] <= cnt['index']) & (cnt['index'] <= row['index'])
            max_id = cnt.where(cond).dropna()['value'].idxmax()
            result = cnt.loc[max_id]
            row['ori_index'] = result['index']
            row['ori_value'] = result['value']
            row['ori_duration'] = result['index'] * per_index
            row['ori_range_duration'] = (row['ori_duration'] - MIN_SEC * 3,
                                         row['ori_duration'] + MIN_SEC * 3)
            return row

        top_df = pd.DataFrame(top, columns=['index', 'value'])
        top_df['value'].where(top_df['value'] > top_df['value'].mean(), inplace=True)
        top_df = top_df.where(top_df['value'] > 0).dropna()
        top_df['start_index'] = top_df['index'] - sma_period + 1
        top_df['index'] = top_df['index']

        a = top_df.apply(row_sma, axis=1)

        plt.plot(x, cnt['value'].values, 'r', linewidth=1, label='ori')
        # Y was computed from raw_data[perfect_start:], so align the x axis accordingly
        plt.plot(x[perfect_start:], Y, 'b', linewidth=1, label=f'sma-{sma_period}')

        plt.scatter(top_df['index'],
                    top_df['value'],
                    100,
                    marker='^',
                    label='max')
        plt.scatter(a['ori_index'],
                    a['ori_value'],
                    100,
                    marker='v',
                    label='ori_max')
        plt.title(station_num)
        plt.legend()
        plt.show()
Code example #15
File: test_timeutil.py  Project: weakeng/AfreecaDance
def test_d():
    d = Duration.set_time('5:9:57').to_duration()
    # note: valid_run() in base_model.py computes this as DURATION_SEC * ROW_COUNT * COLUMN_COUNT
    total_duration = (config.THUMBNAIL_SIZE.DURATION_SEC *
                      config.THUMBNAIL_SIZE.DURATION_SEC *
                      config.THUMBNAIL_SIZE.COLUMN_COUNT)
    print(total_duration)
    print(Duration.set_duration(d - (d % total_duration)).to_str())
Code example #16
File: test_timeutil.py  Project: weakeng/AfreecaDance
def test_c():
    d = Duration.set_time('0:59:33').to_duration() - Duration.set_time('0:55:00').to_duration()
    print(d/3)
Code example #17
File: base_model.py  Project: weakeng/AfreecaDance
    def valid_run(self, dir_name: str, small_range_sec=15):
        """run model"""
        # 小图总时长
        total_duration = config.THUMBNAIL_SIZE.DURATION_SEC * config.THUMBNAIL_SIZE.ROW_COUNT * config.THUMBNAIL_SIZE.COLUMN_COUNT
        # 目标时长
        TAR_SEC = 3 * 60
        # small range 时长
        SMALL_RANGE_SEC = small_range_sec

        def gen_valid_img():
            data_path = Path(config.DATA.DATA_PATH, self.user_id, 'valid_data')
            for img_path in data_path.glob(dir_name + '*'):
                img = self._prepare_img_by_path(str(img_path))
                yield img_path, img

        def get_duration_key(vod_time: str):
            vod_time_duration = Duration.set_time(vod_time).to_duration() - 1
            set_time_key = vod_time_duration // total_duration * total_duration

            return Duration.set_duration(set_time_key).to_str()

        def get_duration_range(raw_duration: typing.List):
            range_long = TAR_SEC // config.THUMBNAIL_SIZE.DURATION_SEC
            if len(raw_duration) <= range_long:
                return None

            tar_duration = sorted(raw_duration)
            result, tmp = [], []

            for i in range(1, len(raw_duration)):
                if tar_duration[i] - tar_duration[i - 1] == config.THUMBNAIL_SIZE.DURATION_SEC:
                    tmp.append(tar_duration[i - 1])
                elif tmp:
                    if tar_duration[i - 1] - tmp[-1] == config.THUMBNAIL_SIZE.DURATION_SEC:
                        tmp.append(tar_duration[i - 1])
                    if len(tmp) > SMALL_RANGE_SEC // config.THUMBNAIL_SIZE.DURATION_SEC:
                        start_time = Duration.set_duration(tmp[0]).to_str()
                        end_time = Duration.set_duration(tmp[-1]).to_str()
                        result.append((start_time, end_time))
                    tmp = []

            # the sequence stayed consecutive to the end
            if tmp:
                if tar_duration[i - 1] - tmp[-1] == config.THUMBNAIL_SIZE.DURATION_SEC:
                    tmp.append(tar_duration[i - 1])
                if len(tmp) > SMALL_RANGE_SEC // config.THUMBNAIL_SIZE.DURATION_SEC:
                    start_time = Duration.set_duration(tmp[0]).to_str()
                    end_time = Duration.set_duration(tmp[-1]).to_str()
                    result.append((start_time, end_time))

            return result

        def precision(TP: int, FP: int) -> float:
            return TP / (TP + FP)

        def recall(TP: int, FN: int) -> float:
            return TP / (TP + FN)

        def f_score(TP: int, FP: int, FN: int) -> float:
            return 2 * TP / (2 * TP + FP + FN)

        y_predict = self.model()
        saver = tf.compat.v1.train.Saver()
        with tf.compat.v1.Session() as sess:
            saver.restore(sess, self.model_name)
            predict_y = tf.argmax(y_predict, -1)
            # stat
            stat_dict = defaultdict(dict)
            TP, TN, FP, FN = 0, 0, 0, 0
            # result
            raw_result = []

            for img_path, img in gen_valid_img():
                predict = sess.run(predict_y,
                                   feed_dict={
                                       self.X: [img],
                                       self.keep_prob: 1.0
                                   })
                predict_value = predict[0]
                vod_name, vod_time, label = img_path.stem.split('_')
                vod_time_key = get_duration_key(vod_time)
                if predict_value == 1:
                    raw_result.append(
                        Duration.set_time(vod_time).to_duration())

                # acc
                stat_dict[vod_time_key].setdefault('right', 0)
                stat_dict[vod_time_key].setdefault('sum', 0)
                stat_dict[vod_time_key].setdefault('error_list', [])
                stat_dict[vod_time_key]['sum'] += 1
                if int(label) == predict_value:
                    stat_dict[vod_time_key]['right'] += 1
                else:
                    stat_dict[vod_time_key]['error_list'].append(
                        img_path.stem.replace(':', '/'))
                # confusion matrix
                if int(label) == predict_value == 1:
                    TP += 1
                elif int(label) == predict_value == 0:
                    TN += 1
                elif int(label) == 1 and predict_value == 0:
                    FN += 1
                elif int(label) == 0 and predict_value == 1:
                    FP += 1
        # res
        result = get_duration_range(raw_result)
        self.log.info(f'[{self.user_id}/{dir_name}] {result}')
        # stat
        rate = []
        for k, v in stat_dict.items():
            v['rate'] = v.pop('right') / v.pop('sum')
            rate.append(v['rate'])
        pprint(stat_dict)
        acc_msg = f'acc {statistics.mean(rate):.4%}'
        confusion_matrix_msg = f'precision {precision(TP, FP):.4} recall {recall(TP, FN):.4} f_score {f_score(TP, FP, FN):.4}'
        self.log.info(
            f'[{self.user_id}/{dir_name} valid] {acc_msg} {confusion_matrix_msg}'
        )
        pprint(acc_msg)
        pprint(confusion_matrix_msg)
        pprint(result)
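
A quick sanity check of the precision/recall/f_score helpers above with made-up counts (not project data):

TP, FP, FN = 8, 2, 4
assert TP / (TP + FP) == 0.8                              # precision
assert abs(TP / (TP + FN) - 2 / 3) < 1e-9                 # recall
assert abs(2 * TP / (2 * TP + FP + FN) - 8 / 11) < 1e-9   # f_score
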
Code example #18
File: base_model.py  Project: weakeng/AfreecaDance
        def get_duration_key(vod_time: str):
            vod_time_duration = Duration.set_time(vod_time).to_duration() - 1
            set_time_key = vod_time_duration // total_duration * total_duration

            return Duration.set_duration(set_time_key).to_str()