Beispiel #1
0
    def _iter_subset_batch_data(self, subset_df, batch_size, repeat, shuffle):
        # 전체 데이터를 순회할 때까지 반복
        while True:
            # 주어진 데이터에서 현재 iterator의 위치
            i = 0

            if shuffle:
                subset_df = subset_df.sample(frac=1)

            while True:
                # 배치 데이터 slicing
                batch_df = subset_df.iloc[i:i + batch_size]

                # 데이터가 없으면 iteration 종료
                if len(batch_df) == 0:
                    break

                # 모든 배치 데이터에 대해 segment 데이터 읽어와서 리스트 생성
                batch_data = [
                    cu.load(segment['path'])
                    for _, segment in batch_df.iterrows()
                ]

                # x, y 데이터 분리
                batch_x_video, batch_x_audio, batch_y = zip(
                    *[(segment['video'], segment['audio'], segment['label'])
                      for segment in batch_data])

                batch_x_video = np.array(batch_x_video)
                batch_x_audio = np.array(batch_x_audio)
                batch_y = np.array(batch_y).reshape(-1, 1)

                # 데이터를 iterator로 반환
                batch_x = []
                if 'video' in self.x_includes:
                    batch_x.append(batch_x_video)
                if 'audio' in self.x_includes:
                    batch_x.append(batch_x_audio)

                yield batch_x, batch_y

                i += batch_size

            if not repeat:
                break
Beispiel #2
0
def convert_data(input_video_dir,
                 segment_length,
                 video_sample_rate,
                 video_width,
                 video_height,
                 audio_sample_rate,
                 apply_mfcc,
                 output_dataset_dir=None):
    """Split every video in ``input_video_dir`` into segments and write a dataset.

    Each ``*.txt`` file in ``input_video_dir`` is treated as a highlight-section
    file and must have a sibling ``.mp4`` with the same base name. Every
    (title, video, highlight file) triple is handed to
    ``generate_segment_data`` in a multiprocessing pool, and a
    ``metadata.json`` summarising the run is written to the output directory.

    Inputs are validated *before* an existing output directory is touched, so
    a bad input directory never costs the user a previously built dataset.

    Args:
        input_video_dir: Directory containing the ``*.txt`` / ``*.mp4`` pairs.
        segment_length: Stored in the config passed to
            ``generate_segment_data`` and in the metadata.
        video_sample_rate: Same as above.
        video_width: Same as above.
        video_height: Same as above.
        audio_sample_rate: Same as above.
        apply_mfcc: Same as above; also adds a ``_mfcc`` suffix to the default
            output directory name.
        output_dataset_dir: Destination directory. When falsy, a name derived
            from the configuration values is used.

    Raises:
        SystemExit: With code 1 when no highlight files are found, when a
            video file is missing, or when the user declines to overwrite an
            existing output directory.
    """
    import shutil
    import sys

    # Fall back to a default name derived from the configuration.
    if not output_dataset_dir:
        output_dataset_dir = (
            f'dataset_sl{segment_length}_vsr{video_sample_rate}'
            f'_vw{video_width}_vh{video_height}_asr{audio_sample_rate}'
            f'{"_mfcc" if apply_mfcc else ""}')

    config = {
        'segment_length': segment_length,
        'video_sample_rate': video_sample_rate,
        'video_width': video_width,
        'video_height': video_height,
        'audio_sample_rate': audio_sample_rate,
        'apply_mfcc': apply_mfcc
    }

    print(f'input_video_dir: {input_video_dir}')
    print(f'output_dataset_dir: {output_dataset_dir}')
    print('config:')
    pp(config)
    print()

    # Derive the video paths and titles from the highlight-section file paths.
    hl_section_path_list = glob.glob(os.path.join(input_video_dir, '*.txt'))
    video_path_list = [
        os.path.splitext(path)[0] + '.mp4' for path in hl_section_path_list
    ]
    title_list = [
        os.path.splitext(os.path.basename(path))[0]
        for path in hl_section_path_list
    ]

    # Without any input there is nothing to convert.
    if not hl_section_path_list:
        print(f'ERROR: no highlight section files (*.txt) found in '
              f'{input_video_dir}')
        sys.exit(1)

    # Abort if any of the expected video files is missing.
    missing_video_paths = [
        video_path for video_path in video_path_list
        if not os.path.exists(video_path)
    ]
    if missing_video_paths:
        for video_path in missing_video_paths:
            print(f'ERROR: {video_path} not exists')
        sys.exit(1)

    # Only now deal with a pre-existing output directory: ask before wiping it.
    if os.path.exists(output_dataset_dir):
        print(
            f'WARNING: output_dataset_dir "{output_dataset_dir}" already exists.'
        )
        answer = input('Remove all contents and continue? (y/n): ')
        if answer.lower().strip() == 'y':
            shutil.rmtree(output_dataset_dir)
        else:
            sys.exit(1)

    os.makedirs(output_dataset_dir, exist_ok=True)

    start = dt.now()

    # Read each source video with its highlight file and store it split into
    # segments; one task per video.
    with mp.Pool() as pool:
        params = zip(repeat(config), title_list, video_path_list,
                     hl_section_path_list, repeat(output_dataset_dir))
        output_path_list_list = pool.starmap(generate_segment_data, params)

    segment_counts = [len(paths) for paths in output_path_list_list]
    total_segment_count = sum(segment_counts)

    # Read the video/audio data shapes from the first segment that exists;
    # the first video may legitimately have produced no segments.
    first_segment_path = next(
        (paths[0] for paths in output_path_list_list if len(paths) > 0), None)
    if first_segment_path is None:
        print('WARNING: no segments were generated; data shapes are unknown.')
        video_data_shape = None
        audio_data_shape = None
    else:
        segment_data = cu.load(first_segment_path)
        video_data_shape = segment_data['video'].shape
        audio_data_shape = segment_data['audio'].shape

    # Record the metadata of this run.
    metadata = {
        'created': str(dt.now()),
        'config': config,
        'data_shape': {
            'video': video_data_shape,
            'audio': audio_data_shape
        },
        'total_segment_count': total_segment_count,
        'segment_counts': dict(zip(title_list, segment_counts))
    }
    metadata_path = os.path.join(output_dataset_dir, 'metadata.json')
    with open(metadata_path, 'w') as f:
        json.dump(metadata, f, indent=4)

    end = dt.now()

    print()
    print(
        f'Total {total_segment_count} segments saved, elapsed time: {end - start}s'
    )
Beispiel #3
0
    def _iter_subset_batch_data(self, subset_df, batch_size, repeat, shuffle):
        # 전체 데이터를 순회할 때까지 반복
        while True:
            # 주어진 데이터에서 현재 iterator의 위치
            i = 0

            if shuffle:
                subset_df = subset_df.sample(frac=1)

            while True:
                # 배치 데이터 slicing
                batch_df = subset_df.iloc[i:i + batch_size]

                # 데이터가 없으면 iteration 종료
                if len(batch_df) == 0:
                    break

                # 모든 배치 데이터에 대해 segment 데이터 읽어와서 리스트 생성
                batch_data = []
                for _, segment in batch_df.iterrows():
                    target_segment_data = cu.load(segment['path'])

                    # 시간값 추가
                    target_segment_data['time'] = target_segment_data[
                        'start_sec'] / target_segment_data['total_duration']

                    if self.x_expand > 0:
                        # 앞/뒤 segment 결합
                        segment_data_list = []
                        title = segment['title']
                        target_index = segment['index']
                        title_segment_list = self.all_segment_dict[title]
                        segment_data_zero = {
                            'video':
                            np.zeros_like(target_segment_data['video']),
                            'audio':
                            np.zeros_like(target_segment_data['audio'])
                        }

                        for index in range(target_index - self.x_expand,
                                           target_index + self.x_expand + 1):
                            if index == target_index:
                                segment_data_list.append(target_segment_data)
                            elif 0 <= index < len(title_segment_list):
                                segment_data_list.append(
                                    cu.load(title_segment_list[index]['path']))
                            else:
                                segment_data_list.append(segment_data_zero)

                        video_list, audio_list = zip(
                            *[(s['video'], s['audio'])
                              for s in segment_data_list])

                        # 원본 데이터 교체
                        target_segment_data['video'] = np.array(video_list)
                        target_segment_data['audio'] = np.array(audio_list)

                    batch_data.append(target_segment_data)

                # x, y 데이터 분리
                batch_x_video, batch_x_audio, batch_x_time, = zip(
                    *[(s['video'], s['audio'], s['time']) for s in batch_data])

                batch_x_video = np.array(batch_x_video, dtype=np.float16)
                batch_x_audio = np.array(batch_x_audio, dtype=np.float16)
                batch_x_time = np.array(batch_x_time, dtype=np.float16)

                # 데이터를 iterator로 반환
                batch_x = []
                if 'video' in self.x_includes:
                    batch_x.append(batch_x_video)
                if 'audio' in self.x_includes:
                    batch_x.append(batch_x_audio)
                if 'time' in self.x_includes:
                    batch_x.append(batch_x_time)

                yield batch_x

                i += batch_size

            if not repeat:
                break