def _iter_subset_batch_data(self, subset_df, batch_size, repeat, shuffle):
    """Generate ``(batch_x, batch_y)`` training batches from ``subset_df``.

    Walks the DataFrame in slices of ``batch_size`` rows, loading each row's
    segment file via ``cu.load(row['path'])``. For every batch it yields a
    pair ``(batch_x, batch_y)`` where ``batch_x`` is a list holding the video
    and/or audio arrays (whichever names appear in ``self.x_includes``) and
    ``batch_y`` is the label column as an ``(n, 1)`` array.

    Args:
        subset_df: DataFrame with at least a ``path`` column; each loaded
            segment dict provides ``'video'``, ``'audio'`` and ``'label'``.
        batch_size: Number of rows per yielded batch.
        repeat: When truthy, restart from the top after exhausting the data
            (infinite generator); otherwise stop after one full pass.
        shuffle: When truthy, reshuffle the rows before each pass.
    """
    while True:
        # Reshuffle at the start of every pass when requested.
        if shuffle:
            subset_df = subset_df.sample(frac=1)
        offset = 0
        while True:
            # Take the next slice; an empty slice means the pass is done.
            window = subset_df.iloc[offset:offset + batch_size]
            if len(window) == 0:
                break
            # Load the segment dict for every row in this slice.
            loaded = [cu.load(row['path']) for _, row in window.iterrows()]
            # Separate model inputs from labels.
            videos = np.array([seg['video'] for seg in loaded])
            audios = np.array([seg['audio'] for seg in loaded])
            labels = np.array([seg['label'] for seg in loaded]).reshape(-1, 1)
            # Assemble only the requested input streams (video first).
            inputs = []
            if 'video' in self.x_includes:
                inputs.append(videos)
            if 'audio' in self.x_includes:
                inputs.append(audios)
            yield inputs, labels
            offset += batch_size
        if not repeat:
            break
def convert_data(input_video_dir,
                 segment_length,
                 video_sample_rate,
                 video_width,
                 video_height,
                 audio_sample_rate,
                 apply_mfcc,
                 output_dataset_dir=None):
    """Convert raw videos plus highlight-section files into a segment dataset.

    Scans ``input_video_dir`` for ``*.txt`` highlight-section files, pairs
    each with a same-named ``.mp4`` video, splits every video into segments in
    parallel via ``generate_segment_data``, then writes a ``metadata.json``
    summary into ``output_dataset_dir``.

    Args:
        input_video_dir: Directory holding ``<title>.mp4`` / ``<title>.txt`` pairs.
        segment_length: Length of each segment (units per project convention).
        video_sample_rate, video_width, video_height, audio_sample_rate,
        apply_mfcc: Conversion parameters; bundled into a ``config`` dict that
            is passed to every worker and recorded in the metadata.
        output_dataset_dir: Destination directory. When omitted/falsy, a name
            encoding all conversion parameters is derived automatically.

    Raises:
        SystemExit: If the user declines to overwrite an existing output
            directory, if any paired video file is missing, or if no segments
            were produced.

    Side effects: prints progress, prompts on stdin when the output directory
    already exists, removes/creates directories, and spawns a process pool.
    """
    # Derive a default output directory name that encodes the config.
    if not output_dataset_dir:
        output_dataset_dir = f'dataset_sl{segment_length}_vsr{video_sample_rate}_vw{video_width}_vh{video_height}_asr{audio_sample_rate}{"_mfcc" if apply_mfcc else ""}'
    config = {
        'segment_length': segment_length,
        'video_sample_rate': video_sample_rate,
        'video_width': video_width,
        'video_height': video_height,
        'audio_sample_rate': audio_sample_rate,
        'apply_mfcc': apply_mfcc
    }
    print(f'input_video_dir: {input_video_dir}')
    print(f'output_dataset_dir: {output_dataset_dir}')
    print('config:')
    pp(config)
    print()
    # If the output directory already exists, ask before wiping it.
    if os.path.exists(output_dataset_dir):
        print(
            f'WARNING: output_dataset_dir \"{output_dataset_dir}\" already exists.'
        )
        if input('Remove all contents and continue? (y/n): ').lower().strip(
        ) == 'y':
            import shutil
            shutil.rmtree(output_dataset_dir)
        else:
            # Was quit(1): quit() comes from the `site` module and is not
            # guaranteed to exist (e.g. under `python -S`); raising
            # SystemExit(1) is the always-available equivalent.
            raise SystemExit(1)
    os.makedirs(output_dataset_dir, exist_ok=True)
    # Highlight-section files drive discovery: derive the video path and the
    # title from each .txt path.
    hl_section_path_list = glob.glob(os.path.join(input_video_dir, '*.txt'))
    video_path_list = [
        os.path.splitext(path)[0] + '.mp4' for path in hl_section_path_list
    ]
    title_list = [
        os.path.splitext(os.path.split(path)[1])[0]
        for path in hl_section_path_list
    ]
    # Report every missing video file, then abort if any is absent.
    video_path_exists = list(map(os.path.exists, video_path_list))
    if not all(video_path_exists):
        for video_path in [
                video_path for exists, video_path in zip(
                    video_path_exists, video_path_list) if not exists
        ]:
            print(f'ERROR: {video_path} not exists')
        raise SystemExit(1)
    start = dt.now()
    # Split every video into segments in parallel, one worker call per title.
    with mp.Pool() as pool:
        params = zip(repeat(config), title_list, video_path_list,
                     hl_section_path_list, repeat(output_dataset_dir))
        output_path_list_list = pool.starmap(generate_segment_data, params)
    total_segment_count = sum(map(len, output_path_list_list))
    # Guard: indexing the first segment below would raise a bare IndexError
    # when nothing was produced; fail with a clear message instead.
    if total_segment_count == 0:
        print('ERROR: no segments were generated')
        raise SystemExit(1)
    # Get the video/audio data shape from the first saved segment.
    segment_data = cu.load(output_path_list_list[0][0])
    video_data_shape = segment_data['video'].shape
    audio_data_shape = segment_data['audio'].shape
    # Record metadata describing the generated dataset.
    metadata = {
        'created': str(dt.now()),
        'config': config,
        'data_shape': {
            'video': video_data_shape,
            'audio': audio_data_shape
        },
        'total_segment_count': total_segment_count,
        'segment_counts': {
            title: count
            for title, count in zip(title_list,
                                    map(len, output_path_list_list))
        }
    }
    metadata_path = os.path.join(output_dataset_dir, 'metadata.json')
    with open(metadata_path, 'w') as f:
        json.dump(metadata, f, indent=4)
    end = dt.now()
    print()
    print(
        f'Total {total_segment_count} segments saved, elapsed time: {end - start}s'
    )
def _iter_subset_batch_data(self, subset_df, batch_size, repeat, shuffle):
    """Yield model-input batches (``batch_x`` only, no labels) from ``subset_df``.

    Each row's segment file is loaded via ``cu.load(row['path'])`` and
    augmented with a normalized ``time`` feature. When ``self.x_expand > 0``,
    each segment's video/audio is replaced by a stacked window of the
    ``2 * x_expand + 1`` neighboring segments of the same title, zero-padded
    at the clip boundaries. The yielded ``batch_x`` is a list containing the
    float16 video / audio / time arrays for whichever names appear in
    ``self.x_includes`` (in that fixed order).

    Instance state read: ``self.x_expand``, ``self.all_segment_dict``
    (title -> ordered segment list with ``'path'`` entries), ``self.x_includes``.
    NOTE(review): assumes each loaded segment dict has ``'start_sec'`` and
    ``'total_duration'`` keys — confirm against the segment writer.
    """
    # Loop until the caller stops consuming (repeat) or one pass completes.
    while True:
        # Current iterator position within the given data.
        i = 0
        if shuffle:
            subset_df = subset_df.sample(frac=1)
        while True:
            # Slice the next batch of rows.
            batch_df = subset_df.iloc[i:i + batch_size]
            # No rows left: this pass is finished.
            if len(batch_df) == 0:
                break
            # Load the segment data for every row in the batch.
            batch_data = []
            for _, segment in batch_df.iterrows():
                target_segment_data = cu.load(segment['path'])
                # Add the segment's normalized time position in [0, 1).
                target_segment_data['time'] = target_segment_data[
                    'start_sec'] / target_segment_data['total_duration']
                if self.x_expand > 0:
                    # Stack the preceding/following segments around the target.
                    segment_data_list = []
                    title = segment['title']
                    target_index = segment['index']
                    title_segment_list = self.all_segment_dict[title]
                    # Zero-filled stand-in used when the window runs past
                    # either end of the title's segment list.
                    segment_data_zero = {
                        'video': np.zeros_like(target_segment_data['video']),
                        'audio': np.zeros_like(target_segment_data['audio'])
                    }
                    for index in range(target_index - self.x_expand,
                                       target_index + self.x_expand + 1):
                        if index == target_index:
                            # Reuse the already-loaded target segment.
                            segment_data_list.append(target_segment_data)
                        elif 0 <= index < len(title_segment_list):
                            segment_data_list.append(
                                cu.load(title_segment_list[index]['path']))
                        else:
                            segment_data_list.append(segment_data_zero)
                    video_list, audio_list = zip(
                        *[(s['video'], s['audio']) for s in segment_data_list])
                    # Replace the original single-segment arrays with the
                    # stacked window (adds a leading window axis).
                    target_segment_data['video'] = np.array(video_list)
                    target_segment_data['audio'] = np.array(audio_list)
                batch_data.append(target_segment_data)
            # Split the x features into parallel tuples.
            batch_x_video, batch_x_audio, batch_x_time, = zip(
                *[(s['video'], s['audio'], s['time']) for s in batch_data])
            # float16 keeps the batch memory footprint small.
            batch_x_video = np.array(batch_x_video, dtype=np.float16)
            batch_x_audio = np.array(batch_x_audio, dtype=np.float16)
            batch_x_time = np.array(batch_x_time, dtype=np.float16)
            # Return only the requested input streams, as an iterator.
            batch_x = []
            if 'video' in self.x_includes:
                batch_x.append(batch_x_video)
            if 'audio' in self.x_includes:
                batch_x.append(batch_x_audio)
            if 'time' in self.x_includes:
                batch_x.append(batch_x_time)
            yield batch_x
            i += batch_size
        if not repeat:
            break