Example #1
    def make_meta(self, root_dir: str):
        # collect all mixture audio files two directory levels below root_dir
        print('Lookup files ...')
        mixture_list = glob.glob(
            os.path.join(root_dir, '*', '*', 'mixture.*.npy'))

        # Only vocals are extracted here; override this method to use other sources.
        vocals_list = glob.glob(
            os.path.join(root_dir, '*', '*', 'vocals.*.npy'))

        # set up the meta information
        print('Make meta information ...')
        self._meta['mixture_filename'] = sorted(mixture_list)
        self._meta['voice_filename'] = sorted(vocals_list)

        # split train / val
        print('Make train / val meta')
        train_meta, val_meta = split_train_val_frame(self._meta, val_rate=0.1)

        # save data frames
        print('Save meta frames on {}'.format(' '.join(self.frame_file_names)))
        self.save_meta(self.frame_file_names, self.meta_path, self._meta,
                       train_meta, val_meta)
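
For context, the globs above assume a two-level directory layout below root_dir, with one set of chunked .npy files per track. A minimal, hypothetical layout and call (the names are illustrative, not from the source):

    # hypothetical layout (illustrative names):
    #   /data/musdb/train/song_001/mixture.0.npy
    #   /data/musdb/train/song_001/vocals.0.npy
    preprocessor.make_meta('/data/musdb')  # `preprocessor` is an instance of this class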
Example #2
    def make_meta(self, mixtures, vocals, backgrounds):
        # look up chunked subset files (name.npy -> name.*.npy) and flatten
        def expand(parents):
            return [
                f for parent in parents
                for f in glob.glob(parent.replace('.npy', '.*.npy'))
            ]

        mixtures = expand(mixtures)
        vocals = expand(vocals)
        backgrounds = expand(backgrounds)

        # setup meta
        self._meta['mixture_filename'] = sorted(mixtures)
        self._meta['voice_filename'] = sorted(vocals)
        self._meta['background_filename'] = sorted(backgrounds)

        # split train / val
        print('Make train / val meta')
        train_meta, val_meta = split_train_val_frame(self._meta, val_rate=0.1)

        # save data frames
        print('Save meta frames on {}'.format(' '.join(self.frame_file_names)))
        self.save_meta(self.frame_file_names, self.meta_path, self._meta,
                       train_meta, val_meta)
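
The replace-then-glob idiom above turns each parent file name into a wildcard over its chunk files. A small sketch of the expansion, using an illustrative path:

    import glob

    parent = '/data/song_001/vocals.npy'        # illustrative parent file name
    pattern = parent.replace('.npy', '.*.npy')  # '/data/song_001/vocals.*.npy'
    chunks = glob.glob(pattern)                 # e.g. ['.../vocals.0.npy', '.../vocals.1.npy']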
Example #3
    def make_meta(self, wav_file_list: List[str], text_file_list: List[str]):
        # mapping tables used to build the meta data frame
        info = {'audio_filename': {}, 'text': {}, 'phoneme': {}, 'speaker': {}}
        speakers = []

        # parse wav keys
        for wav_path in wav_file_list:
            key = os.path.basename(wav_path).replace('.wav', '')
            info['audio_filename'][key] = wav_path
            speaker = key.split('_')[0]
            speakers.append(speaker)

        # map speaker names to stable indices (sorted for determinism)
        speakers = {spk: idx for idx, spk in enumerate(sorted(set(speakers)))}
        for key in info['audio_filename'].keys():
            speaker = key.split('_')[0]
            info['speaker'][key] = speakers[speaker]

        # parse texts and convert graphemes to phonemes with KoG2P
        kog2p = KoG2P()
        for text_path in text_file_list:
            # read the transcript file: one '<key> <text>' pair per line
            with open(text_path, 'r') as r:
                for line in r:
                    spl = line.strip().split(' ')
                    key, text = spl[0], ' '.join(spl[1:])
                    info['text'][key] = text
                    info['phoneme'][key] = kog2p.g2p(text)

        # build the meta data frame
        self._meta = pd.DataFrame(info)
        self._meta['pass'] = [True] * len(self._meta)

        # read duration
        print('Check durations on wave files ...')
        dur_list = self._process_duration(self._meta['audio_filename'].values,
                                          settings.MIN_WAV_RATE, settings.MAX_WAV_RATE)
        self._meta['duration'] = dur_list

        # filter passed rows
        self._meta = self._meta[self._meta['pass'].values]
        self._meta = self._meta.dropna()

        # split train / val
        print('Make train / val meta')
        train_meta, val_meta = split_train_val_frame(self._meta, val_rate=0.05)

        # save data frames
        print('Save meta frames on {}'.format(' '.join(self.frame_file_names)))
        self.save_meta(self.frame_file_names, self.meta_path, self._meta, train_meta, val_meta)
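
The transcript files are assumed to hold one '<key> <text>' pair per line, with the speaker id encoded in the key before the first underscore. A worked example on a made-up line:

    line = 'spk1_0001 안녕하세요 반갑습니다\n'  # hypothetical transcript line
    spl = line.strip().split(' ')
    key, text = spl[0], ' '.join(spl[1:])  # key == 'spk1_0001', text == '안녕하세요 반갑습니다'
    speaker = key.split('_')[0]            # speaker == 'spk1'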
Example #4
    def make_meta(self, root_dir: str, min_wav_rate: int, max_wav_rate: int,
                  min_txt_rate: float):
        # list speaker directories
        print('Listing speakers ...')
        speakers = os.listdir(root_dir)

        # look up wav files per speaker
        print('Lookup files ...')
        wav_file_list = []
        speaker_mult = []
        for speaker in tqdm(speakers):
            file_temp = glob.glob(
                os.path.join(root_dir, speaker, 'wav', '*.wav'))
            wav_file_list.extend(file_temp)
            speaker_mult.extend(list(repeat(speaker, len(file_temp))))

        print('Update meta infos')
        speaker_mappings = {
            spk: idx
            for idx, spk in enumerate(sorted(speakers))
        }
        # update infos
        self._meta['speaker'] = [speaker_mappings[idx] for idx in speaker_mult]
        self._meta['audio_filename'] = wav_file_list
        self._meta['pass'] = [True] * len(speaker_mult)

        # read duration
        print('Check durations on wave files ...')
        dur_list = self._process_duration(wav_file_list, min_wav_rate,
                                          max_wav_rate)
        self._meta['duration'] = dur_list

        # text process
        print('Text pre-process ...')
        # map <speaker>/wav/<name>.wav -> <speaker>/txt/<name>.txt
        # (str.replace swaps every 'wav' occurrence in the path)
        txt_file_list = [
            file_path.replace('wav', 'txt') for file_path in wav_file_list
        ]
        self._process_txt(txt_file_list, dur_list, min_txt_rate)

        # filter passed rows
        self._meta = self._meta[self._meta['pass'].values]

        # split train / val
        print('Make train / val meta')
        train_meta, val_meta = split_train_val_frame(self._meta)

        # save data frames
        print('Save meta frames on {}'.format(' '.join(self.frame_file_names)))
        self.save_meta(self.frame_file_names, self.meta_path, self._meta,
                       train_meta, val_meta)
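
Note that str.replace swaps every occurrence of 'wav' in the path, which is exactly what maps the wav directory and the .wav extension in one call; a speaker name that happens to contain 'wav' would be mangled. A quick check on an illustrative path:

    path = '/data/spk01/wav/utt_0001.wav'  # illustrative path
    txt_path = path.replace('wav', 'txt')  # '/data/spk01/txt/utt_0001.txt'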
Example #5
    def make_meta(self, wav_file_list: List[str], txt_info: pd.DataFrame):

        # dictionary for making data frame
        infos = {'audio_filename': {}, 'text': {}}

        # make wav file / id mapping
        wav_mapping = {
            os.path.basename(wav_file_path).split('.')[0]: wav_file_path
            for wav_file_path in wav_file_list
        }

        # map transcripts to their audio files
        for _, val_series in txt_info.iterrows():
            # columns: ['id', 'text', 'normalized_text']
            id_, norm_text = val_series['id'], val_series['normalized_text']

            infos['audio_filename'][id_] = wav_mapping[id_]
            infos['text'][id_] = norm_text

        # build the meta data frame
        self._meta = pd.DataFrame(infos)
        self._meta['pass'] = [True] * len(self._meta)

        # read duration
        print('Check durations on wave files ...')
        dur_list = self._process_duration(self._meta['audio_filename'].values,
                                          settings.MIN_WAV_RATE,
                                          settings.MAX_WAV_RATE)
        self._meta['duration'] = dur_list

        # filter passed rows
        self._meta = self._meta[self._meta['pass'].values]
        self._meta = self._meta.dropna()

        # split train / val
        print('Make train / val meta')
        train_meta, val_meta = split_train_val_frame(self._meta, val_rate=0.1)

        # save data frames
        print('Save meta frames on {}'.format(' '.join(self.frame_file_names)))
        self.save_meta(self.frame_file_names, self.meta_path, self._meta,
                       train_meta, val_meta)
Example #6
    def make_meta(self):
        # convert relative paths to absolute paths
        audio_file_names = self._meta['audio_filename']
        midi_file_names = self._meta['midi_filename']
        new_audio_files = [
            os.path.join(self.root_dir, name) for name in audio_file_names
        ]
        new_midi_files = [
            os.path.join(self.root_dir, name) for name in midi_file_names
        ]
        self._meta['audio_filename'] = new_audio_files
        self._meta['midi_filename'] = new_midi_files

        # split train / val
        print('Make train / val meta')
        train_meta, val_meta = split_train_val_frame(self._meta)

        # save data frames
        print('Save meta frames on {}'.format(' '.join(self.frame_file_names)))
        self.save_meta(self.root_dir, self._meta, train_meta, val_meta)
Example #7
    def make_meta(self, chunk_file_list, speakers, val_rate: float = 0.1):
        # make dict
        info = {'audio_filename': chunk_file_list, 'speaker': speakers}

        # build the meta data frame
        self._meta = pd.DataFrame(info)

        # make speaker as indices
        speaker_mappings = {
            spk: idx
            for idx, spk in enumerate(
                sorted(list(set(self._meta['speaker'].values))))
        }

        # update infos
        self._meta['speaker'] = [
            speaker_mappings[spk] for spk in self._meta['speaker'].values
        ]
        self._meta['pass'] = [True] * len(self._meta)

        # read duration
        print('Check durations on wave files ...')
        dur_list = self._process_duration(self._meta['audio_filename'].values,
                                          0, 0)  # 0, 0 presumably disables the rate bounds
        self._meta['duration'] = dur_list

        # split train / val
        print('Make train / val meta')
        train_meta, val_meta = split_train_val_frame(self._meta,
                                                     val_rate=val_rate)

        # save data frames
        print('Save meta frames on {}'.format(' '.join(self.frame_file_names)))
        self.save_meta(self.frame_file_names, self.meta_path, self._meta,
                       train_meta, val_meta)

        # save speaker map as json
        spk_json_path = os.path.join(self.meta_path, 'speaker_map.json')
        with open(spk_json_path, 'w') as w:
            json.dump(speaker_mappings, w)
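
The saved speaker map can be read back later, e.g. at inference time, to translate speaker names into the indices used during training. A minimal sketch, assuming meta_path points at the same directory:

    import json
    import os

    with open(os.path.join(meta_path, 'speaker_map.json'), 'r') as r:
        speaker_mappings = json.load(r)  # {'speaker_name': index, ...}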
Example #8
    def make_meta(self,
                  root_dir: str,
                  wav_file_list: List[str] = None,
                  txt_file_list: List[str] = None):
        # paths are composed of three nested keys:
        #   <train or valid> / <id> / <data type>

        # look up files if the lists were not provided
        print('Lookup files if not provided')
        if not wav_file_list:
            wav_file_list = glob.glob(
                os.path.join(root_dir, '*', 'wav', '*.wav'))
        if not txt_file_list:
            txt_file_list = glob.glob(
                os.path.join(root_dir, '*', 'txt', '*.txt'))

        infos = {'speaker': {}, 'audio_filename': {}, 'text': {}}

        # match
        wav_match_dict = {
            os.path.basename(wav_file_path).replace('.wav', ''): wav_file_path
            for wav_file_path in wav_file_list
        }
        txt_match_dict = {
            os.path.basename(txt_file_path).replace('.txt', ''): txt_file_path
            for txt_file_path in txt_file_list
        }

        print('Mapping information with their keys')
        for key, wav_file_path in tqdm(wav_match_dict.items()):
            if key in txt_match_dict:
                speaker = wav_file_path.split('/')[-3]
                infos['speaker'][key] = speaker
                infos['audio_filename'][key] = wav_file_path
                infos['text'][key] = txt_match_dict[key]

        print('Matching is completed ...')

        # build the meta data frame
        self._meta = pd.DataFrame(infos)

        # make speaker as indices
        speaker_mappings = {
            spk: idx
            for idx, spk in enumerate(
                sorted(list(set(self._meta['speaker'].values))))
        }
        # update infos
        self._meta['speaker'] = [
            speaker_mappings[spk] for spk in self._meta['speaker'].values
        ]
        self._meta['pass'] = [True] * len(self._meta)

        # read duration
        print('Check durations on wave files ...')
        dur_list = self._process_duration(self._meta['audio_filename'].values,
                                          settings.MIN_WAV_RATE,
                                          settings.MAX_WAV_RATE)
        self._meta['duration'] = dur_list

        # text process
        print('Text pre-process ... ')
        self._process_txt(self._meta['text'].values, dur_list, 0.0)

        # filter passed rows
        self._meta = self._meta[self._meta['pass'].values]

        # split train / val
        print('Make train / val meta')
        train_meta, val_meta = split_train_val_frame(self._meta, val_rate=0.1)

        # save data frames
        print('Save meta frames on {}'.format(' '.join(self.frame_file_names)))
        self.save_meta(self.frame_file_names, self.meta_path, self._meta,
                       train_meta, val_meta)
Example #9
    def libri_light2(self,
                     all_meta: str,
                     out_dir: str,
                     min_duration: float = 2,
                     max_duration: float = 15.):
        # mkdir
        chunk_dir = os.path.join(out_dir, 'chunks')
        meta_dir = os.path.join(out_dir, 'meta')
        os.makedirs(chunk_dir, exist_ok=True)
        os.makedirs(meta_dir, exist_ok=True)

        # load meta
        df = pd.read_json(all_meta)

        # split df into short (<= max_duration) and long (> max_duration) files
        df_under = df[df['duration'] <= max_duration]
        df_upper = df[df['duration'] > max_duration]

        # chunking
        upper_input_list = df_upper['audio_filename']
        upper_output_list = [
            os.path.join(chunk_dir, os.path.basename(p))
            for p in upper_input_list
        ]

        print('Start Audio Processing ...')
        # split each over-long file into chunks of [min, max] duration (in samples)
        # NOTE: speakers must come from df_upper to stay aligned with the inputs
        results = Parallel(n_jobs=__class__.num_workers)(
            delayed(split_and_save)(*args, min_duration * settings.SAMPLE_RATE,
                                    max_duration * settings.SAMPLE_RATE)
            for args in tqdm(
                list(zip(upper_input_list, upper_output_list,
                         df_upper['speaker']))))  # returns a list of tuples

        chunk_paths, chunk_lens, speakers = [], [], []
        for cp, cl, spk in results:
            chunk_paths.append(cp)
            chunk_lens.append(cl)
            speakers.append(spk)

        # spread out
        chunk_paths = [path for subset in chunk_paths for path in subset]
        chunk_lens = [
            chunk_len for subset in chunk_lens for chunk_len in subset
        ]
        speakers = [spk for subset in speakers for spk in subset]

        assert len(chunk_paths) == len(chunk_lens)

        # make df
        upper_chunk_info = {
            'audio_filename': chunk_paths,
            'duration': chunk_lens,
            'speaker': speakers,
            'pass': [True] * len(chunk_paths)
        }
        upper_chunks_df = pd.DataFrame(upper_chunk_info)

        # re-index so the two frames concatenate without index collisions
        df_under = df_under.reset_index(drop=True)
        upper_chunks_df.index = pd.RangeIndex(
            len(df_under), len(df_under) + len(upper_chunks_df))

        # combine with original df
        new_df = pd.concat([df_under, upper_chunks_df])

        # drop chunks shorter than the minimum duration
        new_df = new_df[new_df['duration'] >= min_duration]

        # make train/valid df
        print('Make train / val meta')
        train_meta, val_meta = split_train_val_frame(new_df, val_rate=0.1)

        # save data frames
        print('Save meta frames on {}'.format(' '.join(
            LibriLightMeta.frame_file_names)))
        LibriLightMeta.save_meta(LibriLightMeta.frame_file_names, meta_dir,
                                 new_df, train_meta, val_meta)
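
split_and_save itself is not part of this listing. Judging from the call site, it takes an input path, an output path, a speaker, and minimum/maximum chunk lengths in samples, and returns one (chunk_paths, chunk_lengths, speakers) tuple per file. A rough, hypothetical sketch under those assumptions (the .npy loading, the naming scheme, and the reuse of settings.SAMPLE_RATE are guesses, not confirmed by the source):

    import numpy as np

    def split_and_save(in_path, out_path, speaker, min_len, max_len):
        """Hypothetical sketch: cut a long waveform into chunks of at most max_len samples."""
        audio = np.load(in_path)
        paths, lens, speakers = [], [], []
        for i, start in enumerate(range(0, len(audio), int(max_len))):
            chunk = audio[start:start + int(max_len)]
            if len(chunk) < min_len:
                break  # drop a tail chunk that is too short
            chunk_path = out_path.replace('.npy', '.{}.npy'.format(i))
            np.save(chunk_path, chunk)
            paths.append(chunk_path)
            lens.append(len(chunk) / settings.SAMPLE_RATE)  # duration in seconds
            speakers.append(speaker)
        return paths, lens, speakers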
Example #10
    def make_meta(self,
                  root_dir: str,
                  meta_dir: str = None,
                  filter_non_vocal: bool = True):
        if not meta_dir:
            meta_dir = MEDLEYDB_META_DIR

        # 1. load meta
        print('Load MedleyDB meta info')
        meta = load_meta(meta_dir)

        # 2. filter meta from given audio files
        meta_match_mixkey = {
            record['mix_filename'].replace('.wav', '.npy'): record
            for record in meta
        }

        # 3. load all mixture files
        print('Lookup mix files')
        mix_file_list = [
            p.replace('.wav', '.npy')
            for p in glob.glob(os.path.join(root_dir, '**', '**', '*.wav'))
        ]

        # 4. get mix/vocal pairs
        print('Matching mix / vocal pairs')
        pair_meta = get_mix_vocal_pairs(mix_file_list,
                                        meta_match_mixkey,
                                        ext='npy')

        if filter_non_vocal:
            pair_meta = {
                key: voices
                for key, voices in pair_meta.items() if voices
            }

        # Each song has one or more vocal tracks. When a song has several,
        # they are merged into a single file so it can be loaded quickly.
        # "pair_meta" looks like {"mixture_filename": ["first_vocal_filename", "second..."]}

        # mixture paths, in the same order as pair_meta.items()
        mix_paths = list(pair_meta.keys())

        # do parallel
        print('Merging multi-vocal-tracks ...')
        results = go_multiprocess(load_and_merge_audios,
                                  list(pair_meta.items()))
        out_path_list, source_numbers = map(list, zip(*results))

        # make meta values, keeping only successful merges (failures are marked with -1)
        filtered_zips = [
            (m, v, s)
            for m, v, s in zip(mix_paths, out_path_list, source_numbers)
            if v != -1
        ]
        mix_results, voice_results, source_numbers = map(
            list, zip(*filtered_zips))

        # make the track-count column, capped at 2 (1 = single track, 2 = multi-track)
        voice_track_lengths = [min(s, 2) for s in source_numbers]

        # make meta
        self._meta['mixture_filename'] = mix_results
        self._meta['voice_filename'] = voice_results
        self._meta['voice_tracks'] = voice_track_lengths

        print('Make train / val meta')
        train_meta, val_meta = split_train_val_frame(self._meta,
                                                     val_rate=0.1,
                                                     label_key='voice_tracks')

        print('Save meta frames on {}'.format(' '.join(self.frame_file_names)))
        self.save_meta(self.frame_file_names, self.meta_path, self._meta,
                       train_meta, val_meta)

        print('Done')
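
load_and_merge_audios is likewise not shown here. From the call site it receives one (mixture_filename, vocal_filename_list) pair and returns (output_path, number_of_sources), with -1 marking a failed merge. A rough, hypothetical sketch under those assumptions (the output naming is illustrative):

    import numpy as np

    def load_and_merge_audios(pair):
        """Hypothetical sketch: sum a song's vocal tracks into a single .npy file."""
        mix_path, voice_paths = pair
        try:
            tracks = [np.load(p) for p in voice_paths]
            merged = np.sum(np.stack(tracks), axis=0)  # assumes equal-length tracks
            out_path = mix_path.replace('mixture', 'vocals_merged')
            np.save(out_path, merged)
            return out_path, len(tracks)
        except Exception:
            return -1, 0  # the -1 entries are filtered out by the caller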