Example #1
    def track_top_models(self, mel_loss, gen_wav, model):
        """Keeps track of the top k models and saves them according to their current rank."""
        keep_top_k = self.train_cfg['keep_top_k']
        for j, (loss, _, step, name) in enumerate(self.top_k_models):
            print(f'{j} {loss} {step} {name}')
        if len(self.top_k_models) < keep_top_k or mel_loss < self.top_k_models[-1][0]:
            m_step = model.get_step()
            model_name = f'model_loss{mel_loss:#0.5}_step{m_step}_weights.pyt'
            # insert the new model, keep the list sorted ascending by loss, trim to k entries
            self.top_k_models.append((mel_loss, gen_wav, m_step, model_name))
            self.top_k_models.sort(key=lambda t: t[0])
            self.top_k_models = self.top_k_models[:keep_top_k]
            model.save(self.paths.voc_top_k / model_name)
            # delete checkpoint files that dropped out of the top k
            all_models = get_files(self.paths.voc_top_k, extension='pyt')
            top_k_names = {m[-1] for m in self.top_k_models}
            for model_file in all_models:
                if model_file.name not in top_k_names:
                    print(f'removing {model_file}')
                    os.remove(model_file)
            pickle_binary(self.top_k_models, self.paths.voc_top_k / 'top_k.pkl')

            # log the generated audio of each ranked model to tensorboard
            for i, (loss, g_wav, step, name) in enumerate(self.top_k_models, 1):
                self.writer.add_audio(tag=f'Top_K_Models/generated_top_{i}',
                                      snd_tensor=g_wav,
                                      global_step=step,
                                      sample_rate=self.dsp.sample_rate)
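The bookkeeping above reduces to a bounded best-k list kept sorted by loss. A self-contained sketch of just that logic (names are illustrative, not from the original class):

def update_top_k(top_k, item, k):
    # insert a (loss, payload) item, keep ascending by loss, trim to k entries
    top_k.append(item)
    top_k.sort(key=lambda t: t[0])
    return top_k[:k]

top = []
for loss in [0.31, 0.27, 0.35, 0.22]:
    top = update_top_k(top, (loss, f'model_loss{loss}.pyt'), k=3)
# top now holds the three lowest-loss entries, best first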
Example #2
def ljspeech(path: Union[str, Path]):
    # expects exactly one metadata csv with pipe-delimited lines: id|text|...
    csv_file = get_files(path, extension='.csv')
    assert len(csv_file) == 1
    text_dict = {}
    with open(csv_file[0], encoding='utf-8') as f:
        for line in f:
            split = line.split('|')
            text_dict[split[0]] = split[-1]  # note: the value keeps its trailing newline
    return text_dict
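A minimal usage sketch, assuming an LJSpeech-style layout where the directory holds a single metadata csv with pipe-delimited lines such as "LJ001-0001|raw text|normalized text" (the dataset path below is hypothetical):

text_dict = ljspeech('/data/LJSpeech-1.1')
print(len(text_dict))               # number of indexed utterances
print(text_dict.get('LJ001-0001'))  # transcript for one item id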
Example #3
def main(config: dict, data_set_name: str):
    development_mode = False

    logger.info(config['data_path'])
    store = DataStore(path=config['data_path'], store_type='csv', store_name=data_set_name, freshness=0)

    logger.debug('Start data extraction process...')
    if not store.check(key='data'):
        if not development_mode or not store.check(key='raw_data'):
            if not store.check(key='tables'):
                if not store.check(key='files'):
                    logger.debug('Retrieving all files...')
                    files = get_files(**config['file_config'])
                    saved = store.save(files, key='files')
                    if not saved:
                        logger.warning('No files found. Check logs.')
                        return False
                else:
                    files = store.load(key='files')
                logger.debug('Retrieving all tables...')
                tables = get_tables(files, **config['sheet_config'])
                saved = store.save(tables, key='tables')
                if not saved:
                    logger.warning('No files could be opened. Check logs.')
                    return False
            else:
                tables = store.load(key='tables')
            logger.debug('Retrieving raw data...')
            raw_data = get_excel_data(tables, config['sheet_config'], config['mappings'], config['meta_range'])

            if development_mode:
                store.save(raw_data['values'], key='raw_data')
                store.save(raw_data['type_info'], key='type_info')
        else:
            raw_data = {
                'values': store.load(key='raw_data'),
                'type_info': store.load(key='type_info'),
            }
        logger.debug('Processing raw data...')
        data = process_data(raw_data['values'], raw_data['type_info'], config['mappings'])
        store.save(data, key='data')
    else:
        data = store.load(key='data')

    logger.debug('Done!')

    if config['special']:
        try:
            logger.info('Writing the Excel Report for %s...', data_set_name)
            config['report_function'](data, data_set_name)
        except Exception:
            logger.exception('Writing the Excel Report failed:')

    return data
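The store calls above follow a plain check/load-or-compute/save caching pattern. A minimal stand-in sketch of such a store (this PickleStore is hypothetical, not the project's DataStore, and ignores its store_type and freshness options):

import pickle
from pathlib import Path

class PickleStore:
    def __init__(self, path, store_name):
        self.base = Path(path) / store_name
        self.base.mkdir(parents=True, exist_ok=True)

    def check(self, key):
        # True if a cached artifact exists for this key
        return (self.base / f'{key}.pkl').exists()

    def save(self, obj, key):
        with open(self.base / f'{key}.pkl', 'wb') as f:
            pickle.dump(obj, f)
        return obj is not None

    def load(self, key):
        with open(self.base / f'{key}.pkl', 'rb') as f:
            return pickle.load(f)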
Example #4
def nick(path):
    csv_file = get_files(path, extension='.csv')
    assert len(csv_file) == 1
    text_dict = {}
    with open(csv_file[0], encoding='utf-8') as f:
        for line in f:
            split = line.rstrip().split('|')
            text_dict[split[0]] = split[-1]
    return text_dict
Example #5
def ljspeech(path: Union[str, Path]):
    txt_file = get_files(path, extension='.csv')

    assert len(txt_file) == 1

    text_dict = {}

    with open(txt_file[0], encoding='utf-8') as f:
        for line in f:
            split = line.split('|')
            text_dict[split[0]] = split[-2]  # second-to-last field: the raw transcript in LJSpeech metadata

    print('-' * 20 + 'Text dict' + '-' * 20)
    print(text_dict)
    return text_dict
Example #6
def ljspeech(path, data_dir):
    csv_file = get_files(path, extension='.csv')
    assert len(csv_file) == 1

    wavs = []

    with open(csv_file[0], encoding='utf-8') as f_:
        # a disabled branch converted each line to phonemes via text2phone
        # when 'phoneme_cleaners' was among the cleaner names
        print("Cleaner : {} \n".format(hp.tts_cleaner_names))
        for line in f_:
            split = line.split('|')
            t = split[-1].strip().upper()
            # additional punctuation stripping of t is disabled in this version
            if len(t) > 0:
                wavs.append(split[0].strip())
    # a disabled block wrote wav|encoded|text triples to data_dir/train.txt

    return wavs
Example #7
def ljspeech(path: Union[str, Path], dt=False):
    csv_file = get_files(path, extension='.csv')

    # narrow the candidates down to the expected train metafile
    if dt:
        for f in csv_file:
            if str(f).endswith('tmp_tts_train.csv'):
                csv_file = [f]
    else:
        for f in csv_file:
            if str(f).endswith('/cleaned_train_tts.csv'):
                csv_file = [f]

    text_dict = {}
    print(f'Using {csv_file} train file ...')
    with open(csv_file[0], encoding='utf-8') as f:
        for line in f:
            split = line.split('|')
            text_dict[split[0]] = split[-1]
    return text_dict
Example #8
    if hp.voc_mode == 'RAW':
        quant = encode_mu_law(
            y, mu=2**hp.bits) if hp.mu_law else float_2_label(y, bits=hp.bits)
    elif hp.voc_mode == 'MOL':
        quant = float_2_label(y, bits=16)

    return mel.astype(np.float32), quant.astype(np.int64)


def process_wav(path: Path):
    wav_id = path.stem
    m, x = convert_file(path)
    np.save(paths.mel / f'{wav_id}.npy', m, allow_pickle=False)
    np.save(paths.quant / f'{wav_id}.npy', x, allow_pickle=False)
    return wav_id, m.shape[-1]


wav_files = get_files(path, extension)
paths = Paths(hp.data_path, hp.voc_model_id, hp.tts_model_id)

print(f'\n{len(wav_files)} {extension[1:]} files found in "{path}"\n')

if len(wav_files) == 0:

    print('Please point wav_path in hparams.py to your dataset,')
    print('or use the --path option.\n')

else:
    text_dict = ljspeech(path)
    with open(paths.data / 'text_dict.pkl', 'wb') as f:
        pickle.dump(text_dict, f)

    n_workers = max(1, args.num_workers)
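The encode_mu_law and float_2_label helpers referenced above compress a [-1, 1] float waveform into integer labels. A hedged sketch of typical implementations (an assumption modeled on common WaveRNN utility code, not recovered from this snippet):

import numpy as np

def encode_mu_law(x, mu):
    # mu-law compand the signal, then quantize to mu integer levels
    mu = mu - 1
    fx = np.sign(x) * np.log(1 + mu * np.abs(x)) / np.log(1 + mu)
    return np.floor((fx + 1) / 2 * mu + 0.5).astype(np.int64)

def float_2_label(x, bits):
    # linearly rescale [-1, 1] floats to [0, 2**bits - 1] labels
    peak = np.abs(x).max()
    if peak > 1.0:
        x = x / peak
    return ((x + 1.) * (2**bits - 1) / 2).astype(np.int64)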
Example #9
def ljspeech(path: Union[str, Path]):
    csv_file = get_files(path, extension='.txt')
    text_dict = {}
    clean_text = ['\t', '\n', '\x00']
    first_case = 0
    lengthzero_underscore_reject = 0
    key_no_match = 0
    lengthzero_underscore_accept = 0
    zero_length = 0
    key_no_match_outlier = 0
    count = 0
    file_matching_counter = {}
    for i in range(len(csv_file)):
        with open(csv_file[i], encoding='utf-8') as f:
            for line in f:
                split = line.split('\t')
                if len(split) > 1:
                    # tab between key and text: clean each field, index directly
                    for j in range(len(split)):
                        for remove_word in clean_text:
                            if remove_word in split[j]:
                                split[j] = re.sub(remove_word, '', split[j])
                    text_dict[split[0]] = split[-1]
                    first_case += 1
                else:
                    part = split[0]
                    for remove_word in clean_text:
                        if remove_word in part:
                            part = re.sub(remove_word, '', part)

                    if len(part) > 0:
                        count += 1
                        underscore_split = part.split('_')
                        if len(underscore_split) > 2:
                            # key embedded via underscores: leading digits after the
                            # second underscore are the key suffix, the rest is text
                            key_text_mix = underscore_split[2]
                            current_key = ''
                            for ch in key_text_mix:
                                if ch.isdigit():
                                    current_key += ch
                                else:
                                    break

                            key = underscore_split[0] + '_' + underscore_split[1] + '_' + current_key
                            if len(key_text_mix[len(current_key):]) > 0:
                                text_dict[key] = key_text_mix[len(current_key):]
                                lengthzero_underscore_accept += 1
                            else:
                                lengthzero_underscore_reject += 1
                        else:
                            # key does not match the file naming scheme: derive one
                            # from the file name plus a running counter
                            key_from_file = Path(csv_file[i]).stem
                            if key_from_file in file_matching_counter:
                                file_matching_counter[key_from_file] += 1
                            else:
                                file_matching_counter[key_from_file] = 1
                            current_key = key_from_file + '_' + str(file_matching_counter[key_from_file])
                            if len(part.split('.')) == 2:
                                text_dict[current_key] = part.split('.')[-1]
                                key_no_match += 1
                            else:
                                key_no_match_outlier += 1
                    else:
                        zero_length += 1
    print('first_case:', first_case)
    print('key_no_match:', key_no_match)
    print('lengthzero_underscore_reject:', lengthzero_underscore_reject)
    print('lengthzero_underscore_accept:', lengthzero_underscore_accept)
    print('zero_length:', zero_length)
    print('key_no_match_outlier:', key_no_match_outlier)
    # zero_length lines are deliberately excluded from this total
    print('total:', first_case + key_no_match + lengthzero_underscore_reject
          + lengthzero_underscore_accept + key_no_match_outlier)
    return text_dict
Example #10
import os
import numpy as np
import hparams as hp
from utils.files import get_files

if __name__ == '__main__':
    min_e = []
    min_p = []
    max_e = []
    max_p = []
    nz_min_p = []
    nz_min_e = []

    energy_path = os.path.join(hp.data_dir, 'energy')
    pitch_path = os.path.join(hp.data_dir, 'pitch')
    mel_path = os.path.join(hp.data_dir, 'mels')
    energy_files = get_files(energy_path, extension='.npy')
    pitch_files = get_files(pitch_path, extension='.npy')
    mel_files = get_files(mel_path, extension='.npy')

    assert len(energy_files) == len(pitch_files) == len(mel_files)

    for f in energy_files:
        e = np.load(f)
        min_e.append(e.min())
        nz_min_e.append(e[e > 0].min())
        max_e.append(e.max())

    for f in pitch_files:
        p = np.load(f)
        min_p.append(p.min())
        nz_min_p.append(p[p > 0].min())
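The snippet is cut off mid-loop; a hedged continuation (an assumption about what the original goes on to do, not recovered text) would finish the pitch loop and reduce the per-file lists to global statistics:

        max_p.append(p.max())

    print('energy min/nonzero-min/max:', min(min_e), min(nz_min_e), max(max_e))
    print('pitch  min/nonzero-min/max:', min(min_p), min(nz_min_p), max(max_p))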
Example #11
def process_wav_test(input_paths):
    input_clean_test_path, input_noisy_test_path = input_paths
    print("Processing -> {0} vs {1}".format(input_clean_test_path,
                                            input_noisy_test_path))
    if input_clean_test_path.stem != input_noisy_test_path.stem:
        raise RuntimeError("Those are different samples!{0} vs {1}".format(
            input_clean_test_path, input_noisy_test_path))
    wav_id = input_clean_test_path.stem
    m, x = convert_file(input_clean_test_path, input_noisy_test_path)
    np.save(paths.test_mel / f'{wav_id}.npy', m, allow_pickle=False)
    np.save(paths.test_quant / f'{wav_id}.npy', x, allow_pickle=False)
    return wav_id, m.shape[-1]


wav_files_clean = sorted(get_files(path_clean, extension),
                         key=lambda p: p.stem)
wav_files_noisy = sorted(get_files(path_noisy, extension),
                         key=lambda p: p.stem)
wav_files_clean_test = sorted(
    get_files(path_clean_test, extension),
    key=lambda p: p.stem) if path_clean_test is not None else None
wav_files_noisy_test = sorted(
    get_files(path_noisy_test, extension),
    key=lambda p: p.stem) if path_noisy_test is not None else None
paths = Paths(hp.data_path, hp.voc_model_id, hp.tts_model_id)

print(
    f'\n{len(wav_files_clean)} {extension[1:]} files found in "{path_clean}"\n'
)
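A brief usage sketch of how the test pairs might be processed (an assumption; the original likely maps the pairs across a multiprocessing pool):

# pair clean/noisy test files by their shared stem and process each pair
if wav_files_clean_test is not None:
    for pair in zip(wav_files_clean_test, wav_files_noisy_test):
        wav_id, n_frames = process_wav_test(pair)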
Example #12
        else:
            raise ValueError(f'Unexpected voc mode {self.dsp.voc_mode}, should be either RAW or MOL.')
        return mel.astype(np.float32), quant.astype(np.int64), pitch.astype(np.float32)


parser = argparse.ArgumentParser(description='Preprocessing for WaveRNN and Tacotron')
parser.add_argument('--path', '-p', help='directly point to dataset path')
parser.add_argument('--num_workers', '-w', metavar='N', type=valid_n_workers, default=cpu_count()-1, help='The number of worker threads to use for preprocessing')
parser.add_argument('--config', metavar='FILE', default='config.yaml', help='The config containing all hyperparams.')
args = parser.parse_args()


if __name__ == '__main__':

    config = read_config(args.config)
    wav_files = get_files(args.path, '.wav')
    wav_ids = {w.stem for w in wav_files}
    paths = Paths(config['data_path'], config['voc_model_id'], config['tts_model_id'])
    print(f'\n{len(wav_files)} .wav files found in "{args.path}"')
    assert len(wav_files) > 0, f'Found no wav files in {args.path}, exiting.'

    text_dict = ljspeech(args.path)
    text_dict = {item_id: text for item_id, text in text_dict.items()
                 if item_id in wav_ids and len(text) > config['preprocessing']['min_text_len']}
    wav_files = [w for w in wav_files if w.stem in text_dict]
    print(f'Using {len(wav_files)} wav files that are indexed in metafile.\n')

    n_workers = max(1, args.num_workers)

    dsp = DSP.from_config(config)
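valid_n_workers is referenced as the argparse type but not shown; a plausible definition (an assumption, written the way argparse type-validators usually are):

import argparse

def valid_n_workers(num):
    # reject non-positive worker counts at argument-parsing time
    n = int(num)
    if n < 1:
        raise argparse.ArgumentTypeError(f'{num} must be an int greater than 0')
    return n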
Example #13
    if hp.voc_mode == 'RAW':
        quant = encode_mu_law(
            y, mu=2**hp.bits) if hp.mu_law else float_2_label(y, bits=hp.bits)
    elif hp.voc_mode == 'MOL':
        quant = float_2_label(y, bits=16)

    return mel.astype(np.float32), quant.astype(np.int64)


def process_wav(path: Path):
    wav_id = path.stem  # Path has no .split(); .stem gives the file id without extension
    m, x = convert_file(path)
    np.save(paths.mel / f'{wav_id}.npy', m, allow_pickle=False)
    np.save(paths.quant / f'{wav_id}.npy', x, allow_pickle=False)
    return wav_id, m.shape[-1]


wav_files = get_files(path, hp.book_names, hp.metadata, extension)
paths = Paths(hp.data_path, hp.voc_model_id, hp.tts_model_id)

print(wav_files[0])
print(f'\n{len(wav_files)} {extension[1:]} files found in "{path}"\n')

if len(wav_files) == 0:

    print('Please point wav_path in hparams.py to your dataset,')
    print('or use the --path option.\n')

else:

    if not hp.ignore_tts:

        text_dict = blizzard(path, hp.book_names, hp.metadata)
Example #14
def read_action(path):
    frame_names=files.get_files(path,append=True)
    frames=[cv2.imread(frame_name_i,cv2.IMREAD_GRAYSCALE) for frame_name_i in frame_names]
    return frames,frame_names
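A brief usage sketch (the clip directory is illustrative; files.get_files is assumed to return the frame paths in playback order):

frames, frame_names = read_action('data/actions/walk_person01')
print(len(frames), 'frames loaded, first shape:',
      frames[0].shape if frames else None)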