def test_ljspeech_loader(self):
    """Smoke-test the LJSpeech data loader for a handful of batches."""
    print(" ---- Run data loader for 100 iterations ----")
    max_iters = 10
    receptive_field = 11
    conf = load_config('test_conf.json')
    dataset = LJSpeechDataset(
        os.path.join(conf.data_path, "mels", "meta_fftnet.csv"),
        os.path.join(conf.data_path, "mels"),
        conf.sample_rate, conf.num_mels, conf.num_freq,
        conf.min_level_db, conf.frame_shift_ms, conf.frame_length_ms,
        conf.preemphasis, conf.ref_level_db, receptive_field,
        conf.min_wav_len, conf.max_wav_len)
    loader = DataLoader(dataset, batch_size=2, shuffle=False,
                        collate_fn=dataset.collate_fn, drop_last=True,
                        num_workers=2)
    prev_len = 0
    for step, batch in enumerate(loader):
        wavs, mels = batch[0], batch[1]
        print(" > iter: ", step)
        # batch wav lengths must be non-decreasing across iterations
        assert wavs.shape[1] >= prev_len
        prev_len = wavs.shape[1]
        # wav and mel tensors must agree in time and batch dimensions
        assert wavs.shape[1] == mels.shape[1]
        assert wavs.shape[0] == mels.shape[0]
        # each sample must be longer than the model receptive field
        assert wavs.shape[1] > receptive_field
        assert wavs.max() > 0 and wavs.mean() > 0
        if step + 1 == max_iters:
            break
# Training-script preamble. NOTE(review): this chunk was flattened onto one
# physical line by whatever extracted it; because it contains inline "#"
# comments, everything after the first "#" on the line below is dead text
# here — the original file had line breaks. Left byte-identical.
# What the original code does, in order: parse CLI flags (--config_path,
# --debug, --finetune_path), load the run config, build the experiment
# output / checkpoint folder paths, copy the config json into the run
# folder, open a TensorBoard SummaryWriter, and start constructing the
# FFTNet model (the FFTNetModel(...) call is cut off at the end of this
# chunk, so its remaining arguments are not visible from here).
# NOTE(review): --debug uses argparse type=bool — any non-empty string
# (including "False") parses as True; presumably action='store_true' was
# intended. Verify before relying on this flag.
parser = argparse.ArgumentParser() parser.add_argument( '--config_path', type=str, help='path to config file for training', ) parser.add_argument('--debug', type=bool, default=False, help='Stop asking for git hash before the run.') parser.add_argument('--finetune_path', type=str) args = parser.parse_args() c = load_config(args.config_path) # setup output paths and read configs _ = os.path.dirname(os.path.realpath(__file__)) OUT_PATH = os.path.join(_, c.output_path) OUT_PATH = create_experiment_folder(OUT_PATH, c.model_name, True) CHECKPOINT_PATH = os.path.join(OUT_PATH, 'checkpoints') shutil.copyfile(args.config_path, os.path.join(OUT_PATH, 'config.json')) # setup TensorBoard tb = SummaryWriter(OUT_PATH) # create the FFTNet model model = FFTNetModel(hid_channels=256, out_channels=256, n_layers=c.num_quant,
from multiprocessing import Pool

# Preprocessing script: compute a mel spectrogram for every wav file under
# --data_path and save each one as a .npy file in --out_path.
parser = argparse.ArgumentParser()
# FIX: the help strings for --data_path / --out_path were copy-paste errors
# describing unrelated arguments; corrected to match what each flag does.
parser.add_argument('--data_path', type=str,
                    help='Folder path to the input wav dataset.')
parser.add_argument('--out_path', type=str,
                    help='Output folder for the extracted mel files.')
parser.add_argument('--config', type=str,
                    help='conf.json file for run settings.')
args = parser.parse_args()

DATA_PATH = args.data_path
OUT_PATH = args.out_path
CONFIG = load_config(args.config)

# Audio front-end configured entirely from the run config.
ap = AudioProcessor(CONFIG.sample_rate, CONFIG.num_mels, CONFIG.num_freq,
                    CONFIG.min_level_db, CONFIG.frame_shift_ms,
                    CONFIG.frame_length_ms, CONFIG.preemphasis,
                    CONFIG.ref_level_db)


def extract_mel(file_path):
    """Compute and save the mel spectrogram for a single wav file.

    Saves "<basename>.mel.npy" into OUT_PATH and returns a metadata tuple
    (file_path, mel_file, wav_len, mel_len) with the lengths as strings.
    """
    x, fs = sf.read(file_path)
    mel = ap.melspectrogram(x.astype('float32'))
    # FIX: use splitext so only the extension is stripped; str.replace
    # would also mangle a ".wav" occurring elsewhere in the file name.
    file_name = os.path.splitext(os.path.basename(file_path))[0]
    mel_file = file_name + ".mel"
    np.save(os.path.join(OUT_PATH, mel_file), mel, allow_pickle=False)
    mel_len = mel.shape[1]
    wav_len = x.shape[0]
    return file_path, mel_file, str(wav_len), str(mel_len)


# Recursive glob pattern matching every wav under DATA_PATH.
glob_path = os.path.join(DATA_PATH, "**/*.wav")
# Training entry point: parse CLI flags, load the run config, and validate
# it against the model before training starts.
parser = argparse.ArgumentParser()
parser.add_argument('--restore_path', type=str,
                    help='Folder path to checkpoints', default=0)
parser.add_argument(
    '--config_path', type=str, help='path to config file for training',
)
# NOTE(review): argparse type=bool is a known pitfall — any non-empty string
# (including "False") parses as True. Kept as-is to preserve CLI behavior;
# presumably action='store_true' was intended. Verify.
parser.add_argument('--debug', type=bool, default=False,
                    help='do not ask for git hash before run.')
args = parser.parse_args()
# BUG FIX: the parser defines --config_path, so the parsed attribute is
# `args.config_path`; reading `args.config` raised AttributeError.
C = load_config(args.config_path)


def train():
    """Run the training loop (placeholder, not yet implemented)."""
    pass


def evaluate():
    """Run evaluation (placeholder, not yet implemented)."""
    pass


def main():
    """Validate the config against the model's receptive field.

    Raises:
        RuntimeError: if the configured max wav length is shorter than the
            model's receptive field, which would make training impossible.
    """
    # `model` is presumably constructed elsewhere in this file — TODO confirm.
    if C.max_wav_len < model.receptive_field:
        # BUG FIX: the config object is bound to upper-case `C`; lower-case
        # `c` was undefined here and would have raised NameError while
        # formatting the error. Also fixed the "then" -> "than" typo.
        raise RuntimeError(
            " > Max wav length {} cannot be smaller than the model "
            "receptive field {}.".format(C.max_wav_len,
                                         model.receptive_field))