def test_FFTNetModel(self):
    """Check the output shape of ``FFTNetModel`` for three configurations.

    Covers an unconditioned 11-layer model, a conditioned 11-layer model and
    a conditioned 10-layer model, all fed a ``(2, 1, 2048)`` input. The
    asserted time dimension (1 for 11 layers, 1025 for 10 layers) matches
    ``2048 - 2**n_layers + 1`` -- presumably the model's receptive field;
    confirm against the model implementation.
    """
    print(" ---- Test FFTNetModel ----")

    def check_leading_dims(tensor, expected_sizes):
        # Compare axis by axis so only the first three dims are constrained.
        for axis, size in enumerate(expected_sizes):
            assert tensor.shape[axis] == size

    # Case 1: signal input only, no conditioning channels.
    net = FFTNetModel(hid_channels=256,
                      out_channels=256,
                      n_layers=11,
                      cond_channels=None)
    signal = torch.rand(2, 1, 2048)
    result = net(signal)
    check_leading_dims(result, (2, 1, 256))

    # Case 2: signal plus an 80-channel conditioning input.
    net = FFTNetModel(hid_channels=256,
                      out_channels=256,
                      n_layers=11,
                      cond_channels=80)
    signal = torch.rand(2, 1, 2048)
    cond = torch.rand(2, 80, 2048)
    result = net(signal, cond)
    check_leading_dims(result, (2, 1, 256))

    # Case 3: one layer fewer, so more than one output frame is produced.
    net = FFTNetModel(hid_channels=256,
                      out_channels=256,
                      n_layers=10,
                      cond_channels=80)
    signal = torch.rand(2, 1, 2048)
    cond = torch.rand(2, 80, 2048)
    result = net(signal, cond)
    check_leading_dims(result, (2, 1025, 256))
def evaluate(epoch, ema):
    """Return the mean loss of the EMA-weighted model over ``train_loader``.

    A fresh ``FFTNetModel`` is created, populated through
    ``ema.assign_ema_model`` from the module-level ``model``, switched to
    eval mode and run without gradient tracking.

    Args:
        epoch: Index of the current epoch. Kept for interface compatibility;
            the value is not used by the computation.
        ema: Object exposing ``assign_ema_model(model, ema_model, use_cuda)``.

    Returns:
        The mean of ``loss.item()`` over all evaluated batches.

    Raises:
        ValueError: If the loader yields no batches.
    """
    ema_model = FFTNetModel(hid_channels=256,
                            out_channels=256,
                            n_layers=c.num_quant,
                            cond_channels=80)
    ema_model = ema.assign_ema_model(model, ema_model, use_cuda)
    ema_model.eval()

    total_loss = 0.0
    num_batches = 0
    with torch.no_grad():
        # NOTE(review): this iterates ``train_loader`` although the removed,
        # commented-out progress bar referenced ``val_loader`` -- confirm
        # which loader evaluation is meant to use.
        for batch in train_loader:
            wav = batch[0].unsqueeze(1)
            mel = batch[1].transpose(1, 2)
            lens = batch[2]
            target = batch[3]
            if use_cuda:
                wav = wav.cuda()
                mel = mel.cuda()
                target = target.cuda()
            out = ema_model(wav, mel)
            # The criterion returns three values; only the loss is needed.
            loss, _, _ = criterion(out, target, lens)
            total_loss += loss.item()
            num_batches += 1

    if num_batches == 0:
        raise ValueError("evaluate() received no batches to evaluate")
    # Divide by the number of batches, not by the last zero-based index
    # (which under-counts by one and is zero for a single batch).
    return total_loss / num_batches
def test_train_step(self):
    """Verify that a few optimizer steps modify every model parameter.

    A deep copy of the model is taken before training as a reference; after
    five Adam steps against an all-zero L1 target, each parameter must
    differ from its reference in at least one element.
    """
    print(" ---- Test the network backpropagation ----")
    model = FFTNetModel(hid_channels=256,
                        out_channels=256,
                        n_layers=11,
                        cond_channels=80)
    inp = torch.rand(2, 1, 2048)
    c_inp = torch.rand(2, 80, 2048)
    criterion = torch.nn.L1Loss().to(device)
    model.train()
    model_ref = copy.deepcopy(model)

    # Sanity check: the reference starts out identical to the model.
    for weights, weights_ref in zip(model.parameters(),
                                    model_ref.parameters()):
        assert (weights - weights_ref).sum() == 0, weights

    optimizer = optim.Adam(model.parameters(), lr=0.0001)
    for _ in range(5):
        optimizer.zero_grad()
        out = model(inp, c_inp)
        zero_target = torch.zeros(out.shape)
        loss = criterion(out, zero_target)
        loss.backward()
        optimizer.step()

    # Every parameter must have moved away from its pre-training value.
    for index, (weights, weights_ref) in enumerate(
            zip(model.parameters(), model_ref.parameters())):
        assert (weights != weights_ref).any(), "param {} with shape {} not updated!! \n{}\n{}".format(
            index, weights.shape, weights, weights_ref)
def test_FFTNetModelStep(self):
    """Exercise ``FFTNetModel.forward_step`` and inspect the first layer's queues.

    Three scenarios are run with single-sample inputs:

    * 1024 steps on CPU: ``queue1`` of the first layer must be non-zero and
      ``queue2`` must still sum to zero.
    * The same on GPU, executed only when CUDA is available so the test
      does not error out on CPU-only machines.
    * 1025 steps on CPU: ``queue2`` must be non-zero, with everything but
      its last position still summing to zero.
    """
    print(" ---- Test FFTNetModel step forward ----")

    def build_net():
        return FFTNetModel(hid_channels=256,
                           out_channels=256,
                           n_layers=11,
                           cond_channels=80)

    def run_steps(net, n_steps, on_gpu=False):
        # Feed one random sample and one conditioning frame per step.
        for _ in range(n_steps):
            x = torch.rand(1, 1, 1)
            cx = torch.rand(1, 80, 1)
            if on_gpu:
                x = x.cuda()
                cx = cx.cuda()
            net.forward_step(x, cx)

    def first_layer_queue_sums(net):
        buffer = net.layers[0].buffer
        return (abs(buffer.queue1.sum().item()),
                abs(buffer.queue2.sum().item()))

    # on CPU
    net = build_net()
    time_start = time.time()
    run_steps(net, 1024)
    time_avg = (time.time() - time_start) / 1024
    print("> Avg time per step inference on CPU: {}".format(time_avg))
    queue1_sum, queue2_sum = first_layer_queue_sums(net)
    assert queue1_sum > 0
    assert queue2_sum == 0

    # on GPU -- skipped when no CUDA device is present
    if torch.cuda.is_available():
        net = build_net()
        net.cuda()
        time_start = time.time()
        run_steps(net, 1024, on_gpu=True)
        time_avg = (time.time() - time_start) / 1024
        print("> Avg time per step inference on GPU: {}".format(time_avg))
        queue1_sum, queue2_sum = first_layer_queue_sums(net)
        assert queue1_sum > 0
        assert queue2_sum == 0
    else:
        print("> CUDA not available, skipping GPU step inference check")

    # check the second queue: one extra step puts data into queue2
    net = build_net()
    run_steps(net, 1025)
    queue1_sum, queue2_sum = first_layer_queue_sums(net)
    assert queue1_sum > 0
    assert queue2_sum > 0
    assert abs(net.layers[0].buffer.queue2[:, :, :-1].sum().item()) == 0
# Training-script setup: parse CLI arguments, prepare the experiment folder,
# build the model/criterion/optimizer and start constructing the datasets.
# NOTE(review): this fragment is truncated -- the final LJSpeechDataset(...)
# call continues beyond the visible source.
args = parser.parse_args()
c = load_config(args.config_path)

# setup output paths and read configs
# `_` holds the directory that contains this script file.
_ = os.path.dirname(os.path.realpath(__file__))
OUT_PATH = os.path.join(_, c.output_path)
# OUT_PATH is rebound to whatever create_experiment_folder returns.
OUT_PATH = create_experiment_folder(OUT_PATH, c.model_name, True)
CHECKPOINT_PATH = os.path.join(OUT_PATH, 'checkpoints')
# Keep a copy of the config used for this run next to its outputs.
shutil.copyfile(args.config_path, os.path.join(OUT_PATH, 'config.json'))

# setup TensorBoard
tb = SummaryWriter(OUT_PATH)

# create the FFTNet model
# NOTE(review): n_layers is taken from c.num_quant -- confirm that this
# config field is really meant to be the layer count.
model = FFTNetModel(hid_channels=256, out_channels=256, n_layers=c.num_quant, cond_channels=80)
criterion = MaskedCrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=c.lr)
num_params = count_parameters(model)
print(" > Models has {} parameters".format(num_params))
# Move model and loss to the GPU when the use_cuda flag is set.
if use_cuda:
    model.cuda()
    criterion.cuda()

# these two classes extend torch.utils.data.Dataset class to create the batches
# the batches are tuples of three elements: wav, mels, audio file name
train_dataset = LJSpeechDataset(
    os.path.join(c.data_path, "mels", "meta_fftnet_train.csv"),
# Benchmark script: times FFTNetModel.forward_step over 20000 iterations.
# NOTE(review): this fragment is truncated -- the GPU section's
# FFTNetModel(...) call continues beyond the visible source.
import torch
import time
from tqdm import tqdm
from model import FFTNet, FFTNetModel
from generic_utils import count_parameters

# Fixed seed so the random weights and inputs are reproducible.
torch.manual_seed(1)
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if use_cuda:
    torch.backends.cudnn.benchmark = False

print(" ---- Test FFTNetModel step forward ----")
net = FFTNetModel(hid_channels=256, out_channels=256, n_layers=11, cond_channels=80)
net.eval()
print(" > Number of model params: ", count_parameters(net))
# The same single-sample input and conditioning frame are reused for every step.
x = torch.rand(1, 1, 1)
cx = torch.rand(1, 80, 1)
time_start = time.time()
# Gradient tracking is disabled for the timed loop.
with torch.no_grad():
    for i in tqdm(range(20000)):
        out = net.forward_step(x, cx)
    # Average wall-clock time per step over the 20000 iterations.
    time_avg = (time.time() - time_start) / 20000
    print("> Avg time per step inference on CPU: {}".format(time_avg))

# on GPU
net = FFTNetModel(hid_channels=256, out_channels=256,