Example 1
    def test_FFTNetModel(self):
        print(" ---- Test FFTNetModel ----")
        # test with input only (no conditioning)
        net = FFTNetModel(hid_channels=256, out_channels=256, n_layers=11, cond_channels=None)
        inp = torch.rand(2, 1, 2048)
        out = net(inp)
        assert out.shape[0] == 2
        assert out.shape[1] == 1
        assert out.shape[2] == 256
        # test with conditional input
        net = FFTNetModel(hid_channels=256, out_channels=256, n_layers=11, cond_channels=80)
        inp = torch.rand(2, 1, 2048)
        c_inp = torch.rand(2, 80, 2048)
        out = net(inp, c_inp)
        assert out.shape[0] == 2
        assert out.shape[1] == 1
        assert out.shape[2] == 256
        # test with fewer layers, i.e. a smaller receptive field
        net = FFTNetModel(hid_channels=256, out_channels=256, n_layers=10, cond_channels=80)
        inp = torch.rand(2, 1, 2048)
        c_inp = torch.rand(2, 80, 2048)
        out = net(inp, c_inp)
        assert out.shape[0] == 2
        assert out.shape[1] == 1025
        assert out.shape[2] == 256
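The time dimension in these asserts follows from FFTNet's receptive field: an n-layer model sees 2**n input samples per output frame, so a length-2048 input yields 2048 - 2**n + 1 frames. A minimal sketch of that arithmetic, inferred from the asserts above rather than taken from the repo:

# Sketch of the output-length arithmetic implied by the asserts above,
# assuming an n-layer FFTNet has a receptive field of 2**n samples and
# applies no padding (an inference from the tests, not repo code).
def expected_output_length(input_length, n_layers):
    receptive_field = 2 ** n_layers
    return input_length - receptive_field + 1

assert expected_output_length(2048, 11) == 1     # the (2, 1, 256) cases
assert expected_output_length(2048, 10) == 1025  # the (2, 1025, 256) case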
Example 2
def evaluate(epoch, ema):
    avg_loss = 0.0
    epoch_time = 0
    # progbar = Progbar(len(val_loader.dataset) // c.eval_batch_size)
    # build a fresh model and load the EMA (shadow) weights into it
    ema_model = FFTNetModel(hid_channels=256,
                            out_channels=256,
                            n_layers=c.num_quant,
                            cond_channels=80)
    ema_model = ema.assign_ema_model(model, ema_model, use_cuda)
    ema_model.eval()
    with torch.no_grad():
        for num_iter, batch in enumerate(val_loader):
            start_time = time.time()
            wav = batch[0].unsqueeze(1)     # (B, 1, T)
            mel = batch[1].transpose(1, 2)  # (B, 80, T)
            lens = batch[2]
            target = batch[3]
            if use_cuda:
                wav = wav.cuda()
                mel = mel.cuda()
                target = target.cuda()
            out = ema_model(wav, mel)
            loss, fp, tp = criterion(out, target, lens)
            step_time = time.time() - start_time
            epoch_time += step_time
            avg_loss += loss.item()
    avg_loss /= (num_iter + 1)  # enumerate() is zero-based
    return avg_loss
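assign_ema_model loads the exponential-moving-average copy of the training weights into the freshly built model before evaluation. The EMA class itself is not shown in these snippets; a hypothetical minimal sketch of a helper with that interface:

import torch

# Hypothetical sketch of an EMA helper consistent with how it is called in
# evaluate() above; the actual class used by this repo is not shown here.
class EMA:
    def __init__(self, model, decay=0.999):
        self.decay = decay
        # shadow copy of every parameter, updated after each training step
        self.shadow = {name: p.detach().clone()
                       for name, p in model.named_parameters()}

    def update(self, model):
        for name, p in model.named_parameters():
            self.shadow[name].mul_(self.decay).add_(p.detach(), alpha=1.0 - self.decay)

    def assign_ema_model(self, model, ema_model, use_cuda):
        # copy the current weights, then overwrite parameters with the shadow values
        ema_model.load_state_dict(model.state_dict())
        with torch.no_grad():
            for name, p in ema_model.named_parameters():
                p.copy_(self.shadow[name])
        return ema_model.cuda() if use_cuda else ema_model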
Example 3
    def test_train_step(self):
        print(" ---- Test the network backpropagation ----")
        model = FFTNetModel(hid_channels=256, out_channels=256, n_layers=11, cond_channels=80).to(device)
        inp = torch.rand(2, 1, 2048).to(device)
        c_inp = torch.rand(2, 80, 2048).to(device)

        criterion = torch.nn.L1Loss().to(device)

        model.train()
        model_ref = copy.deepcopy(model)
        count = 0
        for param, param_ref in zip(model.parameters(), model_ref.parameters()):
            assert (param - param_ref).sum() == 0, param
            count += 1
        optimizer = optim.Adam(model.parameters(), lr=0.0001)
        for i in range(5):
            out = model(inp, c_inp)
            optimizer.zero_grad()
            loss = criterion(out, torch.zeros_like(out))
            loss.backward()
            optimizer.step()
        # check parameter changes
        count = 0
        for param, param_ref in zip(model.parameters(), model_ref.parameters()):
            # ignore the pre-highway layer since it is applied conditionally
            assert (param != param_ref).any(), "param {} with shape {} not updated!! \n{}\n{}".format(count, param.shape, param, param_ref)
            count += 1
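The deepcopy-and-compare pattern above is a general way to verify that gradients reach every parameter. The same check as a small reusable helper (hypothetical, for illustration only):

def assert_all_params_updated(model, model_ref):
    # model_ref must be a deepcopy taken before any optimizer steps
    for count, (param, param_ref) in enumerate(zip(model.parameters(),
                                                   model_ref.parameters())):
        assert (param != param_ref).any(), \
            "param {} with shape {} not updated!".format(count, param.shape)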
Example 4
    def test_FFTNetModelStep(self):
        print(" ---- Test FFTNetModel step forward ----")
        net = FFTNetModel(hid_channels=256, out_channels=256, n_layers=11, cond_channels=80)
        time_start = time.time()
        for i in range(1024):
            x = torch.rand(1, 1, 1)
            cx = torch.rand(1, 80, 1)
            out = net.forward_step(x, cx)
        time_avg = (time.time() - time_start) / 1024
        print("> Avg time per step inference on CPU: {}".format(time_avg))
        # after 1024 steps the first layer's queue1 holds samples, queue2 is still empty
        assert abs(net.layers[0].buffer.queue1.sum().item()) > 0
        assert abs(net.layers[0].buffer.queue2.sum().item()) == 0

        # on GPU
        net = FFTNetModel(hid_channels=256, out_channels=256, n_layers=11, cond_channels=80)
        net.cuda()
        time_start = time.time()
        for i in range(1024):
            x = torch.rand(1, 1, 1)
            cx = torch.rand(1, 80, 1)
            out = net.forward_step(x.cuda(), cx.cuda())
        time_avg = (time.time() - time_start) / 1024
        print("> Avg time per step inference on GPU: {}".format(time_avg))
        # the same buffer state is expected on GPU
        assert abs(net.layers[0].buffer.queue1.sum().item()) > 0
        assert abs(net.layers[0].buffer.queue2.sum().item()) == 0

        # check the second queue
        net = FFTNetModel(hid_channels=256, out_channels=256, n_layers=11, cond_channels=80)
        time_start = time.time()
        for i in range(1025):
            x = torch.rand(1, 1, 1)
            cx = torch.rand(1, 80, 1)
            out = net.forward_step(x, cx)
        # the 1025th step shifts the first sample into queue2; only its last slot is filled
        assert abs(net.layers[0].buffer.queue1.sum().item()) > 0
        assert abs(net.layers[0].buffer.queue2.sum().item()) > 0
        assert abs(net.layers[0].buffer.queue2[:, :, :-1].sum().item()) == 0
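These asserts pin down the stepwise-inference buffering: the first layer's queue1 fills during the first 1024 steps, and the 1025th step shifts its oldest sample into queue2. A minimal sketch of a two-queue shift buffer with that behavior (hypothetical; the repo's buffer implementation is not shown in these examples):

import torch

# Hypothetical two-queue shift buffer reproducing the asserts above:
# new samples enter queue1, and its oldest sample spills into queue2.
class TwoQueueBuffer:
    def __init__(self, channels, size=1024):
        self.queue1 = torch.zeros(1, channels, size)
        self.queue2 = torch.zeros(1, channels, size)

    def push(self, x):  # x: (1, channels, 1)
        spill = self.queue1[:, :, :1]  # oldest entry of queue1
        self.queue2 = torch.cat([self.queue2[:, :, 1:], spill], dim=2)
        self.queue1 = torch.cat([self.queue1[:, :, 1:], x], dim=2)

buf = TwoQueueBuffer(channels=256)
for _ in range(1025):
    buf.push(torch.rand(1, 256, 1))
assert abs(buf.queue1.sum().item()) > 0              # filled after 1024 steps
assert abs(buf.queue2.sum().item()) > 0              # first spill at step 1025
assert abs(buf.queue2[:, :, :-1].sum().item()) == 0  # only the last slot is set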
Example 5
    args = parser.parse_args()
    c = load_config(args.config_path)

    # setup output paths and read configs
    ROOT_PATH = os.path.dirname(os.path.realpath(__file__))
    OUT_PATH = os.path.join(ROOT_PATH, c.output_path)
    OUT_PATH = create_experiment_folder(OUT_PATH, c.model_name, True)
    CHECKPOINT_PATH = os.path.join(OUT_PATH, 'checkpoints')
    shutil.copyfile(args.config_path, os.path.join(OUT_PATH, 'config.json'))

    # setup TensorBoard
    tb = SummaryWriter(OUT_PATH)

    # create the FFTNet model
    model = FFTNetModel(hid_channels=256,
                        out_channels=256,
                        n_layers=c.num_quant,
                        cond_channels=80)
    criterion = MaskedCrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=c.lr)

    num_params = count_parameters(model)
    print(" > Models has {} parameters".format(num_params))

    if use_cuda:
        model.cuda()
        criterion.cuda()

    # these two classes extend torch.utils.data.Dataset to create the batches;
    # each batch is a tuple of three elements: wav, mels, audio file name
    train_dataset = LJSpeechDataset(
        os.path.join(c.data_path, "mels", "meta_fftnet_train.csv"),
Example 6
import torch
import time
from tqdm import tqdm
from model import FFTNet, FFTNetModel
from generic_utils import count_parameters

torch.manual_seed(1)
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if use_cuda:
    torch.backends.cudnn.benchmark = False

print(" ---- Test FFTNetModel step forward ----")
net = FFTNetModel(hid_channels=256,
                  out_channels=256,
                  n_layers=11,
                  cond_channels=80)
net.eval()
print(" > Number of model params: ", count_parameters(net))
# reuse a single dummy sample and conditioning frame for the benchmark
x = torch.rand(1, 1, 1)
cx = torch.rand(1, 80, 1)
time_start = time.time()
with torch.no_grad():
    for i in tqdm(range(20000)):
        out = net.forward_step(x, cx)
    time_avg = (time.time() - time_start) / 20000
    print("> Avg time per step inference on CPU: {}".format(time_avg))

# on GPU
net = FFTNetModel(hid_channels=256,
                  out_channels=256,