class TestVQVAE(unittest.TestCase):
    """Smoke tests for the VQ-VAE model: summary, forward pass, loss,
    sampling, and reconstruction.  These print results rather than assert;
    they mainly verify the model runs end to end without error."""

    def setUp(self) -> None:
        # 3 input channels, 64-dim embeddings, codebook of 512 entries.
        self.model = VQVAE(3, 64, 512)

    def test_summary(self):
        # Layer-by-layer summary for a 3x64x64 input, forced onto CPU.
        print(summary(self.model, (3, 64, 64), device='cpu'))

    def test_forward(self):
        trainable = sum(p.numel() for p in self.model.parameters()
                        if p.requires_grad)
        print(trainable)
        batch = torch.randn(16, 3, 64, 64)
        outputs = self.model(batch)
        print("Model Output size:", outputs[0].size())

    def test_loss(self):
        batch = torch.randn(16, 3, 64, 64)
        outputs = self.model(batch)
        # loss_function unpacks the full forward result; M_N weights the loss.
        loss = self.model.loss_function(*outputs, M_N=0.005)
        print(loss)

    def test_sample(self):
        # NOTE: this test requires a CUDA device.
        self.model.cuda()
        samples = self.model.sample(8, 'cuda')
        print(samples.shape)

    def test_generate(self):
        batch = torch.randn(16, 3, 64, 64)
        recon = self.model.generate(batch)
        print(recon.shape)
def train_CIFAR10(opt):
    """Train a VQ-VAE on CIFAR-10.

    Args:
        opt: parsed CLI options; ``opt.config`` is the path of a config file
            consumed by ``get_config``.

    Side effects: creates a timestamped run directory, copies the source and
    config files into it for reproducibility, downloads CIFAR-10 if needed,
    writes reconstruction previews, and saves a checkpoint every epoch.
    """
    import torchvision.datasets as datasets
    import torchvision.transforms as transforms
    from torchvision.utils import make_grid
    from matplotlib import pyplot as plt

    params = get_config(opt.config)

    # Timestamped run directory; snapshot code + config alongside the results.
    save_path = os.path.join(
        params['save_path'],
        datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S'))
    os.makedirs(save_path, exist_ok=True)
    shutil.copy('models.py', os.path.join(save_path, 'models.py'))
    shutil.copy('train.py', os.path.join(save_path, 'train.py'))
    shutil.copy(opt.config,
                os.path.join(save_path, os.path.basename(opt.config)))

    cuda = torch.cuda.is_available()
    gpu_ids = list(range(torch.cuda.device_count()))
    TensorType = torch.cuda.FloatTensor if cuda else torch.Tensor

    data_path = os.path.join(params['data_root'], 'cifar10')
    os.makedirs(data_path, exist_ok=True)
    # Same normalization for train and validation: map [0,1] to [-1,1].
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    train_dataset = datasets.CIFAR10(root=data_path, train=True,
                                     download=True, transform=transform)
    val_dataset = datasets.CIFAR10(root=data_path, train=False,
                                   download=True, transform=transform)
    train_loader = DataLoader(train_dataset,
                              batch_size=params['batch_size'] * len(gpu_ids),
                              shuffle=True,
                              num_workers=params['num_workers'],
                              pin_memory=cuda)
    val_loader = DataLoader(val_dataset, batch_size=1,
                            num_workers=params['num_workers'],
                            pin_memory=cuda)

    # FIX: torchvision renamed `train_data` to `data` (the old attribute was
    # deprecated and then removed), so `.train_data` raises AttributeError on
    # current versions.  The variance normalizes the reconstruction error.
    data_variance = np.var(train_dataset.data / 255.0)

    encoder = Encoder(params['dim'], params['residual_channels'],
                      params['n_layers'], params['d'])
    decoder = Decoder(params['dim'], params['residual_channels'],
                      params['n_layers'], params['d'])
    vq = VectorQuantizer(params['k'], params['d'], params['beta'],
                         params['decay'], TensorType)

    # FIX: identity comparison with None (`is not None`), not `!= None`.
    if params['checkpoint'] is not None:
        checkpoint = torch.load(params['checkpoint'])
        params['start_epoch'] = checkpoint['epoch']
        encoder.load_state_dict(checkpoint['encoder'])
        decoder.load_state_dict(checkpoint['decoder'])
        vq.load_state_dict(checkpoint['vq'])

    model = VQVAE(encoder, decoder, vq)
    if cuda:
        model = nn.DataParallel(model.cuda(), device_ids=gpu_ids)

    # FIX: renamed from `opt`, which shadowed the function's `opt` argument.
    optimizer = torch.optim.Adam(
        [p for p in model.parameters() if p.requires_grad], lr=params['lr'])

    for epoch in range(params['start_epoch'], params['num_epochs']):
        train_bar = tqdm(train_loader)
        for data, _ in train_bar:
            if cuda:
                data = data.cuda()
            optimizer.zero_grad()
            vq_loss, data_recon, _ = model(data)
            # Reconstruction MSE scaled by the dataset variance, plus the
            # VQ loss (mean over DataParallel replicas).
            recon_error = torch.mean((data_recon - data) ** 2) / data_variance
            loss = recon_error + vq_loss.mean()
            loss.backward()
            optimizer.step()
            train_bar.set_description('Epoch {}: loss {:.4f}'.format(
                epoch + 1, loss.mean().item()))

        # Qualitative check: reconstruct one validation image each epoch.
        model.eval()
        with torch.no_grad():  # FIX: no gradients needed during validation
            data_val, _ = next(iter(val_loader))
            if cuda:
                data_val = data_val.cuda()
            _, data_recon_val, _ = model(data_val)
        # +0.5 undoes the [-1,1] normalization shift for display.
        plt.imsave(os.path.join(save_path, 'latest_val_recon.png'),
                   (make_grid(data_recon_val.cpu().data) +
                    0.5).numpy().transpose(1, 2, 0))
        plt.imsave(os.path.join(save_path, 'latest_val_orig.png'),
                   (make_grid(data_val.cpu().data) + 0.5).numpy().transpose(
                       1, 2, 0))
        model.train()

        torch.save(
            {
                'epoch': epoch,
                'encoder': encoder.state_dict(),
                'decoder': decoder.state_dict(),
                'vq': vq.state_dict(),
            }, os.path.join(save_path, '{}_checkpoint.pth'.format(epoch)))
#train_files, test_files = train_test_split(files, test_size=test_size, random_state=random_state) for i, mora_i in enumerate(mora_index_lists_for_model): if (i - 1) % 20 == 0: #test pass elif i % 20 == 0: #valid test_mora_index_lists.append(mora_i) else: train_mora_index_lists.append(mora_i) model = VQVAE().to(device) if args.model_path != '': model.load_state_dict(torch.load(args.model_path)) optimizer = optim.Adam(model.parameters(), lr=2e-3) #1e-3 start = time.time() beta = 0.3 # Reconstruction + KL divergence losses summed over all elements and batch def loss_function(recon_x, x, z, z_unquantized): MSE = F.mse_loss( recon_x.view(-1), x.view(-1, ), reduction='sum' ) #F.binary_cross_entropy(recon_x.view(-1), x.view(-1, ), reduction='sum') with torch.no_grad(): z_no_grad = z z_unquantized_no_grad = z_unquantized
def objective(trial):
    """Optuna objective: train a VQ-VAE on acoustic features and return the
    final F0 loss of the held-out set.

    Args:
        trial: an ``optuna.Trial``; samples ``num_lstm_layers``, ``z_dim``
            and ``num_class``.

    Returns:
        The F0 loss accumulated over the test set at the last epoch.

    Relies on file-level globals: ``X``, ``Y``, ``args``, ``device``,
    ``calc_lf0_rmse``, ``lf0_start_idx``, ``vuv_start_idx``.
    """
    mora_index_lists = sorted(
        glob(join('data/basic5000/mora_index', "squeezed_*.csv")))
    #mora_index_lists = mora_index_lists[:len(mora_index_lists)-5] # last 5 is real testset
    # FIX: np.loaadtxt -> np.loadtxt (typo crashed with AttributeError).
    mora_index_lists_for_model = [
        np.loadtxt(path).reshape(-1) for path in mora_index_lists
    ]
    train_mora_index_lists = []
    test_mora_index_lists = []
    # Deterministic 20-way split: index 1 mod 20 is skipped (held-out test),
    # index 0 mod 20 goes to validation, the rest to training.
    for i, mora_i in enumerate(mora_index_lists_for_model):
        if (i - 1) % 20 == 0:  #test
            pass
        elif i % 20 == 0:  #valid
            test_mora_index_lists.append(mora_i)
        else:
            train_mora_index_lists.append(mora_i)

    # Hyperparameters sampled by Optuna.
    num_lstm_layers = trial.suggest_int('num_lstm_layers', 1, 3)
    z_dim = trial.suggest_categorical('z_dim', [1, 2, 8])
    num_class = trial.suggest_int('num_class', 2, 4)

    model = VQVAE(num_class=num_class, num_layers=num_lstm_layers,
                  z_dim=z_dim).to(device)
    optimizer = optim.Adam(model.parameters(), lr=2e-3)  #1e-3
    start = time.time()

    # Reconstruction + VQ losses summed over all elements and batch
    def loss_function(recon_x, x, z, z_unquantized, beta=1):
        """Sum-reduced reconstruction MSE plus the VQ codebook and
        (beta-weighted) commitment terms."""
        MSE = F.mse_loss(
            recon_x.view(-1), x.view(-1, ), reduction='sum'
        )  #F.binary_cross_entropy(recon_x.view(-1), x.view(-1, ), reduction='sum')
        # Codebook loss ||z - sg[z_e]||^2 + beta * commitment ||sg[z] - z_e||^2.
        vq_loss = F.mse_loss(
            z.view(-1), z_unquantized.detach().view(-1, ),
            reduction='sum') + beta * F.mse_loss(
                z.detach().view(-1), z_unquantized.view(-1, ),
                reduction='sum')
        return MSE + vq_loss

    # NOTE: removed unused `func_tensor = np.vectorize(torch.from_numpy)`.
    # Optionally truncate the training data to a fraction of the full set.
    train_ratio = int(args.train_ratio * len(train_mora_index_lists))  #1
    X_acoustic_train = [
        X['acoustic']['train'][i] for i in range(len(X['acoustic']['train']))
    ][:train_ratio]
    Y_acoustic_train = [
        Y['acoustic']['train'][i] for i in range(len(Y['acoustic']['train']))
    ][:train_ratio]
    train_mora_index_lists = [
        train_mora_index_lists[i] for i in range(len(train_mora_index_lists))
    ][:train_ratio]
    train_num = len(X_acoustic_train)
    X_acoustic_test = [
        X['acoustic']['test'][i] for i in range(len(X['acoustic']['test']))
    ]
    Y_acoustic_test = [
        Y['acoustic']['test'][i] for i in range(len(Y['acoustic']['test']))
    ]
    test_mora_index_lists = [
        test_mora_index_lists[i] for i in range(len(test_mora_index_lists))
    ]

    # "Loaders" are plain lists of [input, target, mora_index] triples.
    train_loader = [[
        X_acoustic_train[i], Y_acoustic_train[i], train_mora_index_lists[i]
    ] for i in range(len(train_mora_index_lists))]
    test_loader = [[
        X_acoustic_test[i], Y_acoustic_test[i], test_mora_index_lists[i]
    ] for i in range(len(test_mora_index_lists))]

    def train(epoch):
        """One training pass over train_loader; returns the mean loss."""
        model.train()
        train_loss = 0
        for batch_idx, data in enumerate(train_loader):
            tmp = []
            for j in range(2):
                tmp.append(torch.from_numpy(data[j]).to(device))
            optimizer.zero_grad()
            recon_batch, z, z_unquantized = model(tmp[0], tmp[1], data[2])
            loss = loss_function(recon_batch, tmp[1], z, z_unquantized)
            loss.backward()
            train_loss += loss.item()
            optimizer.step()
            del tmp
            # Only true at batch_idx == 0, i.e. one progress line per epoch.
            if batch_idx % len(train_loader) == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx, train_num,
                    100. * batch_idx / train_num, loss.item()))
        print('====> Epoch: {} Average loss: {:.4f}'.format(
            epoch, train_loss / len(train_loader)))
        return train_loss / len(train_loader)

    def test(epoch):
        """Evaluate on test_loader; returns (mean loss, summed F0 RMSE)."""
        model.eval()
        test_loss = 0
        f0_loss = 0
        with torch.no_grad():
            for i, data in enumerate(test_loader):  # FIX: stray trailing comma
                tmp = []
                for j in range(2):
                    tmp.append(torch.tensor(data[j]).to(device))
                recon_batch, z, z_unquantized = model(tmp[0], tmp[1], data[2])
                test_loss += loss_function(recon_batch, tmp[1], z,
                                           z_unquantized).item()
                # 199 = acoustic feature dimension; F0 error computed on the
                # lf0/vuv slices of the frame-level features.
                f0_loss += calc_lf0_rmse(
                    recon_batch.cpu().numpy().reshape(-1, 199),
                    tmp[1].cpu().numpy().reshape(-1, 199), lf0_start_idx,
                    vuv_start_idx)
                del tmp
        test_loss /= len(test_loader)
        print('====> Test set loss: {:.4f}'.format(test_loss))
        return test_loss, f0_loss

    loss_list = []
    test_loss_list = []
    test_f0_erros = []
    num_epochs = args.num_epoch
    for epoch in range(1, num_epochs + 1):
        loss = train(epoch)
        test_loss, f0_loss = test(epoch)
        # FIX: the loop already starts at 1 — printing `epoch + 1` misreported
        # progress as [2/N] on the first epoch.
        print('epoch [{}/{}], loss: {:.4f} test_loss: {:.4f}'.format(
            epoch, num_epochs, loss, test_loss))
        # logging
        loss_list.append(loss)
        test_loss_list.append(test_loss)
        test_f0_erros.append(f0_loss)
        print(time.time() - start)
        if epoch % 5 == 0:
            torch.save(
                model.state_dict(),
                '{}/{}layers_zdim{}_model_{}.pth'.format(
                    args.output_dir, num_lstm_layers, z_dim, epoch))

    # Persist per-epoch curves for this hyperparameter configuration.
    np.save(
        args.output_dir +
        '/{}layers_zdim{}_loss_list.npy'.format(num_lstm_layers, z_dim),
        np.array(loss_list))
    np.save(
        args.output_dir + '/{}layers_zdim{}_test_loss_list.npy'.format(
            num_lstm_layers, z_dim), np.array(test_loss_list))
    np.save(
        args.output_dir + '/{}layers_zdim{}_test_f0_loss_list.npy'.format(
            num_lstm_layers, z_dim), np.array(test_f0_erros))
    return f0_loss