def synthesize(z, rate=1):
    """Decode a sequence of accent-class ids into a wav file and return its path.

    z    : iterable of per-mora class ids; each is mapped through the VQ
           codebook via class2value() before decoding.
    rate : playback-speed multiplier applied to the output sample rate.
    """
    # Restore the pretrained VQ-VAE used by the demo.
    model = VQVAE(num_layers=2, z_dim=1, num_class=4,
                  input_linguistic_dim=289 + 2).to(device)
    model.load_state_dict(
        torch.load('static/model/vqvae_model_40.pth',
                   map_location=torch.device(device)))

    # Pre-extracted features of the fixed demo utterance
    # ("Mizu wo Malaysia kara kawanakute wa naranai no desu").
    ling = np.loadtxt('static/data/ling_F_chicago.csv')
    acou = np.loadtxt('static/data/acou_F_chicago.csv')
    mora_index = np.loadtxt(
        'static/data/squeezed_mora_index_chicago.csv').reshape(-1)

    # One codebook value per requested class, shaped (n_mora, 1).
    z_values = np.array([class2value(int(c), model) for c in z]).reshape(-1, 1)

    with torch.no_grad():
        # Drop columns 285..-5 of the linguistic features and append
        # two constant indicator columns (ones, zeros).
        feats = np.concatenate(
            (ling[:, :285], ling[:, -4:],
             np.ones((ling.shape[0], 1)), np.zeros((ling.shape[0], 1))),
            axis=1)
        feats = torch.from_numpy(feats).float().to(device)
        pred_lf0 = model.decode(
            torch.from_numpy(z_values).float().to(device), feats, mora_index,
            tokyo=False).cpu().numpy().reshape(-1)

    # Splice the predicted log-F0 into the acoustic features and zero the
    # two columns that follow it (its delta features).
    acoustic = acou.copy()
    acoustic[:, lf0_start_idx] = pred_lf0
    acoustic[:, lf0_start_idx + 1:lf0_start_idx + 3] = 0

    waveform = gen_waveform(acoustic)
    filepath = './static/wav/BASIC5000_0001_{}.wav'.format(randomname(10))
    wavfile.write(filepath, rate=int(fs * rate), data=waveform.astype(np.int16))
    return filepath
def train(data_iterator, monitor, config, comm, args):
    """Train the VQ-VAE or its PixelCNN prior (nnabla, multi-worker via comm).

    data_iterator : factory returning an iterator for train/val splits.
    monitor       : nnabla monitor root used for loss/reconstruction logging.
    config        : nested dict with 'train', 'val', 'model', 'prior',
                    'monitor' and 'dataset' sections.
    comm          : communicator; only rank 0 logs and saves checkpoints.
    args          : CLI flags (pixelcnn_prior, sample_from_pixelcnn,
                    load_checkpoint, sample_save_path).
    """
    # Monitors exist only on rank 0 so that a single worker writes logs.
    monitor_train_loss, monitor_train_recon = None, None
    monitor_val_loss, monitor_val_recon = None, None
    if comm.rank == 0:
        monitor_train_loss = MonitorSeries(
            config['monitor']['train_loss'], monitor,
            interval=config['train']['logger_step_interval'])
        monitor_train_recon = MonitorImageTile(
            config['monitor']['train_recon'], monitor,
            interval=config['train']['logger_step_interval'],
            num_images=config['train']['batch_size'])
        monitor_val_loss = MonitorSeries(
            config['monitor']['val_loss'], monitor,
            interval=config['train']['logger_step_interval'])
        monitor_val_recon = MonitorImageTile(
            config['monitor']['val_recon'], monitor,
            interval=config['train']['logger_step_interval'],
            num_images=config['train']['batch_size'])

    model = VQVAE(config)

    if not args.sample_from_pixelcnn:
        if config['train']['solver'] == 'adam':
            solver = S.Adam()
        else:
            # BUG FIX: nnabla names the momentum-SGD solver S.Momentum
            # (capitalized); S.momentum() raised AttributeError.
            solver = S.Momentum()
        solver.set_learning_rate(config['train']['learning_rate'])

        train_loader = data_iterator(config, comm, train=True)
        # Imagenet has no validation iterator in this setup.
        if config['dataset']['name'] != 'imagenet':
            val_loader = data_iterator(config, comm, train=False)
        else:
            val_loader = None
    else:
        # Pure sampling mode: no optimization, no data needed.
        solver, train_loader, val_loader = None, None, None

    if not args.pixelcnn_prior:
        trainer = VQVAEtrainer(model, solver, train_loader, val_loader,
                               monitor_train_loss, monitor_train_recon,
                               monitor_val_loss, monitor_val_recon, config, comm)
        num_epochs = config['train']['num_epochs']
    else:
        pixelcnn_model = GatedPixelCNN(config['prior'])
        trainer = TrainerPrior(model, pixelcnn_model, solver, train_loader,
                               val_loader, monitor_train_loss,
                               monitor_train_recon, monitor_val_loss,
                               monitor_val_recon, config, comm,
                               eval=args.sample_from_pixelcnn)
        num_epochs = config['prior']['train']['num_epochs']

    if os.path.exists(config['model']['checkpoint']) and (
            args.load_checkpoint or args.sample_from_pixelcnn):
        # NOTE(review): existence is tested on the VQ-VAE checkpoint even when
        # the prior checkpoint is the one loaded below — confirm both exist.
        checkpoint_path = config['model']['checkpoint'] if not args.pixelcnn_prior else config['prior']['checkpoint']
        trainer.load_checkpoint(
            checkpoint_path,
            msg='Parameters loaded from {}'.format(checkpoint_path),
            pixelcnn=args.pixelcnn_prior,
            load_solver=not args.sample_from_pixelcnn)

    if args.sample_from_pixelcnn:
        trainer.random_generate(args.sample_from_pixelcnn,
                                args.sample_save_path)
        return

    for epoch in range(num_epochs):
        trainer.train(epoch)
        if epoch % config['val']['interval'] == 0 and val_loader is not None:
            trainer.validate(epoch)
        if comm.rank == 0:
            if epoch % config['train']['save_param_step_interval'] == 0 or epoch == config['train']['num_epochs'] - 1:
                trainer.save_checkpoint(config['model']['saved_models_dir'],
                                        epoch, pixelcnn=args.pixelcnn_prior)
# VQ-VAE hyper-parameters (TensorFlow 1.x-style setup; num_hiddens,
# num_residual_hiddens, batch_size and the data dicts come from outside
# this chunk).
num_residual_layers = 2
embedding_dim = 64
num_embeddings = 512
commitment_cost = 0.25  # weight of the commitment term in the VQ loss
vq_use_ema = False
decay = 0.99  # EMA decay for the codebook (relevant only with EMA updates)
learning_rate = 3e-4

# NOTE(review): num_residual_hiddens is passed twice while
# num_residual_layers is never used — confirm whether the third positional
# argument should be num_residual_layers. Also vq_use_ema is defined above
# but use_ema is hard-coded to False here.
model = VQVAE(num_hiddens, num_residual_hiddens, num_residual_hiddens,
              embedding_dim, num_embeddings, commitment_cost, decay,
              use_ema=False)

# Data Loading.
train_dataset_iterator = (
    tf.data.Dataset.from_tensor_slices(train_data_dict)
    .map(cast_and_normalise_images)
    .shuffle(10000)
    .repeat(-1)  # repeat indefinitely
    .batch(batch_size)).make_one_shot_iterator()
valid_dataset_iterator = (
    tf.data.Dataset.from_tensor_slices(valid_data_dict)
    .map(cast_and_normalise_images)
    .shuffle(10000)
    .repeat(1)  # 1 epoch
    .batch(batch_size)).make_initializable_iterator()
train_dataset_batch = train_dataset_iterator.get_next()
def setUp(self) -> None:
    """Build a fresh VQVAE instance before each test case."""
    # self.model2 = VAE(3, 10)
    # NOTE(review): positional arguments are presumably
    # (in_channels=3, embedding_dim=64, num_embeddings=512) —
    # confirm against the VQVAE constructor.
    self.model = VQVAE(3, 64, 512)
np.loadtxt(path).reshape(-1) for path in mora_index_lists
]  # closes a list comprehension opened before this chunk
   # (builds mora_index_lists_for_model)
train_mora_index_lists = []
test_mora_index_lists = []
#train_files, test_files = train_test_split(files, test_size=test_size, random_state=random_state)
# Deterministic 20-way split over utterance index i:
#   i == 1, 21, 41, ... -> held out entirely ("test", skipped here);
#   i == 0, 20, 40, ... -> validation (NOTE(review): appended to the list
#                          named test_mora_index_lists — confusing naming);
#   everything else     -> training.
for i, mora_i in enumerate(mora_index_lists_for_model):
    if (i - 1) % 20 == 0:  #test
        pass
    elif i % 20 == 0:  #valid
        test_mora_index_lists.append(mora_i)
    else:
        train_mora_index_lists.append(mora_i)

model = VQVAE().to(device)
if args.model_path != '':
    # Warm-start from a previously saved state dict.
    model.load_state_dict(torch.load(args.model_path))
optimizer = optim.Adam(model.parameters(), lr=2e-3)  #1e-3
start = time.time()
beta = 0.3  # presumably the commitment-loss weight — TODO confirm where used


# Reconstruction + KL divergence losses summed over all elements and batch
def loss_function(recon_x, x, z, z_unquantized):
    # NOTE(review): this definition is truncated in this chunk — only the
    # reconstruction term is visible here.
    MSE = F.mse_loss(
        recon_x.view(-1), x.view(-1, ), reduction='sum'
    )  #F.binary_cross_entropy(recon_x.view(-1), x.view(-1, ), reduction='sum')
def train_CIFAR10(opt):
    """Train a VQ-VAE on CIFAR-10 and checkpoint every epoch.

    opt : argparse-style namespace; only opt.config (path to a config file
          readable by get_config()) is used here.

    Side effects: downloads CIFAR-10 under params['data_root'], snapshots
    code/config into a timestamped directory under params['save_path'],
    writes one reconstruction image pair and one checkpoint per epoch.
    """
    import torchvision.datasets as datasets
    import torchvision.transforms as transforms
    from torchvision.utils import make_grid
    from matplotlib import pyplot as plt

    params = get_config(opt.config)
    save_path = os.path.join(
        params['save_path'],
        datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S'))
    os.makedirs(save_path, exist_ok=True)
    # Snapshot code and config next to the results for reproducibility.
    shutil.copy('models.py', os.path.join(save_path, 'models.py'))
    shutil.copy('train.py', os.path.join(save_path, 'train.py'))
    shutil.copy(opt.config, os.path.join(save_path,
                                         os.path.basename(opt.config)))

    cuda = torch.cuda.is_available()
    gpu_ids = [i for i in range(torch.cuda.device_count())]
    TensorType = torch.cuda.FloatTensor if cuda else torch.Tensor

    data_path = os.path.join(params['data_root'], 'cifar10')
    os.makedirs(data_path, exist_ok=True)
    # The same normalization is applied to both splits.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    train_dataset = datasets.CIFAR10(root=data_path, train=True,
                                     download=True, transform=transform)
    val_dataset = datasets.CIFAR10(root=data_path, train=False,
                                   download=True, transform=transform)
    train_loader = DataLoader(train_dataset,
                              batch_size=params['batch_size'] * len(gpu_ids),
                              shuffle=True,
                              num_workers=params['num_workers'],
                              pin_memory=cuda)
    val_loader = DataLoader(val_dataset,
                            batch_size=1,
                            num_workers=params['num_workers'],
                            pin_memory=cuda)

    # BUG FIX: `train_data` was deprecated and then removed from torchvision
    # datasets; `data` (uint8 HWC array) is the supported attribute.
    data_variance = np.var(train_dataset.data / 255.0)

    encoder = Encoder(params['dim'], params['residual_channels'],
                      params['n_layers'], params['d'])
    decoder = Decoder(params['dim'], params['residual_channels'],
                      params['n_layers'], params['d'])
    vq = VectorQuantizer(params['k'], params['d'], params['beta'],
                         params['decay'], TensorType)

    if params['checkpoint'] is not None:
        checkpoint = torch.load(params['checkpoint'])
        # NOTE(review): resuming restarts at checkpoint['epoch'], which
        # repeats the epoch that produced the checkpoint — confirm intended.
        params['start_epoch'] = checkpoint['epoch']
        encoder.load_state_dict(checkpoint['encoder'])
        decoder.load_state_dict(checkpoint['decoder'])
        vq.load_state_dict(checkpoint['vq'])

    model = VQVAE(encoder, decoder, vq)
    if cuda:
        model = nn.DataParallel(model.cuda(), device_ids=gpu_ids)

    # BUG FIX: the original rebound the function argument `opt` to the Adam
    # optimizer, shadowing the CLI options for the rest of the function.
    optimizer = torch.optim.Adam(
        [p for p in model.parameters() if p.requires_grad], lr=params['lr'])

    for epoch in range(params['start_epoch'], params['num_epochs']):
        train_bar = tqdm(train_loader)
        for data, _ in train_bar:
            if cuda:
                data = data.cuda()
            optimizer.zero_grad()
            vq_loss, data_recon, _ = model(data)
            # Normalize reconstruction error by the dataset variance so the
            # two loss terms stay comparable.
            recon_error = torch.mean((data_recon - data)**2) / data_variance
            loss = recon_error + vq_loss.mean()
            loss.backward()
            optimizer.step()
            train_bar.set_description('Epoch {}: loss {:.4f}'.format(
                epoch + 1, loss.mean().item()))

        # Qualitative check: reconstruct one validation image per epoch.
        model.eval()
        with torch.no_grad():  # inference only — skip autograd bookkeeping
            data_val, _ = next(iter(val_loader))
            if cuda:
                data_val = data_val.cuda()
            _, data_recon_val, _ = model(data_val)
        plt.imsave(os.path.join(save_path, 'latest_val_recon.png'),
                   (make_grid(data_recon_val.cpu().data) +
                    0.5).numpy().transpose(1, 2, 0))
        plt.imsave(os.path.join(save_path, 'latest_val_orig.png'),
                   (make_grid(data_val.cpu().data) +
                    0.5).numpy().transpose(1, 2, 0))
        model.train()

        torch.save(
            {
                'epoch': epoch,
                'encoder': encoder.state_dict(),
                'decoder': decoder.state_dict(),
                'vq': vq.state_dict(),
            }, os.path.join(save_path, '{}_checkpoint.pth'.format(epoch)))
def objective(trial):
    """Optuna objective: train a VQ-VAE on mora-aligned acoustic features.

    Hyper-parameters sampled per trial: number of LSTM layers, latent
    dimension, number of VQ classes. Uses module-level globals X, Y, args,
    device. Returns the F0 loss accumulated over the validation set in the
    final epoch (the quantity Optuna minimizes).
    """
    mora_index_lists = sorted(
        glob(join('data/basic5000/mora_index', "squeezed_*.csv")))
    #mora_index_lists = mora_index_lists[:len(mora_index_lists)-5] # last 5 is real testset
    # BUG FIX: np.loaadtxt was a typo (AttributeError at runtime).
    mora_index_lists_for_model = [
        np.loadtxt(path).reshape(-1) for path in mora_index_lists
    ]
    train_mora_index_lists = []
    test_mora_index_lists = []
    #train_files, test_files = train_test_split(files, test_size=test_size, random_state=random_state)
    # Deterministic 20-way split: i == 1 mod 20 is held out entirely (test),
    # i == 0 mod 20 is validation, the rest are training.
    for i, mora_i in enumerate(mora_index_lists_for_model):
        if (i - 1) % 20 == 0:  #test
            pass
        elif i % 20 == 0:  #valid
            test_mora_index_lists.append(mora_i)
        else:
            train_mora_index_lists.append(mora_i)

    # Per-trial hyper-parameters.
    num_lstm_layers = trial.suggest_int('num_lstm_layers', 1, 3)
    z_dim = trial.suggest_categorical('z_dim', [1, 2, 8])
    num_class = trial.suggest_int('num_class', 2, 4)
    model = VQVAE(num_class=num_class, num_layers=num_lstm_layers,
                  z_dim=z_dim).to(device)
    optimizer = optim.Adam(model.parameters(), lr=2e-3)  #1e-3
    start = time.time()

    def loss_function(recon_x, x, z, z_unquantized, beta=1):
        """Sum-reduced reconstruction MSE plus the two-sided VQ loss."""
        MSE = F.mse_loss(
            recon_x.view(-1), x.view(-1, ), reduction='sum'
        )  #F.binary_cross_entropy(recon_x.view(-1), x.view(-1, ), reduction='sum')
        # NOTE(review): stop-gradient placement looks swapped relative to the
        # usual VQ-VAE loss (sg on the encoder output in the codebook term) —
        # confirm which argument is the quantized one.
        vq_loss = F.mse_loss(
            z.view(-1), z_unquantized.detach().view(-1, ),
            reduction='sum') + beta * F.mse_loss(
                z.detach().view(-1), z_unquantized.view(-1, ),
                reduction='sum')
        return MSE + vq_loss

    # Optionally train on only a fraction of the training utterances.
    train_ratio = int(args.train_ratio * len(train_mora_index_lists))  #1
    X_acoustic_train = list(X['acoustic']['train'])[:train_ratio]
    Y_acoustic_train = list(Y['acoustic']['train'])[:train_ratio]
    train_mora_index_lists = train_mora_index_lists[:train_ratio]
    train_num = len(X_acoustic_train)
    X_acoustic_test = list(X['acoustic']['test'])
    Y_acoustic_test = list(Y['acoustic']['test'])
    test_mora_index_lists = list(test_mora_index_lists)

    # Each "batch" is one utterance: [linguistic, acoustic, mora indices].
    train_loader = [
        list(triple) for triple in zip(X_acoustic_train, Y_acoustic_train,
                                       train_mora_index_lists)
    ]
    test_loader = [
        list(triple) for triple in zip(X_acoustic_test, Y_acoustic_test,
                                       test_mora_index_lists)
    ]

    def train(epoch):
        """One pass over train_loader; returns the mean per-utterance loss."""
        model.train()
        train_loss = 0
        for batch_idx, data in enumerate(train_loader):
            tmp = []
            for j in range(2):
                tmp.append(torch.from_numpy(data[j]).to(device))
            optimizer.zero_grad()
            recon_batch, z, z_unquantized = model(tmp[0], tmp[1], data[2])
            loss = loss_function(recon_batch, tmp[1], z, z_unquantized)
            loss.backward()
            train_loss += loss.item()
            optimizer.step()
            del tmp
            # True only at batch_idx == 0, i.e. one progress line per epoch.
            if batch_idx % len(train_loader) == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx, train_num,
                    100. * batch_idx / train_num, loss.item()))
        print('====> Epoch: {} Average loss: {:.4f}'.format(
            epoch, train_loss / len(train_loader)))
        return train_loss / len(train_loader)

    def test(epoch):
        """Evaluate on test_loader; returns (mean loss, summed F0 RMSE)."""
        model.eval()
        test_loss = 0
        f0_loss = 0
        with torch.no_grad():
            for i, data in enumerate(test_loader):
                tmp = []
                for j in range(2):
                    tmp.append(torch.tensor(data[j]).to(device))
                recon_batch, z, z_unquantized = model(tmp[0], tmp[1], data[2])
                test_loss += loss_function(recon_batch, tmp[1], z,
                                           z_unquantized).item()
                f0_loss += calc_lf0_rmse(
                    recon_batch.cpu().numpy().reshape(-1, 199),
                    tmp[1].cpu().numpy().reshape(-1, 199), lf0_start_idx,
                    vuv_start_idx)
                del tmp
        test_loss /= len(test_loader)
        print('====> Test set loss: {:.4f}'.format(test_loss))
        return test_loss, f0_loss

    loss_list = []
    test_loss_list = []
    test_f0_erros = []
    num_epochs = args.num_epoch
    for epoch in range(1, num_epochs + 1):
        loss = train(epoch)
        test_loss, f0_loss = test(epoch)
        # BUG FIX: epoch is already 1-based; printing epoch + 1 overstated
        # the epoch number by one.
        print('epoch [{}/{}], loss: {:.4f} test_loss: {:.4f}'.format(
            epoch, num_epochs, loss, test_loss))
        # logging
        loss_list.append(loss)
        test_loss_list.append(test_loss)
        test_f0_erros.append(f0_loss)
        print(time.time() - start)
        if epoch % 5 == 0:
            # Periodic checkpoint plus running loss curves.
            torch.save(
                model.state_dict(),
                '{}/{}layers_zdim{}_model_{}.pth'.format(
                    args.output_dir, num_lstm_layers, z_dim, epoch))
            np.save(
                args.output_dir +
                '/{}layers_zdim{}_loss_list.npy'.format(num_lstm_layers,
                                                        z_dim),
                np.array(loss_list))
            np.save(
                args.output_dir + '/{}layers_zdim{}_test_loss_list.npy'.format(
                    num_lstm_layers, z_dim), np.array(test_loss_list))
            np.save(
                args.output_dir +
                '/{}layers_zdim{}_test_f0_loss_list.npy'.format(
                    num_lstm_layers, z_dim), np.array(test_f0_erros))
    return f0_loss