def train_VAE(gpu=GPU, dataset_fileName=f'{DATASET_SAVE_PATH}/wsj0_normalize_{N_FFT}_{HOP_LENGTH}.pic'):
    """Train the VAE speech prior on the WSJ0 spectrogram dataset.

    Loads a pickled spectrogram matrix of shape (n_freq, n_data), trains
    `network_VAE.VAE` with Adam on random gamma-scaled minibatches, and after
    every epoch saves the current model, the per-minibatch loss history, and a
    copy of the best-so-far model (lowest mean epoch loss).

    Args:
        gpu: CUDA device id to use (module-level default ``GPU``).
        dataset_fileName: path to the pickled training spectrogram.

    Side effects: writes ``model-…``, ``model-best-…`` and ``loss-…`` files
    under ``MODEL_SAVE_PATH``; returns early if the best-model file exists.
    """
    file_suffix = f"normal-scale=gamma-D={N_LATENT}"
    # Build each output path once so the existence check below tests exactly
    # the file that the save step writes (the original mixed '/model-best-…'
    # and 'model-best-…', which disagree unless MODEL_SAVE_PATH ends in '/').
    model_fileName = MODEL_SAVE_PATH + 'model-{0}.npz'.format(file_suffix)
    best_fileName = MODEL_SAVE_PATH + 'model-best-{0}.npz'.format(file_suffix)
    loss_fileName = MODEL_SAVE_PATH + 'loss-{0}.pic'.format(file_suffix)

    if os.path.isfile(best_fileName):
        print(f"{best_fileName} already exist")
        # BUGFIX: was a bare `exit` (a no-op expression that never calls the
        # builtin), so training silently reran; return skips retraining.
        return

    cuda.get_device_from_id(gpu).use()

    # Load dataset: pickled array of shape (n_freq, n_data).
    with open(dataset_fileName, 'rb') as f:
        dataset = pic.load(f)
    n_data = dataset.shape[1]

    # Prepare VAE model on the selected GPU.
    model = network_VAE.VAE(n_freq=int(N_FFT / 2 + 1), n_latent=N_LATENT)
    model.to_gpu()

    # Setup optimizer.
    optimizer = optimizers.Adam(LEARNING_RATE)
    optimizer.setup(model)

    # Learning loop: track the best (lowest) mean epoch loss seen so far.
    min_loss = np.inf
    loss_list = []
    for epoch in range(N_EPOCH):
        print('Epoch:', epoch + 1)
        sum_loss = 0
        perm = np.random.permutation(n_data)
        for ii in progressbar(range(0, n_data, BATCH_SIZE)):
            minibatch = dataset[:, perm[ii:ii + BATCH_SIZE]].T
            # Random per-sample gamma scaling (shape=2, scale=0.5) as data
            # augmentation so the prior is robust to input level.
            scales = np.random.gamma(2, 0.5, (len(minibatch)))
            minibatch = minibatch * scales[:, None]
            x = chainer.Variable(cp.asarray(minibatch, dtype=cp.float32))
            optimizer.update(model.get_loss_func(), x)
            # BUGFIX: weight by the actual minibatch size, not BATCH_SIZE —
            # the last batch may be smaller and was previously over-weighted.
            sum_loss += float(model.loss.data) * len(minibatch)
            loss_list.append(float(model.loss.data))
        sum_loss /= n_data
        print("Loss:", sum_loss)

        print('save the model and optimizer')
        serializers.save_npz(model_fileName, model)
        with open(loss_fileName, 'wb') as f:
            pic.dump(loss_list, f)

        if sum_loss < min_loss:
            shutil.copyfile(model_fileName, best_fileName)
            min_loss = sum_loss
# ---- Command-line options (the ArgumentParser is constructed earlier in the file) ----
parser.add_argument('--n_fft', type=int, default=1024, help='number of frequencies')
parser.add_argument('--n_noise', type=int, default=1, help='number of noise')
parser.add_argument('--n_latent', type=int, default=16, help='dimention of encoded vector')
parser.add_argument('--n_basis_noise', type=int, default=64, help='number of basis of noise (MODE_noise=NMF)')
parser.add_argument('--init_SCM', type=str, default="obs", help='unit, obs, ILRMA')
parser.add_argument('--n_iteration', type=int, default=30, help='number of iteration')
parser.add_argument('--n_Z_iteration', type=int, default=30, help='number of update Z iteration')
parser.add_argument('--mode_update_Z', type=str, default="sampling", help='sampling, sampling2, backprop, backprop2, hybrid, hybrid2')
parser.add_argument('--mode_update_parameter', type=str, default="all", help='all, one_by_one')
args = parser.parse_args()

# Make the pre-trained VAE speech prior importable, then load its weights.
# The checkpoint filename encodes the latent dimension, so it must match --n_latent.
sys.path.append("../DeepSpeechPrior")
import network_VAE
model_fileName = "../DeepSpeechPrior/model-VAE-best-scale=gamma-D={}.npz".format(args.n_latent)
speech_VAE = network_VAE.VAE(n_latent=args.n_latent)
serializers.load_npz(model_fileName, speech_VAE)
name_DNN = "VAE"

# Select the array backend: numpy on CPU (gpu < 0), cupy on the given GPU.
if args.gpu < 0:
    import numpy as xp
else:
    import cupy as xp
    print("Use GPU " + str(args.gpu))
    cuda.get_device_from_id(args.gpu).use()
    speech_VAE.to_gpu()

# Read the input mixture and transpose to channel-major layout.
# assumes a multichannel wav so wav.T is (n_channel, n_sample) — TODO confirm
wav, fs = sf.read(args.input_fileName)
wav = wav.T
M = len(wav)  # number of channels (rows after transpose)
for m in range(M):  # NOTE(review): loop body continues past this chunk