class VAEp:
    """Thin wrapper around a TF1 MNIST VAE.

    Restores a checkpoint on construction and exposes encode/decode
    helpers that speak channel-first numpy arrays.
    """

    def __init__(self, filename=None, model=None):
        # NOTE(review): passing `model` bails out immediately and leaves
        # the instance uninitialized — the original marked this "# error";
        # confirm no caller relies on this path.
        if model is not None:
            return
        config = tf.ConfigProto(allow_soft_placement=True)
        self.session = tf.Session(config=config)
        self.last_batch_size = 64
        self.model = VAE(
            self.session,
            epoch=20,
            batch_size=self.last_batch_size,
            z_dim=20,
            dataset_name="mnist",
            checkpoint_dir=filename,
            result_dir="results",
            log_dir="logs",
        )
        # The graph must exist before a Saver can restore variables into it.
        self.model.build_model()
        # (The original kept commented-out encoder / re-parameterization
        # sampling code here; omitted.)
        self.model.saver = tf.train.Saver()
        _loaded, _ckpt_counter = self.model.load(filename)
        print(" [*] Loading finished!")

    def encode_images(self, images, cond=None):
        """Return latent means for `images`.

        `cond` is accepted but unused (kept for interface compatibility).
        Assumes 4-D channel-first input — TODO confirm with callers.
        """
        nhwc = np.rollaxis(images, 1, 4)  # move axis 1 to the end (NCHW -> NHWC)
        return self.session.run(self.model.mu,
                                feed_dict={self.model.inputs: nhwc})

    def get_zdim(self):
        """Latent dimensionality of the wrapped model."""
        return self.model.z_dim

    def sample_at(self, z):
        """Decode latent codes `z` into channel-first images."""
        generated = self.session.run(self.model.fake_images,
                                     feed_dict={self.model.z: z})
        return np.rollaxis(generated, 3, 1)  # NHWC -> NCHW
def get_model():
    """Construct a VAE, restore its weights from MODEL_PATH, and move it to DEVICE."""
    model = VAE(DEVICE)
    model.load(MODEL_PATH)
    return model.to(DEVICE)
from VAE import VAE
import torch
import numpy as np
import matplotlib.pyplot as plt


def show(a):
    """Rescale a tensor image (0.5 * a + 0.5) and display it in grayscale."""
    img = np.squeeze(a.detach().cpu().numpy())
    plt.imshow(0.5 * img + 0.5, cmap='gray')
    plt.show()
    plt.close()


# Model for 128x128 images, 32-dim latent space, 3 channels, batch norm on.
model = VAE(128, 32, CHANNELS=3, use_bn=True)
model.load("VAEparams.pt")

# Endlessly draw random latent vectors and display the decoded images.
while True:
    latent = torch.from_numpy(np.random.randn(32)).float()
    show(model.decode(latent))
min_max_values[file_path] for file_path in file_paths ] print(file_paths) print(sampled_min_max_values) return sampled_spectrogrmas, sampled_min_max_values def save_signals(signals, save_dir, sample_rate=22050): for i, signal in enumerate(signals): save_path = os.path.join(save_dir, str(i) + ".wav") sf.write(save_path, signal, sample_rate) if __name__ == "__main__": vae = VAE.load("model") sound_generator = SoundGenerator(vae, HOP_LENGTH) with open(MIN_MAX_VALUES_PATH, "rb") as f: min_max_values = pickle.load(f) specs, file_paths = load_FSDD(SPECS_PATH) # sample spectrograms + min max values sampled_specs, sampled_min_max_values = select_spectrograms( specs, file_paths, min_max_values, 5) # generate audio for sampled spectrograms signals, _ = sound_generator.generate(sampled_specs, sampled_min_max_values)
file_paths = [] for root, _, file_names in os.walk(spectrogram_path): for file in file_names: file_path = os.path.join(root, file) spec = np.load(file_path) # (bins, frames, 1) x_train.append(spec) file_paths.append(file_path) x_train = np.array(x_train) x_train = x_train[[..., np.newaxis]] return x_train, file_paths def train(x_train, learning_rate, batch_size, num_epochs): autoencoder = VAE(input_shape=(256, 64, 1), conv_filters=(512, 256, 128, 64, 32), conv_kernel=(3, 3, 3, 3, 3), conv_strides=(2, 2, 2, 2, (2, 1)), latent_dim=128) autoencoder.summary() autoencoder.compile(learning_rate) autoencoder.train(x_train, batch_size, num_epochs) return autoencoder if __name__ == "__main__": x_train, _ = load_FSDD(SPECS_PATH) # x_train = x_train[:,:,:,np.newaxis] ae = train(x_train, LR, BATCH_S, N_EPOCHS) ae.save("model") ae_2 = VAE.load("model") ae_2.summary()