def __init__(self):
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    x_train = load_all("nsynth", "organ_electronic", forceLoad=True)
    self.X_TRAIN = x_train
    self.samples = x_train.shape[1]
    self.channels = 1
    self.kernel_size = 5
    self.audio_shape = (self.samples, self.channels)
    self.latent_dim = 100
    self.folder_name = "simplegannsynth"

    optimizer = Adam(0.0002, 0.5)

    # Build and compile the discriminator
    self.discriminator = self.build_discriminator()
    self.discriminator.compile(loss='binary_crossentropy',
                               optimizer=optimizer,
                               metrics=['accuracy'])

    # Build the generator
    self.generator = self.build_generator()

    # The generator takes noise as input and generates audio
    z = Input(shape=(self.latent_dim,))
    audio = self.generator(z)

    # For the combined model we will only train the generator
    self.discriminator.trainable = False

    # The discriminator takes generated audio as input and determines validity
    validity = self.discriminator(audio)

    # The combined model (stacked generator and discriminator)
    # trains the generator to fool the discriminator
    self.combined = Model(z, validity)
    self.combined.compile(loss='binary_crossentropy', optimizer=optimizer)
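# The train() loop itself is not shown in this excerpt. The sketch below is a
# guess at a single training step in the usual Keras-GAN style; `train_step` is
# a hypothetical helper, not the repo's actual method. It assumes
# `import numpy as np` and that gan.X_TRAIN holds normalized clips shaped
# (N, samples, channels).
def train_step(gan, batch_size=32):
    valid = np.ones((batch_size, 1))
    fake = np.zeros((batch_size, 1))

    # Discriminator: one batch of real clips, one batch of generated clips
    idx = np.random.randint(0, gan.X_TRAIN.shape[0], batch_size)
    real_audio = gan.X_TRAIN[idx]
    noise = np.random.normal(0, 1, (batch_size, gan.latent_dim))
    gen_audio = gan.generator.predict(noise)
    d_loss_real = gan.discriminator.train_on_batch(real_audio, valid)
    d_loss_fake = gan.discriminator.train_on_batch(gen_audio, fake)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Generator: train through the frozen discriminator to label fakes as valid
    g_loss = gan.combined.train_on_batch(noise, valid)
    return d_loss, g_loss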
def __init__(self):
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    x_train = load_all("categorized", "cat", forceLoad=True, framerate=32768)
    self.X_TRAIN = x_train
    self.samples = x_train.shape[1]
    self.channels = 1
    self.kernel_size = 5
    self.audio_shape = (self.samples, self.channels)
    self.latent_dim = 100
    self.folder_name = "wganbatchnorm"

    # Following parameter and optimizer set as recommended in the WGAN paper
    self.n_critic = 5
    optimizer = RMSprop(lr=0.00005)

    # Build the generator and critic
    self.generator = self.build_generator()
    self.critic = self.build_critic()

    #-------------------------------
    # Construct Computational Graph
    # for the Critic
    #-------------------------------

    # Freeze generator's layers while training the critic
    self.generator.trainable = False

    # Audio input (real sample)
    real_clip = Input(shape=self.audio_shape)

    # Noise input
    z_disc = Input(shape=(self.latent_dim,))
    # Generate an audio clip from the noise (fake sample)
    fake_clip = self.generator(z_disc)

    # Critic determines the validity of the real and the fake clips
    fake = self.critic(fake_clip)
    valid = self.critic(real_clip)

    # Construct a weighted average between real and fake clips
    interpolated_clip = RandomWeightedAverage()([real_clip, fake_clip])
    # Determine validity of the weighted sample
    validity_interpolated = self.critic(interpolated_clip)

    # Use Python partial to provide the loss function with the additional
    # 'averaged_samples' argument
    partial_gp_loss = partial(self.gradient_penalty_loss,
                              averaged_samples=interpolated_clip)
    partial_gp_loss.__name__ = 'gradient_penalty'  # Keras requires function names

    self.critic_model = Model(inputs=[real_clip, z_disc],
                              outputs=[valid, fake, validity_interpolated])
    self.critic_model.compile(loss=[self.wasserstein_loss,
                                    self.wasserstein_loss,
                                    partial_gp_loss],
                              optimizer=optimizer,
                              loss_weights=[1, 1, 10])

    #-------------------------------
    # Construct Computational Graph
    # for the Generator
    #-------------------------------

    # For the generator we freeze the critic's layers
    self.critic.trainable = False
    self.generator.trainable = True

    # Sampled noise for input to the generator
    z_gen = Input(shape=(self.latent_dim,))
    # Generate clips from the noise
    clip = self.generator(z_gen)
    # Critic determines validity
    valid = self.critic(clip)
    # Defines the generator model
    self.generator_model = Model(z_gen, valid)
    self.generator_model.compile(loss=self.wasserstein_loss, optimizer=optimizer)
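# The helpers referenced above (RandomWeightedAverage, wasserstein_loss,
# gradient_penalty_loss) are not included in this excerpt. The sketch below
# follows the standard Keras WGAN-GP formulation and is only an assumption
# about what the repo implements; the WGANGP host class name and the fixed
# batch size of 32 are illustrative.
import numpy as np
import keras.backend as K
from keras.layers.merge import _Merge

class RandomWeightedAverage(_Merge):
    """Random point on the line between each real and generated clip."""
    def _merge_function(self, inputs):
        # Assumed batch size of 32; must match the batch size used in training
        alpha = K.random_uniform((32, 1, 1))
        return (alpha * inputs[0]) + ((1 - alpha) * inputs[1])

class WGANGP(object):  # hypothetical host class for the two loss methods
    def wasserstein_loss(self, y_true, y_pred):
        # Labels are +1 / -1, so this is just the signed mean critic score
        return K.mean(y_true * y_pred)

    def gradient_penalty_loss(self, y_true, y_pred, averaged_samples):
        # Penalize the critic's gradient norm at the interpolated clips
        # for deviating from 1
        gradients = K.gradients(y_pred, averaged_samples)[0]
        gradients_sqr = K.square(gradients)
        gradients_sqr_sum = K.sum(gradients_sqr,
                                  axis=np.arange(1, len(gradients_sqr.shape)))
        gradient_l2_norm = K.sqrt(gradients_sqr_sum)
        return K.mean(K.square(1 - gradient_l2_norm))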
import keras
from pydub import AudioSegment
from keras.models import Sequential, Model
from keras.layers.advanced_activations import LeakyReLU
from keras.layers import Input, Dense, UpSampling1D, Conv1D, Activation, Reshape, Flatten
import numpy as np
from playsound import play_sound, play_and_save_sound
from audio_loader import load_all

latent_dim = 100

#sound = AudioSegment.from_wav("input/speech_commands/bird/0a7c2a8d_nohash_1.wav")
sound = load_all("categorized", "cat", forceLoad=True)
play_and_save_sound(sound, "endtoend2", "original", upscale=False)

# Normalize 16-bit samples from [-32768, 32767] into roughly [0, 1]
sound = sound / 65536
sound = sound + 0.5

target = np.array(sound[0])
print(target.shape)
input_shape = (1, target.shape[0])
print(input_shape)

play_and_save_sound(sound, "endtoend2", "normalized", upscale=True)

model = Sequential()
model.add(Activation("relu", input_shape=input_shape))
#model.add(Conv1D(32, kernel_size=5, activation='selu', strides=2, padding="same"))
#model.add(UpSampling1D())
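# The model above is left unfinished in this excerpt. A minimal sketch of one
# way such an end-to-end test could continue (an assumption, not the repo's
# actual code): overfit a tiny decoder so that a fixed latent vector reproduces
# the single normalized target clip, then play the reconstruction back.
from keras.optimizers import Adam

decoder = Sequential()
decoder.add(Dense(256, activation='relu', input_shape=(latent_dim,)))
decoder.add(Dense(target.size, activation='sigmoid'))
decoder.compile(loss='mse', optimizer=Adam(0.001))

fixed_z = np.random.normal(0, 1, (1, latent_dim))
decoder.fit(fixed_z, target.reshape(1, -1), epochs=500, verbose=0)

reconstruction = decoder.predict(fixed_z)
# Reconstruction is still in the normalized [0, 1] range used above
play_and_save_sound(reconstruction, "endtoend2", "reconstructed", upscale=True)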
                                        strides=self._strides,
                                        *self._args, **self._kwargs))
        self._model.add(Lambda(lambda x: x[:, 0]))
        self._model.summary()
        super(Conv1DTranspose, self).build(input_shape)

    def call(self, x):
        return self._model(x)

    def compute_output_shape(self, input_shape):
        return self._model.compute_output_shape(input_shape)


os.environ["CUDA_VISIBLE_DEVICES"] = "0"

latent_dim = 10
save_folder = "bassnsynthupsample"

sound = load_all("nsynth", "bass_synthetic", forceLoad=True)
save_sound(sound, save_folder, "original", upscale=False)

# Normalize 16-bit samples from [-32768, 32767] into roughly [0, 1]
sound = sound / 65536
sound = sound + 0.5

target = np.array(sound[0])
target = target.reshape(1, target.shape[0], target.shape[1])
input_shape = (1, target.shape[0])
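# For context: the build() method above is cut off at the start of this excerpt.
# A Conv1DTranspose wrapper with this tail commonly follows the pattern below
# (a sketch of the usual Keras workaround, not necessarily the repo's exact
# code, hence the hypothetical class name): expand the 1-D input to a height-1
# "image", apply Conv2DTranspose, then squeeze the extra dimension back out.
import keras.backend as K
from keras.layers import Layer, Conv2DTranspose, Lambda
from keras.models import Sequential

class Conv1DTransposeSketch(Layer):
    def __init__(self, filters, kernel_size, strides=1, *args, **kwargs):
        self._filters = filters
        self._kernel_size = (1, kernel_size)
        self._strides = (1, strides)
        self._args, self._kwargs = args, kwargs
        super(Conv1DTransposeSketch, self).__init__()

    def build(self, input_shape):
        self._model = Sequential()
        # (batch, steps, channels) -> (batch, 1, steps, channels)
        self._model.add(Lambda(lambda x: K.expand_dims(x, axis=1),
                               batch_input_shape=input_shape))
        self._model.add(Conv2DTranspose(self._filters,
                                        kernel_size=self._kernel_size,
                                        strides=self._strides,
                                        *self._args, **self._kwargs))
        # (batch, 1, new_steps, filters) -> (batch, new_steps, filters)
        self._model.add(Lambda(lambda x: x[:, 0]))
        super(Conv1DTransposeSketch, self).build(input_shape)

    def call(self, x):
        return self._model(x)

    def compute_output_shape(self, input_shape):
        return self._model.compute_output_shape(input_shape)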