class ModelCritic(tf.keras.Model):
    """State-value critic: three ELU hidden layers and a linear scalar head.

    ``setup`` must be called before ``learn``: it stores the discount factor
    and creates the optimizer that ``learn`` uses.
    """

    def __init__(self, input_dims):
        """Create the layers.

        Args:
            input_dims: shape passed to the (unused) symbolic ``Input``.
        """
        super().__init__()
        # Kept for backward compatibility only: ``call`` feeds its argument
        # straight into ``fc1`` and never touches this symbolic input.
        self.state_input = Input(shape=input_dims, name="state_input")
        self.fc1 = Dense(
            512, activation='elu', name='forward1',
            kernel_initializer=keras.initializers.RandomUniform(minval=-1./512, maxval=1./512))
        self.fc2 = Dense(
            256, activation='elu', name='forward2',
            kernel_initializer=keras.initializers.RandomUniform(minval=-1./256, maxval=1./256))
        self.fc3 = Dense(
            128, activation='elu', name='forward3',
            kernel_initializer=keras.initializers.RandomUniform(minval=-1./128, maxval=1./128))
        self.value_func = Dense(
            1, activation='linear', name='value_func',
            kernel_initializer=keras.initializers.RandomUniform(minval=-3e-4, maxval=3e-4))

    def call(self, input_data):
        """Return the value estimate for ``input_data``."""
        x = self.fc1(input_data)
        x = self.fc2(x)
        x = self.fc3(x)
        v = self.value_func(x)
        return v

    def setup(self, gamma=0.99):
        """Store the discount factor and build the optimizer.

        The learning rate is read from the module-level ``hyperparam`` mapping
        under the key ``'critic_lr'``.
        """
        self.gamma = gamma
        # ``learning_rate`` is the supported keyword; ``lr`` is a deprecated
        # alias that newer Keras releases reject.
        self.optimizer = Adam(learning_rate=hyperparam['critic_lr'])

    def learn(self, reward, prev_state, state):
        """Run one TD(0) update.

        Args:
            reward: reward observed for the transition.
            prev_state: state before the transition (network input).
            state: state after the transition (network input).

        Returns:
            Tuple ``(td, c_loss)``: the TD error and its square.
        """
        with tf.GradientTape() as tape:
            v_1 = self(prev_state, training=True)
            v = self(state, training=True)
            # NOTE(review): both value estimates are computed under the tape,
            # so the gradient also flows through the bootstrapped target ``v``.
            # Confirm this is intended rather than a stop_gradient target.
            td = reward + self.gamma * v - v_1
            c_loss = td ** 2
        grads = tape.gradient(c_loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
        return td, c_loss
class World_01(World_00):
    """World with a replay memory and a target network for Q-learning updates."""

    def __init__(self):
        World_00.__init__(self)
        self.memory = deque(maxlen=2000)  # replay buffer of transitions
        self.N_batch = 64  # minibatch size sampled from the buffer
        self.t_model = create_q_model(self.num_states, self.num_actions)
        self.discount_factor = 0.99
        self.learning_rate = 0.001
        # ``learning_rate`` is the supported keyword; ``lr`` is a deprecated alias.
        self.optimizer = Adam(learning_rate=self.learning_rate)

    def trial(self, flag_render=False):
        """Run 10 episodes with the current model, filling ``self.memory``."""
        env_test_model_memory(self.memory, self.env, self.model,
                              n_episodes=10, flag_render=flag_render)
        print(len(self.memory))

    def train_memory(self):
        """Perform one gradient step on a random minibatch from the memory.

        Does nothing until the memory holds at least ``N_batch`` transitions.
        """
        if len(self.memory) < self.N_batch:
            return

        memory_batch = random.sample(self.memory, self.N_batch)
        s_l, a_l, r_l, next_s_l, done_l = [
            np.array(x) for x in list_rotate(memory_batch)]
        model_w = self.model.trainable_variables

        # The bootstrapped target depends only on the target network, so it is
        # built outside the tape with TF ops and explicit casts instead of
        # mixing NumPy float64 arrays with float32 tensors.
        next_q = self.t_model(next_s_l)
        max_next_q = tf.reduce_max(next_q, axis=-1)
        rewards = tf.cast(r_l, max_next_q.dtype)
        not_done = 1.0 - tf.cast(done_l, max_next_q.dtype)
        target_q = tf.stop_gradient(
            rewards + not_done * self.discount_factor * max_next_q)

        with tf.GradientTape() as tape:
            q_all = self.model(s_l)
            # Select Q(s, a) for the action actually taken.
            taken_mask = tf.one_hot(a_l, self.num_actions)
            q_taken = tf.reduce_sum(taken_mask * q_all, axis=1)
            loss = tf.reduce_mean(tf.square(target_q - q_taken))

        grads = tape.gradient(loss, model_w)
        self.optimizer.apply_gradients(zip(grads, model_w))
class Agent():
    """One-step actor-critic agent with separate actor and critic optimizers."""

    def __init__(self, num_actions, gamma=Constants.discount_factor):
        # ``learn`` reads ``self.gamma``; it was previously never assigned.
        self.gamma = gamma
        # ``learning_rate`` is the supported keyword; ``lr`` is a deprecated alias.
        self.actor_opt = Adam(learning_rate=Constants.lr)
        self.critic_opt = Adam(learning_rate=Constants.lr)
        self.actor = Actor(num_actions)
        self.critic = Critic()

    def state2vec(self, s):
        """Convert a state into a ``(1, len(s) + 1)`` array.

        A constant ``1`` is appended, then every entry flagged as standardized
        and non-binary in ``Constants.state`` is rescaled as
        ``(value - range[0]) / range[1]``.
        """
        temp = list(s)
        temp.append(1)
        for param in Constants.state:
            spec = Constants.state[param]
            if spec.standardized and spec.type != "binary":
                temp[spec.index] = float(temp[spec.index] - spec.range[0]) / spec.range[1]
        return np.array([temp])

    def act(self, state):
        """Sample an action from the actor's categorical distribution."""
        prob = self.actor(self.state2vec(state))
        dist = Categorical(probs=prob, dtype=tf.float32)
        action = dist.sample()
        # NOTE(review): the sampled action is passed through ``state2vec``
        # (which appends 1 and applies state standardization). This looks
        # unintended but is kept so the return value is unchanged for callers.
        return self.state2vec(action)

    def actor_loss(self, prob, action, temporal_diff):
        """Return ``-log pi(action) * temporal_diff``."""
        # Fixed: the keyword was misspelled ``dytpe`` and referenced an
        # undefined ``dtype`` name; now matches the construction in ``act``.
        dist = Categorical(probs=prob, dtype=tf.float32)
        log_prob = dist.log_prob(action)
        loss = -log_prob * temporal_diff
        return loss

    def learn(self, cur_state, action, new_state, reward):
        """Update actor and critic from one transition.

        Returns:
            Tuple ``(actor_loss, critic_loss)``.
        """
        cur_state = self.state2vec(cur_state)
        new_state = self.state2vec(new_state)
        with GradientTape() as actor_tape, GradientTape() as critic_tape:
            # Fixed: the original referenced an undefined name ``state`` here.
            prob = self.actor(cur_state, training=True)
            value = self.critic(cur_state, training=True)
            value_new = self.critic(new_state, training=True)
            temporal_diff = reward + self.gamma * value_new - value
            actor_loss = self.actor_loss(prob, action, temporal_diff)
            critic_loss = temporal_diff ** 2
        actor_grads = actor_tape.gradient(
            actor_loss, self.actor.trainable_variables)
        critic_grads = critic_tape.gradient(
            critic_loss, self.critic.trainable_variables)
        self.actor_opt.apply_gradients(
            zip(actor_grads, self.actor.trainable_variables))
        self.critic_opt.apply_gradients(
            zip(critic_grads, self.critic.trainable_variables))
        return actor_loss, critic_loss

    def save_agent(self, save_dir):
        """Save actor and critic under ``save_dir + "actor"`` / ``"critic"``."""
        saved_model.save(self.actor, save_dir + "actor")
        saved_model.save(self.critic, save_dir + "critic")

    def load_agent(self):
        """Load actor and critic from ``Constants.load_model_dir``."""
        self.actor = saved_model.load(Constants.load_model_dir + "actor")
        self.critic = saved_model.load(Constants.load_model_dir + "critic")
class ModelActor(tf.keras.Model):
    """Gaussian policy network producing a mean and a positive scale per action.

    ``setup`` must be called before ``learn``: it creates the optimizer.
    """

    def __init__(self, input_dims, no_action=2):
        """Create the layers.

        Args:
            input_dims: shape passed to the (unused) symbolic ``Input``.
            no_action: number of action dimensions (width of both heads).
        """
        super().__init__()
        # Kept for backward compatibility only; ``call`` never uses it.
        self.state_input = Input(shape=input_dims, name="state_input")
        self.fc1 = Dense(
            1024, activation='elu', name='forward1',
            kernel_initializer=keras.initializers.RandomUniform(minval=-1./1024, maxval=1./1024))
        self.fc2 = Dense(
            512, activation='elu', name='forward2',
            kernel_initializer=keras.initializers.RandomUniform(minval=-1./512, maxval=1./512))
        self.fc3 = Dense(
            256, activation='elu', name='forward3',
            kernel_initializer=keras.initializers.RandomUniform(minval=-1./256, maxval=1./256))
        self.fc4 = Dense(
            128, activation='elu', name='forward4',
            kernel_initializer=keras.initializers.RandomUniform(minval=-1./128, maxval=1./128))
        self.mu = Dense(
            no_action, activation='linear', name='mu1',
            kernel_initializer=keras.initializers.RandomUniform(minval=-3e-3, maxval=3e-3))
        self.sigma = Dense(
            no_action, activation='linear', name='sigma1',
            kernel_initializer=keras.initializers.RandomUniform(minval=-3e-3, maxval=3e-3))

    def call(self, input_data):
        """Return ``(mu, sigma)`` for ``input_data``; ``sigma`` is strictly positive."""
        x = self.fc1(input_data)
        x = self.fc2(x)
        x = self.fc3(x)
        x = self.fc4(x)
        probmu = self.mu(x)
        probsigma = self.sigma(x)
        # softplus keeps the scale positive; the offset keeps it away from zero.
        probsigma = tf.nn.softplus(probsigma) + 1e-5
        return probmu, probsigma

    def setup(self, gamma=0.99):
        """Store the discount factor and build the optimizer.

        The learning rate is read from the module-level ``hyperparam`` mapping
        under the key ``'actor_lr'``.
        """
        self.gamma = gamma
        # ``learning_rate`` is the supported keyword; ``lr`` is a deprecated alias.
        self.optimizer = Adam(learning_rate=hyperparam['actor_lr'])

    def act(self, state):
        """Sample one action from the policy; returns a NumPy array."""
        probmu, probsigma = self(np.array(state))
        dist = tfp.distributions.Normal(loc=probmu.numpy(), scale=probsigma.numpy())
        action = dist.sample([1])
        return action.numpy()

    def actor_loss(self, probmu, probsigma, actions, td):
        """Return ``-log pi(actions) * td`` under ``Normal(probmu, probsigma)``."""
        dist = tfp.distributions.Normal(loc=probmu, scale=probsigma)
        log_prob = dist.log_prob(actions + 1e-5)
        loss = -log_prob * td
        return loss

    def learn(self, prev_state, td, action=None):
        """Run one policy-gradient update.

        Args:
            prev_state: state in which the action was taken.
            td: TD error (advantage estimate) for that transition.
            action: the action that was actually executed in ``prev_state``.
                Pass it so the update scores the action that produced ``td``.
                When omitted, a fresh action is sampled, which matches the
                previous behaviour of this method.

        Returns:
            The actor loss.
        """
        if action is None:
            # Legacy path: sampling yields a NumPy constant, so doing it
            # outside the tape does not change the gradient.
            action = self.act(prev_state)
        with tf.GradientTape() as tape:
            pm, ps = self(prev_state, training=True)
            a_loss = self.actor_loss(pm, ps, action, td)
        grads = tape.gradient(a_loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
        return a_loss
def _create_actor_model(self):
    """Build the actor network and its graph-mode training op.

    Side effects: sets ``self._action_gradient`` (placeholder for the gradient
    of the objective w.r.t. the actor output) and ``self._optimize_actor``
    (op that applies the chained gradients to the actor weights).

    Returns:
        Tuple ``(state_input, model)``.
    """
    n_actions = self.env.action_space.shape[0]

    # Single hidden layer followed by a softmax head.
    state_input = Input(shape=self.env.observation_space.shape)
    hidden = Dense(24)(state_input)
    output = Dense(n_actions, activation='softmax')(hidden)

    model = Model(inputs=state_input, outputs=output)
    model.compile(loss="mse", optimizer=Adam(lr=0.001))

    # Gradient fed in from outside; negated so the optimizer's descent step
    # ascends along it.
    self._action_gradient = tf.placeholder(tf.float32, [None, n_actions])
    actor_weights = model.trainable_weights
    chained_grads = tf.gradients(output, actor_weights, -self._action_gradient)

    actor_optimizer = tf.train.AdamOptimizer(.000001)
    self._optimize_actor = actor_optimizer.apply_gradients(
        zip(chained_grads, actor_weights))
    return state_input, model
t2 = tf.reshape( sync_in2, (sync_in2.shape[0], sync_in2.shape[1], sync_in2.shape[2], 1)) # t1.shape # t2.shape p1 = model(t1) p2 = model(t2) p1 = tf.concat([p1], 0) print(p1.numpy()) # sync_la = loader.encode_multihot(sync_la) # sync_la2 = loader.encode_multihot(sync_la2) # sync_la = tf.convert_to_tensor(sync_la) loss = tf.losses.binary_crossentropy(sync_la, p1) default_opt = Adam(learning_rate=1e-3) grad = tape.gradient(loss, model.trainable_variables) print(grad) default_opt.apply_gradients(zip(grad, model.trainable_variables)) pc = np.concatenate([p1, p2]) k = loader.encode_multihot(sync_la) np.array(k).shape tf.losses.binary_crossentropy(k, model(t1)).numpy().shape
class DCGAN:
    """Deep convolutional GAN over spectrograms of shape (25, 342, channels)."""

    def __init__(self, channels=1, batchsize=50):
        # Dataset features:
        self.channels = channels
        self.freq_sample = 25
        self.time_sample = 342
        self.eeg_shape = (self.freq_sample, self.time_sample, self.channels)

        # Model specific parameters (noise size, dropout, LeakyReLU slope,
        # batch-norm momentum):
        self.noise = 100
        self.dropout = 0.25
        self.alpha = 0.2
        self.momentum = 0.8
        self.batchsize = batchsize

        # Adam(learning_rate=0.0002, beta_1=0.2); per the original author's
        # note these values come from the EEG GAN paper.
        # Each network gets its own optimizer so Adam's step counter and slot
        # variables are not shared between generator and discriminator.
        # ``self.optimiser`` is kept as the generator optimizer for callers
        # that reference the old attribute.
        self.optimiser = Adam(0.0002, 0.2)
        self.disc_optimiser = Adam(0.0002, 0.2)

        # Build both the generator and the discriminator.
        self.generator = self.make_generator()
        self.discriminator = self.make_discriminator()

        # Directory where sample figures are written.
        self.dir = 'EEG_samples'

    def make_generator(self):
        """Build the generator.

        Maps a noise vector of length ``self.noise`` through a dense layer and
        three transposed convolutions to a tanh output of shape
        ``(25, 342, channels)``.

        :return: the generator ``Sequential`` model
        """
        model = Sequential()
        model.add(Dense(4 * 41 * 256, use_bias=False, input_shape=(self.noise,)))
        model.add(BatchNormalization(momentum=self.momentum))
        model.add(LeakyReLU())
        model.add(Reshape((4, 41, 256)))

        # (4, 41) -> (11, 84)
        model.add(Conv2DTranspose(128, (5, 4), strides=(2, 2), padding='valid', use_bias=False))
        model.add(BatchNormalization(momentum=self.momentum))
        model.add(LeakyReLU())

        # (11, 84) -> (25, 171)
        model.add(Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='valid', use_bias=False))
        model.add(BatchNormalization(momentum=self.momentum))
        model.add(LeakyReLU())

        # (25, 171) -> (25, 342); tanh output, also based on the EEG paper.
        model.add(Conv2DTranspose(self.channels, (5, 5), strides=(1, 2), padding='same',
                                  use_bias=False, activation='tanh'))
        assert model.output_shape == (None, 25, 342, self.channels)
        return model

    def make_discriminator(self):
        """Build the discriminator.

        A two-layer CNN that ends in a single unit without activation; its
        output is treated as a logit by the loss functions.

        :return: the discriminator ``Sequential`` model
        """
        model = Sequential()
        model.add(Conv2D(64, (5, 5), strides=(2, 2), padding='same',
                         input_shape=[25, 342, self.channels]))
        model.add(BatchNormalization(momentum=self.momentum))
        model.add(LeakyReLU(alpha=self.alpha))
        model.add(Dropout(self.dropout))

        model.add(Conv2D(128, (5, 5), strides=(1, 2), padding='same'))
        model.add(BatchNormalization(momentum=self.momentum))
        model.add(LeakyReLU(alpha=self.alpha))
        model.add(Dropout(self.dropout))

        model.add(Flatten())
        model.add(Dense(1))
        assert model.output_shape == (None, 1)
        return model

    def make_fakedata(self, noise_shape=100):
        """Generate fake samples from standard-normal noise.

        :param noise_shape: number of samples to generate
        :return: generated signals, and the noise array that produced them
        """
        noise = np.random.normal(0, 1, (noise_shape, self.noise))
        gen_imgs = self.generator.predict(noise)
        return gen_imgs, noise

    def discriminator_loss(self, real_output, fake_output):
        """Binary cross-entropy (from logits) discriminator loss.

        :return: sum of the loss on real and on fake samples
        """
        cross_entropy = BinaryCrossentropy(from_logits=True)
        # Real samples are labelled 1, generated samples are labelled 0.
        real_loss = cross_entropy(tf.ones_like(real_output), real_output)
        fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
        total_loss = real_loss + fake_loss
        return total_loss

    def generator_loss(self, fake_output):
        """Binary cross-entropy (from logits) generator loss.

        The generator is rewarded when fake samples are classified as real.

        :return: generator loss value
        """
        cross_entropy = BinaryCrossentropy(from_logits=True)
        return cross_entropy(tf.ones_like(fake_output), fake_output)

    @tf.function
    def train_step(self, images):
        """Run one simultaneous generator/discriminator update.

        :param images: batch of real samples
        :return: discriminator loss and generator loss
        """
        # Match the fake batch to the real batch so a short final batch is
        # not paired with a full-size fake batch.
        noise = tf.random.normal([tf.shape(images)[0], self.noise])

        with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
            generated_images = self.generator(noise, training=True)

            real_output = self.discriminator(images, training=True)
            fake_output = self.discriminator(generated_images, training=True)

            gen_loss = self.generator_loss(fake_output)
            disc_loss = self.discriminator_loss(real_output, fake_output)

        grad_gen = gen_tape.gradient(gen_loss, self.generator.trainable_variables)
        grad_disc = disc_tape.gradient(disc_loss, self.discriminator.trainable_variables)

        self.optimiser.apply_gradients(zip(grad_gen, self.generator.trainable_variables))
        self.disc_optimiser.apply_gradients(
            zip(grad_disc, self.discriminator.trainable_variables))
        return disc_loss, gen_loss

    def _save_comparison_figure(self, dataset, epoch):
        """Plot one generated sample next to one real trial and save it as a PNG."""
        generated_signal, _ = self.make_fakedata(noise_shape=100)
        # Trial 30 is used when available; clamp so small datasets still work.
        trial_num, channel = min(30, len(dataset) - 1), 0
        real_signal = np.expand_dims(dataset[trial_num], axis=0)

        fig, axs = plt.subplots(1, 2)
        fig.suptitle('Comparison of Generated vs. Real Signal (Spectrogram) for one trial, one channel')
        fig.tight_layout()
        axs[0].imshow(generated_signal[0, :, :, channel], aspect='auto')
        axs[0].set_title('Generated Signal', size=10)
        axs[0].set_xlabel('Time Sample')
        axs[0].set_ylabel('Frequency Sample')
        axs[1].imshow(real_signal[0, :, :, channel], aspect='auto')
        # This panel shows the real trial (it was mislabelled 'Fake Signal').
        axs[1].set_title('Real Signal', size=10)
        axs[1].set_xlabel('Time Sample')
        axs[1].set_ylabel('Frequency Sample')

        # Save before showing: once ``plt.show()`` returns the figure may
        # already be destroyed, which would write an empty image.
        os.makedirs(self.dir, exist_ok=True)
        plt.savefig("%s/%d.png" % (self.dir, epoch))
        plt.show()
        plt.close(fig)

    def train(self, dataset, epochs, sample_interval=100):
        """Train for ``epochs`` epochs and plot the loss history.

        :param dataset: array of real samples, indexed by trial on axis 0
        :param epochs: number of passes over ``dataset``
        :param sample_interval: save a comparison figure every this many epochs
        """
        g_tot, d_tot = [], []
        data = tf.data.Dataset.from_tensor_slices(dataset.astype('float32'))\
            .shuffle(dataset.shape[0]).batch(self.batchsize)

        for epoch in range(epochs):
            # Reset per epoch so each history point is that epoch's mean,
            # not a running mean over every batch seen so far.
            gen_loss, disc_loss = [], []
            for image_batch in data:
                disc_loss_batch, gen_loss_batch = self.train_step(image_batch)
                gen_loss.append(float(gen_loss_batch))
                disc_loss.append(float(disc_loss_batch))

            g_loss = sum(gen_loss) / len(gen_loss)
            d_loss = sum(disc_loss) / len(disc_loss)
            g_tot.append(g_loss)
            d_tot.append(d_loss)

            if epoch % sample_interval == 0:
                print("epoch: {}, generator loss: {}, discriminator loss: {}".format
                      (epoch, g_loss, d_loss))
                self._save_comparison_figure(dataset, epoch)

        # Plot the generator and discriminator losses for all the epochs.
        plt.figure()
        plt.plot(g_tot, 'r')
        plt.plot(d_tot, 'b')
        plt.title('Loss history')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend(['Generator', 'Discriminator'])
        plt.grid()
        plt.show()
class CriticNet():
    """Twin Q-value critic: two Q-networks, each with a soft-updated target copy."""

    def __init__(self, in_dim, out_dim, lr_, tau_, discount_factor):
        """
        Args:
            in_dim: observation shape, passed to ``Input(shape=...)``.
            out_dim: action dimension.
            lr_: learning rate for both Adam optimizers.
            tau_: soft-update coefficient used by ``target_update``.
            discount_factor: stored on the instance; not used in this class.
        """
        self.obs_dim = in_dim
        self.act_dim = out_dim
        self.lr = lr_
        self.discount_factor = discount_factor
        self.tau = tau_

        # initialize critic networks and their targets
        self.network_1, self.network_2 = self.create_network(), self.create_network()
        self.target_network_1, self.target_network_2 = (
            self.create_network(), self.create_network())

        self.optimizer1, self.optimizer2 = Adam(self.lr), Adam(self.lr)

        # targets start as exact copies of their online networks
        self.target_network_1.set_weights(self.network_1.get_weights())
        self.target_network_2.set_weights(self.network_2.get_weights())

        self.critic_loss = None

    def create_network(self):
        """Create a Keras model approximating Q(observation, action)."""
        # input layer (observations and actions)
        input_obs = Input(shape=self.obs_dim)
        input_act = Input(shape=(self.act_dim, ))
        inputs = [input_obs, input_act]
        concat = Concatenate(axis=-1)(inputs)

        # hidden layer 1
        h1_ = Dense(24, kernel_initializer=GlorotNormal(),
                    kernel_regularizer=l2(0.01))(concat)
        h1_b = BatchNormalization()(h1_)
        h1 = Activation('relu')(h1_b)

        # hidden layer 2
        h2_ = Dense(16, kernel_initializer=GlorotNormal(),
                    kernel_regularizer=l2(0.01))(h1)
        h2_b = BatchNormalization()(h2_)
        h2 = Activation('relu')(h2_b)

        # output layer (scalar Q-value)
        output_ = Dense(1, kernel_initializer=GlorotNormal(),
                        kernel_regularizer=l2(0.01))(h2)
        output_b = BatchNormalization()(output_)
        output = Activation('linear')(output_b)

        return Model(inputs, output)

    def train(self, obs, acts, target):
        """Train both Q-networks on a sampled batch with an MSE loss.

        Stores the smaller of the two losses in ``self.critic_loss``.
        """
        # NOTE(review): the ``kernel_regularizer`` penalties declared in
        # ``create_network`` are not added to either loss below, so they do
        # not influence these gradients. Confirm whether that is intended.
        with tf.GradientTape() as tape1:
            q1_values = self.network_1([obs, acts], training=True)
            critic_loss_1 = tf.reduce_mean(tf.math.square(q1_values - target))
        critic_grad_1 = tape1.gradient(
            critic_loss_1, self.network_1.trainable_variables)
        self.optimizer1.apply_gradients(
            zip(critic_grad_1, self.network_1.trainable_variables))

        with tf.GradientTape() as tape2:
            q2_values = self.network_2([obs, acts], training=True)
            critic_loss_2 = tf.reduce_mean(tf.math.square(q2_values - target))
        critic_grad_2 = tape2.gradient(
            critic_loss_2, self.network_2.trainable_variables)
        self.optimizer2.apply_gradients(
            zip(critic_grad_2, self.network_2.trainable_variables))

        tf.print("critic loss :", critic_loss_1, critic_loss_2)
        self.critic_loss = float(tf.minimum(critic_loss_1, critic_loss_2))

    def _soft_update(self, network, target_network):
        """Move ``target_network`` towards ``network``: t <- tau*w + (1-tau)*t."""
        blended = [
            self.tau * w + (1 - self.tau) * w_t
            for w, w_t in zip(network.get_weights(), target_network.get_weights())
        ]
        target_network.set_weights(blended)

    def target_update(self):
        """Soft-update both target critic networks."""
        self._soft_update(self.network_1, self.target_network_1)
        self._soft_update(self.network_2, self.target_network_2)

    def save_network(self, path):
        """Save the weights of all four networks using ``path`` as a prefix."""
        self.network_1.save_weights(path + '_critic1.h5')
        self.target_network_1.save_weights(path + '_critic1_t.h5')
        self.network_2.save_weights(path + '_critic2.h5')
        self.target_network_2.save_weights(path + '_critic2_t.h5')

    def load_network(self, path):
        """Load the weights of all four networks using ``path`` as a prefix."""
        self.network_1.load_weights(path + '_critic1.h5')
        self.target_network_1.load_weights(path + '_critic1_t.h5')
        self.network_2.load_weights(path + '_critic2.h5')
        self.target_network_2.load_weights(path + '_critic2_t.h5')
class WasserGAN_GP():
    """Wasserstein GAN with gradient penalty over (25, 342, channels) spectrograms."""

    def __init__(self, channels=1, batchsize=32, task=1, subject=1):
        # Dataset features:
        self.channels = channels
        self.freq_sample = 25
        self.time_sample = 342
        self.eeg_shape = (self.freq_sample, self.time_sample, self.channels)

        # Model specific parameters (noise size, dropout, LeakyReLU slope,
        # batch-norm momentum):
        self.noise = 100
        self.dropout = 0.25
        self.alpha = 0.2
        self.momentum = 0.8
        self.batchsize = batchsize
        self.critic_iter = 5  # critic updates per generator update
        self.gp_weight = 10   # weight of the gradient penalty in the critic loss

        # Adam(learning_rate=0.0002, beta_1=0.2) for the generator; per the
        # original author's note these values come from the EEG GAN paper.
        self.gen_optimiser = Adam(0.0002, 0.2)
        # The critic deliberately uses a different optimiser (RMSprop).
        self.critic_optimiser = RMSprop(0.0005)

        # Build both the generator and the critic.
        self.generator = self.make_generator()
        self.critic = self.make_critic()

        # Directory where sample figures are written.
        self.dir = 'EEG_samples'
        self.subject = subject  # used in the saved model's file name
        self.task = task        # used in the saved model's file name

    def make_generator(self):
        """Build the generator.

        Maps a noise vector of length ``self.noise`` through a dense layer and
        three transposed convolutions to a tanh output of shape
        ``(25, 342, channels)``.

        :return: the generator ``Sequential`` model
        """
        model = Sequential()
        model.add(Dense(4 * 41 * 256, use_bias=False, input_shape=(self.noise, )))
        model.add(BatchNormalization(momentum=self.momentum))
        model.add(LeakyReLU())
        model.add(Reshape((4, 41, 256)))

        # (4, 41) -> (11, 84)
        model.add(Conv2DTranspose(128, (5, 4), strides=(2, 2), padding='valid', use_bias=False))
        model.add(BatchNormalization(momentum=self.momentum))
        model.add(LeakyReLU())

        # (11, 84) -> (25, 171)
        model.add(Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='valid', use_bias=False))
        model.add(BatchNormalization(momentum=self.momentum))
        model.add(LeakyReLU())

        # (25, 171) -> (25, 342); tanh output, also based on the EEG paper.
        model.add(Conv2DTranspose(self.channels, (5, 5), strides=(1, 2), padding='same',
                                  use_bias=False, activation='tanh'))
        assert model.output_shape == (None, 25, 342, self.channels)
        return model

    def make_critic(self):
        """Build the critic.

        A two-layer CNN that ends in a single unit without activation; the
        output is an unbounded score rather than a probability.

        :return: the critic ``Sequential`` model
        """
        model = Sequential()
        model.add(Conv2D(64, (5, 5), strides=(2, 2), padding='same',
                         input_shape=[25, 342, self.channels]))
        model.add(BatchNormalization())
        model.add(LeakyReLU(alpha=self.alpha))
        model.add(Dropout(self.dropout))

        model.add(Conv2D(128, (5, 5), strides=(1, 2), padding='same'))
        model.add(BatchNormalization())
        model.add(LeakyReLU(alpha=self.alpha))
        model.add(Dropout(self.dropout))

        model.add(Flatten())
        model.add(Dense(1))
        assert model.output_shape == (None, 1)
        return model

    def make_fakedata(self, noise_shape=100):
        """Generate fake samples from standard-normal noise.

        :param noise_shape: number of samples to generate
        :return: generated signals, and the noise array that produced them
        """
        noise = np.random.normal(0, 1, (noise_shape, self.noise))
        return self.generator(noise, training=False), noise

    def critic_loss(self, f_logits, r_logits):
        """Wasserstein critic loss: mean fake score minus mean real score.

        :param f_logits: critic scores of generated signals
        :param r_logits: critic scores of real signals
        :return: Wasserstein critic loss value
        """
        return reduce_mean(f_logits) - reduce_mean(r_logits)

    def generator_loss(self, f_logits):
        """Wasserstein generator loss: negative mean critic score of fakes.

        :param f_logits: critic scores of generated signals
        :return: Wasserstein generator loss value
        """
        return -reduce_mean(f_logits)

    def gradient_penalty(self, critic, real_signal, fake_signal):
        """Gradient penalty enforcing the 1-Lipschitz constraint on the critic.

        Evaluates the critic at random points on the segments between real
        and fake samples and penalises the squared deviation of each sample's
        gradient L2 norm from 1.

        :param critic: callable mapping a batch of signals to scores
        :param real_signal: batch of real signals
        :param fake_signal: batch of generated signals (same shape)
        :return: scalar gradient penalty to add to the critic loss
        """
        batch = tf.shape(real_signal)[0]
        delta = tf.random.uniform([batch, 1, 1, 1], 0., 1.)
        # Fixed: interpolate between real and fake. The previous expression
        # ``real + delta * (real - fake)`` moved away from the fake sample.
        inter = real_signal + delta * (fake_signal - real_signal)

        with tf.GradientTape() as tape:
            tape.watch(inter)
            pred = critic(inter)

        # Fixed: keep the gradient of every sample (the previous ``[0]`` kept
        # only the first) and take one norm per sample.
        grad = tape.gradient(pred, inter)
        # The small constant keeps sqrt differentiable at a zero gradient.
        grad_norm = tf.sqrt(tf.reduce_sum(tf.square(grad), axis=[1, 2, 3]) + 1e-12)
        # Fixed: penalise (norm - 1)^2 instead of returning the raw norm.
        return tf.reduce_mean(tf.square(grad_norm - 1.0))

    @tf.function
    def train_step(self, sig):
        """Run ``critic_iter`` critic updates followed by one generator update.

        :param sig: batch of real signals
        :return: critic loss of the last critic iteration, and generator loss
        """
        batch = tf.shape(sig)[0]

        for _ in range(self.critic_iter):
            with tf.GradientTape() as disc_tape:
                noise = tf.random.normal([batch, self.noise])
                gen_sig = self.generator(noise, training=True)

                f_logits = self.critic(gen_sig, training=True)
                r_logits = self.critic(sig, training=True)

                critic_loss = self.critic_loss(f_logits, r_logits)
                gp = self.gradient_penalty(
                    partial(self.critic, training=True), sig, gen_sig)
                critic_loss += self.gp_weight * gp

            disc_grads = disc_tape.gradient(critic_loss, self.critic.trainable_variables)
            self.critic_optimiser.apply_gradients(
                zip(disc_grads, self.critic.trainable_variables))

        noise = tf.random.normal([batch, self.noise])
        with tf.GradientTape() as gen_tape:
            gen_sig = self.generator(noise, training=True)
            f_logits = self.critic(gen_sig, training=True)
            gen_loss = self.generator_loss(f_logits)

        gen_grads = gen_tape.gradient(gen_loss, self.generator.trainable_variables)
        self.gen_optimiser.apply_gradients(
            zip(gen_grads, self.generator.trainable_variables))

        return critic_loss, gen_loss

    def _save_comparison_figure(self, dataset, epoch):
        """Plot one generated sample next to one real trial and save it as a PNG."""
        generated_signal, _ = self.make_fakedata(noise_shape=100)
        # Trial 30 is used when available; clamp so small datasets still work.
        trial_num, channel = min(30, len(dataset) - 1), 0
        real_signal = np.expand_dims(dataset[trial_num], axis=0)

        fig, axs = plt.subplots(1, 2)
        fig.suptitle(
            'Comparison of Generated vs. Real Signal (Spectrogram) for one trial, one channel'
        )
        fig.tight_layout()
        axs[0].imshow(generated_signal[0, :, :, channel], aspect='auto')
        axs[0].set_title('Generated Signal', size=10)
        axs[0].set_xlabel('Time Sample')
        axs[0].set_ylabel('Frequency Sample')
        axs[1].imshow(real_signal[0, :, :, channel], aspect='auto')
        # This panel shows the real trial (it was mislabelled 'Fake Signal').
        axs[1].set_title('Real Signal', size=10)
        axs[1].set_xlabel('Time Sample')
        axs[1].set_ylabel('Frequency Sample')

        # Save before showing: once ``plt.show()`` returns the figure may
        # already be destroyed, which would write an empty image.
        os.makedirs(self.dir, exist_ok=True)
        plt.savefig("%s/%d.png" % (self.dir, epoch))
        plt.show()
        plt.close(fig)

    def train(self, dataset, epochs, sample_interval=100):
        """Train for ``epochs`` epochs, plot the loss history, save the generator.

        :param dataset: array of real samples, indexed by trial on axis 0
        :param epochs: number of passes over ``dataset``
        :param sample_interval: save a comparison figure every this many epochs
        """
        g_tot, d_tot = [], []

        data = tf.data.Dataset.from_tensor_slices(dataset.astype('float32')) \
            .shuffle(dataset.shape[0]).batch(self.batchsize)

        for epoch in range(epochs):
            # Reset per epoch so each history point is that epoch's mean,
            # not a running mean over every batch seen so far.
            gen_loss, disc_loss = [], []
            for image_batch in data:
                disc_loss_batch, gen_loss_batch = self.train_step(image_batch)
                # ``train_step`` returns the loss of the final critic
                # iteration only, so it is logged as-is rather than divided
                # by ``critic_iter``.
                disc_loss.append(float(tf.reduce_mean(disc_loss_batch).numpy()))
                gen_loss.append(float(tf.reduce_mean(gen_loss_batch).numpy()))

            g_loss = sum(gen_loss) / len(gen_loss)
            d_loss = sum(disc_loss) / len(disc_loss)
            g_tot.append(g_loss)
            d_tot.append(d_loss)

            if epoch % sample_interval == 0:
                print("epoch: {}, generator loss: {}, discriminator loss: {}".
                      format(epoch, g_loss, d_loss))
                self._save_comparison_figure(dataset, epoch)

        # Plot the generator and discriminator losses for all the epochs.
        plt.figure()
        plt.plot(g_tot, 'r')
        plt.plot(d_tot, 'b')
        plt.title('Loss history')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend(['Generator', 'Discriminator'])
        plt.grid()
        plt.show()

        # Save the generator so it can be used to produce fake samples later.
        # This folder name differs in case from ``self.dir``, so it is created
        # explicitly; on case-sensitive file systems it would not exist yet.
        fp = os.path.join(os.getcwd(), 'EEG_Samples')
        os.makedirs(fp, exist_ok=True)
        sp = os.path.join(
            fp, 'Subject{}WGAN_Model_Data_For_Task{}.h5'.format(
                self.subject, self.task))
        self.generator.save(sp)
class ActorNet():
    """Deterministic actor network with a soft-updated target copy."""

    def __init__(self, in_dim, out_dim, act_range, lr_, tau_):
        """
        Args:
            in_dim: observation shape, passed to ``Input(shape=...)``.
            out_dim: action dimension.
            act_range: factor applied to the tanh output of the network.
            lr_: learning rate for the Adam optimizer.
            tau_: soft-update coefficient used by ``target_update``.
        """
        self.obs_dim = in_dim
        self.act_dim = out_dim
        self.act_range = act_range
        self.lr = lr_
        self.tau = tau_

        # initialize actor network and target
        self.network = self.create_network()
        self.target_network = self.create_network()

        # initialize optimizer
        self.optimizer = Adam(self.lr)

        # the target starts as an exact copy of the online network
        self.target_network.set_weights(self.network.get_weights())

    def create_network(self):
        """Create the actor Keras model (observation -> scaled action)."""
        # input layer (observations)
        input_ = Input(shape=self.obs_dim)

        # hidden layer 1
        h1_ = Dense(24, kernel_initializer=GlorotNormal())(input_)
        h1_b = BatchNormalization()(h1_)
        h1 = Activation('relu')(h1_b)

        # hidden layer 2
        h2_ = Dense(16, kernel_initializer=GlorotNormal())(h1)
        h2_b = BatchNormalization()(h2_)
        h2 = Activation('relu')(h2_b)

        # output layer (actions): tanh output scaled by ``act_range``
        output_ = Dense(self.act_dim, kernel_initializer=GlorotNormal())(h2)
        output_b = BatchNormalization()(output_)
        output = Activation('tanh')(output_b)
        scalar = self.act_range * np.ones(self.act_dim)
        out = Lambda(lambda i: i * scalar)(output)

        return Model(input_, out)

    def train(self, obs, critic):
        """Update the actor to maximise ``critic([obs, actor(obs)])``.

        Args:
            obs: batch of observations.
            critic: callable taking ``[obs, actions]`` and returning Q-values.
        """
        with tf.GradientTape() as tape:
            # NOTE(review): the network is called without ``training=True``
            # here; confirm that inference-mode BatchNormalization is intended
            # during this update.
            actions = self.network(obs)
            actor_loss = -tf.reduce_mean(critic([obs, actions]))
        tf.print("actor loss :", actor_loss)
        actor_grad = tape.gradient(actor_loss, self.network.trainable_variables)
        self.optimizer.apply_gradients(
            zip(actor_grad, self.network.trainable_variables))

    def target_update(self):
        """Soft-update the target network: t <- tau*w + (1-tau)*t."""
        blended = [
            self.tau * w + (1 - self.tau) * w_t
            for w, w_t in zip(self.network.get_weights(),
                              self.target_network.get_weights())
        ]
        self.target_network.set_weights(blended)

    def predict(self, obs):
        """Predict the action for a single observation (a batch axis is added)."""
        return self.network.predict(np.expand_dims(obs, axis=0))

    def target_predict(self, new_obs):
        """Predict actions with the target network for a batch of observations."""
        return self.target_network.predict(new_obs)

    def save_network(self, path):
        """Save online and target weights using ``path`` as a prefix."""
        self.network.save_weights(path + '_actor.h5')
        self.target_network.save_weights(path + '_actor_t.h5')

    def load_network(self, path):
        """Load online and target weights, then print the model summary."""
        self.network.load_weights(path + '_actor.h5')
        self.target_network.load_weights(path + '_actor_t.h5')
        # ``summary()`` prints by itself and returns None, so wrapping it in
        # ``print`` emitted a stray "None" line.
        self.network.summary()
def gan(self):
    """Build a graph-mode GAN trainer that updates both models in one pass.

    Returns:
        ``gan_feed(sess, batch_image, z_input)``: runs one training step in
        ``sess`` and returns ``[dloss, gloss]`` for that step.
    """
    noise = Input(shape=self.generator.input_shape[1:])
    real_data = Input(shape=self.discriminator.input_shape[1:])

    generated = self.generator(noise)
    gscore = self.discriminator(generated)
    rscore = self.discriminator(real_data)

    def log_eps(i):
        # The small constant guards against log(0).
        return K.log(i+1e-11)

    # single side label smoothing: replace 1.0 with 0.9
    dloss = - K.mean(log_eps(1-gscore) + .1 * log_eps(1-rscore) + .9 * log_eps(rscore))
    gloss = - K.mean(log_eps(gscore))

    lr, b1 = 1e-4, .2  # otherwise won't converge.
    # ``b1`` was defined but never passed on, so Adam silently ran with its
    # default beta1; it is now forwarded.
    # NOTE(review): confirm that beta1=0.2 is the intended setting.
    optimizer = tf.train.AdamOptimizer(lr, beta1=b1)

    grad_loss_wd = optimizer.compute_gradients(dloss, self.discriminator.trainable_weights)
    update_wd = optimizer.apply_gradients(grad_loss_wd)

    grad_loss_wg = optimizer.compute_gradients(gloss, self.generator.trainable_weights)
    update_wg = optimizer.apply_gradients(grad_loss_wg)

    def get_internal_updates(model):
        # Collect the model's internal update ops (like moving averages) for
        # every tensor it has been called on.
        input_tensors = []
        for ibn in model.inbound_nodes:
            input_tensors += ibn.input_tensors
        return [model.get_updates_for(i) for i in input_tensors]

    other_parameter_updates = [get_internal_updates(m)
                               for m in [self.discriminator, self.generator]]
    # those updates include batch norm.

    print('other_parameter_updates for the models(mainly for batch norm):')
    print(other_parameter_updates)

    train_step = [update_wd, update_wg, other_parameter_updates]
    losses = [dloss, gloss]

    learning_phase = K.learning_phase()

    def gan_feed(sess, batch_image, z_input):
        # Actual GAN trainer. The enclosing variables are only read, so no
        # ``nonlocal`` declaration is needed.
        res = sess.run([train_step, losses], feed_dict={
            noise: z_input,
            real_data: batch_image,
            # Keras layers need to know whether this run is training or
            # testing (batch norm and dropout behave differently).
            learning_phase: True,
        })
        loss_values = res[1]
        return loss_values  # [dloss, gloss]

    return gan_feed
class StyleGAN():
    """Generator/discriminator pair trained with WGAN-style losses.

    The constructor loads (or creates) the constant tensor the generator
    starts from, builds the generator, the discriminator and the stacked
    generator->discriminator model, and creates one Adam optimizer for each
    network. ``self.reals``, ``self.z`` and ``self.noise`` are expected to be
    assigned by the caller before the ``train_on_batch_*`` methods are used.
    """

    def _Get_Generator(self):
        """Build the generator ``Model([z, noise_inp]) -> image``."""
        z = Input(shape=(512, ))

        # Eight stacked 512-unit ReLU layers turn z into the vector ``w``
        # that is handed to A_block and to every G_block.
        w = z
        for _ in range(8):
            w = Dense(512, activation='relu')(w)

        noise_inp = Input(shape=(self.img_height, self.img_width, 1))

        # The first stage starts from the replicated constant tensor rather
        # than from a network input.
        x = self.const_tensor
        y_s, y_b = A_block(w, 512)
        noise = B_block(noise_inp, 512, 4)
        hidden = Add()([x, noise])
        hidden = AdaIN()([hidden, y_b, y_s])
        hidden = Activation('relu')(hidden)

        # Each G_block also returns an RGB tensor; all of them are summed
        # into the final image.
        rgb_outputs = []
        for filters in (256, 128, 64, 32, 16, 8):
            hidden, rgb = G_block(hidden, w, noise_inp, filters)
            rgb_outputs.append(rgb)

        x_out = Add()(rgb_outputs)
        x_out = Activation('tanh')(x_out)

        return Model([z, noise_inp], x_out)

    def _Get_Discriminator(self):
        """Build the discriminator ``Model(image) -> single unbounded score``."""
        x = Input(shape=(self.img_height, self.img_width, self.channels))

        hidden = Conv2D(16, (1, 1), padding='same')(x)
        # Feed the 1x1-conv stem into the first block. It used to be built
        # and then thrown away because the first D_block was applied to ``x``.
        # NOTE(review): this adds a layer to the graph, so Discriminator.h5
        # files written before this change will not load into it.
        for filters in (16, 32, 64, 128, 256, 512):
            hidden = D_block(hidden, filters)

        hidden = Flatten()(hidden)
        x_out = Dense(1)(hidden)

        return Model(x, x_out)

    def _get_model(self):
        """Build and return ``(generator, discriminator, stacked_model)``.

        The stacked model maps ``[z, noise_inp]`` through the generator and
        then the discriminator; its summary is printed.
        """
        G = self._Get_Generator()
        D = self._Get_Discriminator()

        # The discriminator is deliberately NOT frozen here. With
        # ``D.trainable = False`` its ``trainable_weights`` list is empty, so
        # train_on_batch_D had nothing to update. The generator step stays
        # safe because it only takes gradients w.r.t. the generator weights.
        z = Input(shape=(512, ))
        noise_inp = Input(shape=(self.img_height, self.img_width, 1))
        hidden = G([z, noise_inp])
        x_out = D(hidden)
        GD = Model([z, noise_inp], x_out)
        GD.summary()

        return G, D, GD

    def __init__(self, batch_size, img_height, img_width, channels, path):
        """Load the constant tensor from ``path`` and build all networks.

        Args:
            batch_size: number of copies of the constant tensor to replicate.
            img_height: height used for the image and noise inputs.
            img_width: width used for the image and noise inputs.
            channels: channel count of the discriminator input.
            path: directory holding (or receiving) ``key_const.bin``.
        """
        self.lamda = 10
        # Per-batch state, filled in by the training code before each step.
        self.reals = None
        self.z = None
        self.noise = None
        self.fakes = None

        self.batch_size = batch_size
        self.epoch = 0
        self.path = path

        self.const = None
        self.load_const(path)

        self.img_height = img_height
        self.img_width = img_width
        self.channels = channels

        self.const_tensor = replicate(self.const, self.batch_size)

        self.Generator, self.Discriminator, self.Stacked_model = self._get_model()

        self.optimizer_D = Adam(learning_rate=0.0001, beta_1=0, beta_2=0.9)
        self.optimizer_G = Adam(learning_rate=0.0001, beta_1=0, beta_2=0.9)

    def train_on_batch_G(self):
        """Run one generator update on ``self.z``/``self.noise``; return the loss."""
        with tf.GradientTape() as tape:
            logits = self.Stacked_model([self.z, self.noise], training=True)
            loss_value = WGAN_loss_G(logits)
        # Only the generator's weights are differentiated and updated.
        generator_weights = self.Generator.trainable_weights
        grads = tape.gradient(loss_value, generator_weights)
        self.optimizer_G.apply_gradients(zip(grads, generator_weights))
        return float(loss_value)

    def train_on_batch_D(self):
        """Run one discriminator update; return the loss as a float.

        Fresh fakes are generated into ``self.fakes`` and the whole trainer
        is handed to ``WGAN_loss_D`` (presumably it reads ``self.reals`` and
        ``self.fakes`` -- confirm against its definition).
        """
        with tf.GradientTape() as tape:
            self.fakes = self.Generator([self.z, self.noise], training=True)
            loss_value = WGAN_loss_D(self)
        critic_weights = self.Discriminator.trainable_weights
        grads = tape.gradient(loss_value, critic_weights)
        self.optimizer_D.apply_gradients(zip(grads, critic_weights))
        return float(loss_value)

    def save_model(self, path):
        """Back up any existing checkpoint in ``path`` and write a new one.

        Writes ``Model.h5`` (epoch, image size, batch size) plus the weight
        files ``Generator.h5`` and ``Discriminator.h5``. Files already present
        are first moved to the matching ``.bak`` name.
        """
        details = np.array(
            [self.epoch, self.img_height, self.img_width, self.batch_size],
            dtype=int)

        print("Backup...", end='')
        for stem in ('Model', 'Generator', 'Discriminator'):
            current = os.path.join(path, stem + '.h5')
            # Check each file on its own so a partial checkpoint cannot
            # abort the save. os.replace overwrites an older .bak on every
            # platform, whereas os.rename fails on Windows if it exists.
            if os.path.isfile(current):
                os.replace(current, os.path.join(path, stem + '.bak'))
        print("Done!")

        print("Saving...", end='')
        with h5py.File(os.path.join(path, 'Model.h5'), 'w') as f:
            f.create_dataset("model_details", data=details)
        self.Generator.save_weights(os.path.join(path, 'Generator.h5'))
        self.Discriminator.save_weights(os.path.join(path, 'Discriminator.h5'))
        print("Done!")

    def load_model(self, path):
        """Restore the training details and both networks' weights from ``path``.

        NOTE(review): the networks and ``const_tensor`` were already built in
        ``__init__``; overwriting the size/batch attributes here does not
        rebuild them.
        """
        with h5py.File(os.path.join(path, 'Model.h5'), 'r') as f:
            details = f['model_details'][:]
        # Store plain Python ints rather than numpy scalars.
        self.epoch = int(details[0])
        self.img_height, self.img_width = int(details[1]), int(details[2])
        self.batch_size = int(details[3])

        self.Generator.load_weights(os.path.join(path, 'Generator.h5'))
        self.Discriminator.load_weights(os.path.join(path, 'Discriminator.h5'))

    def save_const(self, path):
        """Write ``self.const`` flattened to ``<path>/key_const.bin`` as text."""
        target = os.path.join(path, 'key_const.bin')
        flat = np.reshape(self.const, (4 * 4 * 512))
        print(target)
        np.savetxt(target, flat, delimiter=',')

    def load_const(self, path):
        """Load the (4, 4, 512) constant from ``path``, creating it if missing.

        When ``key_const.bin`` does not exist, a normally distributed tensor
        is drawn and immediately saved so later runs reuse the same constant.
        """
        const_file = os.path.join(path, 'key_const.bin')
        if os.path.exists(const_file):
            flat = np.loadtxt(const_file, delimiter=',')
            self.const = np.reshape(flat, (4, 4, 512))
        else:
            self.const = np.random.normal(size=(4, 4, 512))
            self.save_const(path)
class CriticNet():
    """ Critic Network for DDPG

    Holds an online Q-network and a target Q-network with identical
    architecture, an Adam optimizer for the online network, and the most
    recent critic loss in ``self.critic_loss``.
    """

    def __init__(self, in_dim, out_dim, lr_, tau_, discount_factor):
        """Create both networks and start the target as a copy of the online one.

        Args:
            in_dim: shape passed to the observation ``Input`` layer.
            out_dim: size of the action vector.
            lr_: learning rate for the Adam optimizer.
            tau_: mixing factor used by ``target_update``.
            discount_factor: stored on the instance; not used in this class.
        """
        self.obs_dim = in_dim
        self.act_dim = out_dim
        self.lr = lr_
        self.discount_factor = discount_factor
        self.tau = tau_

        # initialize critic network and target
        self.network = self.create_network()
        self.target_network = self.create_network()

        self.optimizer = Adam(self.lr)

        # copy the weights for initialization
        self.target_network.set_weights(self.network.get_weights())

        self.critic_loss = None

    def create_network(self):
        """ Create a Critic Network Model using Keras
            as a Q-value approximator function
        """
        # input layer(observations and actions)
        input_obs = Input(shape=self.obs_dim)
        input_act = Input(shape=(self.act_dim,))
        inputs = [input_obs, input_act]
        concat = Concatenate(axis=-1)(inputs)

        # hidden layer 1
        h1_ = Dense(300, kernel_initializer=GlorotNormal(), kernel_regularizer=l2(0.01))(concat)
        h1_b = BatchNormalization()(h1_)
        h1 = Activation('relu')(h1_b)

        # hidden_layer 2
        h2_ = Dense(400, kernel_initializer=GlorotNormal(), kernel_regularizer=l2(0.01))(h1)
        h2_b = BatchNormalization()(h2_)
        h2 = Activation('relu')(h2_b)

        # output layer(actions)
        output_ = Dense(1, kernel_initializer=GlorotNormal(), kernel_regularizer=l2(0.01))(h2)
        output_b = BatchNormalization()(output_)
        output = Activation('linear')(output_b)

        return Model(inputs, output)

    def Qgradient(self, obs, acts):
        """Return the gradient of the online Q-values with respect to ``acts``."""
        acts = tf.convert_to_tensor(acts)
        with tf.GradientTape() as tape:
            # acts is a plain tensor, not a variable, so it must be watched.
            tape.watch(acts)
            q_values = self.network([obs, acts])
            q_values = tf.squeeze(q_values)
        return tape.gradient(q_values, acts)

    def train(self, obs, acts, target):
        """Train Q-network for critic on sampled batch

        Minimizes the mean squared TD error plus the L2 penalties declared
        on the Dense layers. ``self.critic_loss`` and the printed value are
        the plain mean squared error, without the penalty term.

        NOTE(review): ``target`` is subtracted from the network output
        directly; presumably it has the same (batch, 1) shape -- confirm
        against the caller.
        """
        with tf.GradientTape() as tape:
            q_values = self.network([obs, acts], training=True)
            td_error = q_values - target
            critic_loss = tf.reduce_mean(tf.math.square(td_error))
            # In a GradientTape loop the kernel_regularizer terms only take
            # effect when the model's collected losses are added by hand.
            penalties = self.network.losses
            if penalties:
                total_loss = critic_loss + tf.add_n(penalties)
            else:
                total_loss = critic_loss

        tf.print("critic loss :", critic_loss)
        self.critic_loss = float(critic_loss)

        # compute critic gradient
        trainable = self.network.trainable_variables
        critic_grad = tape.gradient(total_loss, trainable)
        self.optimizer.apply_gradients(zip(critic_grad, trainable))

    def predict(self, obs):
        """Predict Q-value from approximation function(Q-network)
        """
        return self.network.predict(obs)

    def target_predict(self, new_obs):
        """Predict target Q-value from approximation function(Q-network)
        """
        return self.target_network.predict(new_obs)

    def target_update(self):
        """ soft target update for training target critic network

        Each target weight becomes ``tau * online + (1 - tau) * target``.
        """
        online_weights = self.network.get_weights()
        target_weights = self.target_network.get_weights()
        blended = [
            self.tau * w + (1 - self.tau) * w_t
            for w, w_t in zip(online_weights, target_weights)
        ]
        self.target_network.set_weights(blended)

    def save_network(self, path):
        """Save both networks' weights to ``path`` + '_critic.h5' / '_critic_t.h5'."""
        self.network.save_weights(path + '_critic.h5')
        self.target_network.save_weights(path + '_critic_t.h5')

    def load_network(self, path):
        """Load both networks' weights and print the online network summary."""
        self.network.load_weights(path + '_critic.h5')
        self.target_network.load_weights(path + '_critic_t.h5')
        # summary() prints by itself and returns None; wrapping it in print()
        # used to emit a stray "None" line.
        self.network.summary()