Exemple #1
0
class ModelCritic(tf.keras.Model):
    """State-value critic: three ELU hidden layers followed by one linear output unit.

    ``setup`` must be called before ``learn``; it stores the discount factor
    and creates the Adam optimizer that ``learn`` relies on.
    """

    def __init__(self,input_dims):
        """Create the layers.

        Args:
            input_dims: shape handed to the Keras ``Input`` that is stored on
                ``self.state_input``.
        """
        super().__init__()
        # Stored on the instance but not used by ``call``.
        self.state_input=Input(shape=input_dims,name="state_input")
        # Each hidden kernel is initialised uniformly in [-1/units, 1/units].
        self.fc1 = Dense(512,activation='elu',name='forward1',kernel_initializer=keras.initializers.RandomUniform(minval=-1./512,maxval=1./512))
        self.fc2 = Dense(256,activation='elu',name='forward2',kernel_initializer=keras.initializers.RandomUniform(minval=-1./256,maxval=1./256))
        self.fc3 = Dense(128,activation='elu',name='forward3',kernel_initializer=keras.initializers.RandomUniform(minval=-1./128,maxval=1./128))
        # The value head is initialised uniformly in [-3e-4, 3e-4].
        self.value_func = Dense(1,activation='linear',name='value_func',kernel_initializer=keras.initializers.RandomUniform(minval=-3e-4,maxval=3e-4))

    def call(self,input_data):
        """Return the output of fc1 -> fc2 -> fc3 -> value_func for ``input_data``."""
        x = self.fc1(input_data)
        x = self.fc2(x)
        x = self.fc3(x)
        v = self.value_func(x)
        return v

    def setup(self,gamma=0.99):
        """Store the discount factor and create the optimizer.

        Args:
            gamma: discount factor applied to the next-state value in ``learn``.
        """
        self.gamma = gamma
        # ``learning_rate`` is the supported keyword; the ``lr`` alias is
        # deprecated and rejected by current Keras releases.
        self.optimizer = Adam(learning_rate=hyperparam['critic_lr'])

    def learn(self,reward,prev_state,state):
        """Take one gradient step on the squared one-step TD error.

        Args:
            reward: reward observed for the transition.
            prev_state: input for which the value is being corrected.
            state: successor input used for the bootstrapped target.

        Returns:
            ``(td, c_loss)`` where ``td = reward + gamma * V(state) - V(prev_state)``
            and ``c_loss = td ** 2``.
        """
        with tf.GradientTape() as tape:
            v_1 = self(prev_state,training=True)
            v = self(state,training=True)
            # NOTE(review): the bootstrapped term ``v`` is not wrapped in
            # tf.stop_gradient, so gradients flow through both value
            # estimates -- confirm this is intended.
            td = reward + self.gamma*v - v_1
            c_loss = td**2
        grads = tape.gradient(c_loss,self.trainable_variables)
        self.optimizer.apply_gradients(zip(grads,self.trainable_variables))
        return td,c_loss
class World_01(World_00):
    """Extension of ``World_00`` with a replay memory and a separate target Q-model.

    ``self.model``, ``self.env``, ``self.num_states`` and ``self.num_actions``
    are read here but not defined in this class; presumably ``World_00``
    provides them -- confirm against the base class.
    """

    def __init__(self):
        """Initialise the base world, the replay memory, the target model and Adam."""
        World_00.__init__(self)
        self.memory = deque(maxlen=2000)  # the oldest transitions are dropped beyond 2000
        self.N_batch = 64
        # NOTE(review): nothing in this class copies weights from self.model
        # into self.t_model -- confirm the synchronisation happens elsewhere.
        self.t_model = create_q_model(self.num_states, self.num_actions)
        self.discount_factor = 0.99
        self.learning_rate = 0.001
        # ``learning_rate`` is the supported keyword; the ``lr`` alias is
        # deprecated and rejected by current Keras releases.
        self.optimizer = Adam(learning_rate=self.learning_rate)

    def trial(self, flag_render=False):
        """Run 10 episodes through ``env_test_model_memory`` and print the memory size.

        Args:
            flag_render: forwarded unchanged to ``env_test_model_memory``.
        """
        env_test_model_memory(self.memory, self.env,
            self.model, n_episodes=10, flag_render=flag_render)
        print(len(self.memory))

    def train_memory(self):
        """Do one Q-learning gradient step on a random minibatch of the memory.

        Nothing happens while the memory holds fewer than ``self.N_batch``
        entries.
        """
        if len(self.memory) < self.N_batch:
            return

        memory_batch = random.sample(self.memory, self.N_batch)
        s_l, a_l, r_l, next_s_l, done_l = [
            np.array(x) for x in list_rotate(memory_batch)]

        # The bootstrap target does not depend on the trained weights, so it is
        # built outside the tape with TensorFlow ops and explicit dtypes
        # instead of applying NumPy functions to tensors.
        Qsa_tpred_l = tf.stop_gradient(self.t_model(next_s_l))
        max_Q_next_s_a_l = tf.reduce_max(Qsa_tpred_l, axis=-1)
        q_dtype = max_Q_next_s_a_l.dtype
        not_done_l = 1.0 - tf.cast(done_l, q_dtype)
        Qs_a_l = (tf.cast(r_l, q_dtype)
                  + not_done_l * self.discount_factor * max_Q_next_s_a_l)

        model_w = self.model.trainable_variables
        with tf.GradientTape() as tape:
            Qsa_pred_l = self.model(s_l)
            # Select the predicted Q-value of the action stored in each sample.
            a_l_onehot = tf.one_hot(a_l, self.num_actions)
            Qs_a_pred_l = tf.reduce_sum(a_l_onehot * Qsa_pred_l, axis=1)
            loss = tf.reduce_mean(tf.square(Qs_a_l - Qs_a_pred_l))
        grads = tape.gradient(loss, model_w)
        self.optimizer.apply_gradients(zip(grads, model_w))
class Agent():
    """One-step actor-critic agent holding an ``Actor``, a ``Critic`` and one Adam optimizer each."""

    def __init__(self, num_actions, gamma=Constants.discount_factor):
        """Create the networks and their optimizers.

        Args:
            num_actions: forwarded to the ``Actor`` constructor.
            gamma: discount factor used for the temporal-difference target.
        """
        # ``learn`` reads self.gamma; the original never stored it.
        self.gamma = gamma
        # ``learning_rate`` is the supported keyword; the ``lr`` alias is
        # deprecated and rejected by current Keras releases.
        self.actor_opt = Adam(learning_rate=Constants.lr)
        self.critic_opt = Adam(learning_rate=Constants.lr)
        self.actor = Actor(num_actions)
        self.critic = Critic()

    def state2vec(self, s):
        """Turn a raw state into a single-row NumPy array.

        A constant ``1`` is appended to a copy of ``s``.  Every entry of
        ``Constants.state`` that is flagged ``standardized`` and whose type is
        not ``"binary"`` is then rescaled in place as
        ``(value - range[0]) / range[1]`` at that entry's ``index``.

        Args:
            s: iterable of state values.

        Returns:
            ``np.array([values])``, i.e. the processed values wrapped in an
            outer list.
        """
        temp = list(s)
        temp.append(1)
        for param in Constants.state:
            spec = Constants.state[param]
            if spec.standardized and spec.type != "binary":
                temp[spec.index] = float(temp[spec.index] -
                                         spec.range[0]) / spec.range[1]
        return np.array([temp])

    def act(self, state):
        """Sample an action from the actor's categorical output for ``state``.

        Returns:
            The sampled action after it has been passed through ``state2vec``.
        """
        prob = self.actor(self.state2vec(state))
        dist = Categorical(probs=prob, dtype=tf.float32)
        action = dist.sample()
        # NOTE(review): the sampled action is run through state2vec, which
        # appends 1 and applies the state rescaling -- confirm callers really
        # expect that rather than the raw sampled action.
        return self.state2vec(action)

    def actor_loss(self, prob, action, temporal_diff):
        """Return ``-log_prob(action) * temporal_diff`` under ``Categorical(probs=prob)``."""
        # The original passed the misspelled keyword ``dytpe=dtype.float32``;
        # this now matches the construction used in ``act``.
        dist = Categorical(probs=prob, dtype=tf.float32)
        log_prob = dist.log_prob(action)
        loss = -log_prob * temporal_diff
        return loss

    def learn(self, cur_state, action, new_state, reward):
        """Apply one actor update and one critic update for a single transition.

        Args:
            cur_state: raw state before the action (converted with ``state2vec``).
            action: value passed to the distribution's ``log_prob``.
            new_state: raw state after the action (converted with ``state2vec``).
            reward: reward for the transition.

        Returns:
            ``(actor_loss, critic_loss)`` as computed inside the tapes.
        """
        cur_state = self.state2vec(cur_state)
        new_state = self.state2vec(new_state)
        with GradientTape() as actor_tape, GradientTape() as critic_tape:
            # The original referenced an undefined name ``state`` here.
            prob = self.actor(cur_state, training=True)
            value = self.critic(cur_state, training=True)
            value_new = self.critic(new_state, training=True)
            temporal_diff = reward + self.gamma * value_new - value
            actor_loss = self.actor_loss(prob, action, temporal_diff)
            critic_loss = temporal_diff**2
        actor_grads = actor_tape.gradient(actor_loss,
                                          self.actor.trainable_variables)
        critic_grads = critic_tape.gradient(critic_loss,
                                            self.critic.trainable_variables)
        self.actor_opt.apply_gradients(
            zip(actor_grads, self.actor.trainable_variables))
        self.critic_opt.apply_gradients(
            zip(critic_grads, self.critic.trainable_variables))
        return actor_loss, critic_loss

    def save_agent(self, save_dir):
        """Save actor and critic under ``save_dir + "actor"`` and ``save_dir + "critic"``."""
        saved_model.save(self.actor, save_dir + "actor")
        saved_model.save(self.critic, save_dir + "critic")

    def load_agent(self):
        """Replace actor and critic with the models stored under ``Constants.load_model_dir``."""
        self.actor = saved_model.load(Constants.load_model_dir + "actor")
        self.critic = saved_model.load(Constants.load_model_dir + "critic")
Exemple #4
0
class ModelActor(tf.keras.Model):
    """Gaussian policy network: four ELU hidden layers with separate mean and scale heads.

    ``setup`` must be called before ``learn``; it creates the optimizer.
    """

    def __init__(self,input_dims,no_action=2):
        """Create the layers.

        Args:
            input_dims: shape handed to the Keras ``Input`` stored on
                ``self.state_input``.
            no_action: number of units in each of the ``mu`` and ``sigma`` heads.
        """
        super().__init__()
        # Stored on the instance but not used by ``call``.
        self.state_input=Input(shape=input_dims,name="state_input")
        # Each hidden kernel is initialised uniformly in [-1/units, 1/units].
        self.fc1 = Dense(1024,activation='elu',name='forward1',kernel_initializer=keras.initializers.RandomUniform(minval=-1./1024,maxval=1./1024))
        self.fc2 = Dense(512,activation='elu',name='forward2',kernel_initializer=keras.initializers.RandomUniform(minval=-1./512,maxval=1./512))
        self.fc3 = Dense(256,activation='elu',name='forward3',kernel_initializer=keras.initializers.RandomUniform(minval=-1./256,maxval=1./256))
        self.fc4 = Dense(128,activation='elu',name='forward4',kernel_initializer=keras.initializers.RandomUniform(minval=-1./128,maxval=1./128))
        # Both heads are initialised uniformly in [-3e-3, 3e-3].
        self.mu = Dense(no_action,activation='linear',name='mu1',kernel_initializer=keras.initializers.RandomUniform(minval=-3e-3,maxval=3e-3))
        self.sigma = Dense(no_action,activation='linear',name='sigma1',kernel_initializer=keras.initializers.RandomUniform(minval=-3e-3,maxval=3e-3))

    def call(self,input_data):
        """Return ``(mu, sigma)`` for ``input_data``.

        ``sigma`` is the softplus of the raw head output plus ``1e-5``, which
        keeps it strictly positive.
        """
        x = self.fc1(input_data)
        x = self.fc2(x)
        x = self.fc3(x)
        x = self.fc4(x)
        probmu = self.mu(x)
        probsigma = self.sigma(x)
        probsigma = tf.nn.softplus(probsigma) + 1e-5
        return probmu,probsigma

    def setup(self,gamma=0.99):
        """Store the discount factor and create the optimizer."""
        self.gamma = gamma
        # ``learning_rate`` is the supported keyword; the ``lr`` alias is
        # deprecated and rejected by current Keras releases.
        self.optimizer = Adam(learning_rate=hyperparam['actor_lr'])

    def act(self,state):
        """Sample from ``Normal(mu, sigma)`` for ``state``.

        Returns:
            NumPy array of the sample; ``sample([1])`` adds one leading axis
            in front of the network's output shape.
        """
        probmu, probsigma = self(np.array(state))
        dist = tfp.distributions.Normal(loc=probmu.numpy(),scale=probsigma.numpy())
        action = dist.sample([1])
        return action.numpy()

    def actor_loss(self,probmu,probsigma,actions,td):
        """Return ``-log_prob(actions + 1e-5) * td`` under ``Normal(probmu, probsigma)``."""
        dist = tfp.distributions.Normal(loc=probmu,scale=probsigma)
        log_prob = dist.log_prob(actions + 1e-5)
        loss = -log_prob*td
        return loss

    def learn(self,prev_state,td,action=None):
        """Take one policy-gradient step.

        Args:
            prev_state: input for which the policy is updated.
            td: temporal-difference signal that weights the log-probability.
            action: the action that was actually taken in ``prev_state`` and
                that ``td`` refers to.  When omitted, a fresh action is sampled
                with ``act`` (the original behaviour); that sample is generally
                not the action that produced ``td``, so callers should pass the
                executed action.

        Returns:
            The actor loss computed inside the tape.
        """
        if action is None:
            # Backward-compatible fallback; ``act`` goes through ``.numpy()``,
            # so nothing from this sampling is differentiated.
            action = self.act(prev_state)
        with tf.GradientTape() as tape:
            pm,ps = self(prev_state,training=True)
            a_loss = self.actor_loss(pm,ps,action,td)
        grads = tape.gradient(a_loss,self.trainable_variables)
        self.optimizer.apply_gradients(zip(grads,self.trainable_variables))
        return a_loss
Exemple #5
0
    def _create_actor_model(self):
        """Build the actor network and the graph op that updates it.

        The network maps an observation-shaped input through a 24-unit dense
        layer to a softmax layer with ``action_space.shape[0]`` units and is
        compiled with MSE loss and Adam(0.001).  In addition a placeholder is
        stored on ``self._action_gradient`` and an op on
        ``self._optimize_actor`` that applies the gradients of the network
        output w.r.t. its weights, seeded with the negated placeholder, using
        a second Adam optimizer with step size 1e-6.

        Returns:
            ``(input_tensor, model)``.
        """
        obs_in = Input(shape=self.env.observation_space.shape)
        hidden = Dense(24)(obs_in)
        n_actions = self.env.action_space.shape[0]
        policy_out = Dense(n_actions, activation='softmax')(hidden)

        actor = Model(inputs=obs_in, outputs=policy_out)
        actor.compile(loss="mse", optimizer=Adam(lr=0.001))

        # Placeholder fed with the gradient w.r.t. the actions.
        self._action_gradient = tf.placeholder(tf.float32, [None, n_actions])
        params = actor.trainable_weights
        param_grads = tf.gradients(policy_out, params, -self._action_gradient)
        graph_opt = tf.train.AdamOptimizer(.000001)
        self._optimize_actor = graph_opt.apply_gradients(
            zip(param_grads, params))
        return obs_in, actor
    # Reshape sync_in2 to rank 4 by appending a trailing axis of size 1.
    t2 = tf.reshape(
        sync_in2, (sync_in2.shape[0], sync_in2.shape[1], sync_in2.shape[2], 1))

    # t1.shape
    # t2.shape

    # Run the model on both inputs (t1 and model are defined outside this excerpt).
    p1 = model(t1)
    p2 = model(t2)

    # Concatenating a one-element list along axis 0 leaves the values of p1 unchanged.
    # NOTE(review): p2 does not enter the loss computed below -- confirm
    # whether [p1, p2] was intended here.
    p1 = tf.concat([p1], 0)
    print(p1.numpy())

    # sync_la = loader.encode_multihot(sync_la)
    # sync_la2 = loader.encode_multihot(sync_la2)

    # sync_la = tf.convert_to_tensor(sync_la)

    # Binary cross-entropy between the labels sync_la and the predictions p1.
    loss = tf.losses.binary_crossentropy(sync_la, p1)

    # One Adam step on the model's trainable variables.
    # NOTE(review): `tape` is created outside this excerpt; presumably the
    # forward pass above runs inside its context -- confirm.
    default_opt = Adam(learning_rate=1e-3)
    grad = tape.gradient(loss, model.trainable_variables)
    print(grad)
    default_opt.apply_gradients(zip(grad, model.trainable_variables))

# Join the two prediction arrays along the first axis.
pc = np.concatenate([p1, p2])

# Multi-hot encode the labels with the project loader.
k = loader.encode_multihot(sync_la)
# Bare expression: the shape is only displayed in an interactive session.
np.array(k).shape

# Bare expression: shape of the loss between the encoded labels and model(t1).
tf.losses.binary_crossentropy(k, model(t1)).numpy().shape
Exemple #7
0
class DCGAN:
    """DCGAN whose generator and discriminator work on (25, 342, channels) arrays."""

    def __init__(self, channels=1, batchsize=50):
        """Store the hyper-parameters and build both networks.

        Args:
            channels: size of the last axis of the generated/real arrays.
            batchsize: batch size used for the dataset and for the generator
                noise in ``train_step``.
        """
        # Dataset features:
        self.channels = channels
        self.freq_sample = 25
        self.time_sample = 342
        self.eeg_shape = (self.freq_sample, self.time_sample, self.channels)

        # Model specific parameters (noise size, dropout rate, LeakyReLU slope,
        # batch-norm momentum):
        self.noise = 100
        self.dropout = 0.25
        self.alpha = 0.2
        self.momentum = 0.8
        self.batchsize = batchsize

        # One Adam instance per network.  A Keras optimizer keeps state for
        # the variables it was first applied to, so a single instance must not
        # be shared between generator and discriminator.  ``self.optimiser``
        # keeps its original name and now drives the generator only.
        # (0.0002, 0.2) are the values chosen by the original author.
        self.optimiser = Adam(0.0002, 0.2)
        self.disc_optimiser = Adam(0.0002, 0.2)

        # Build both the Generator and Discriminator:
        self.generator = self.make_generator()
        self.discriminator = self.make_discriminator()

        # Directory that receives the sample plots written during training.
        self.dir = 'EEG_samples'

    def make_generator(self):
        '''
        Build the generator: a dense projection of the noise vector reshaped
        to (4, 41, 256), followed by three transposed convolutions that yield
        an output of shape (25, 342, channels) with tanh activation.
        :return: the Sequential generator model
        '''

        model = Sequential()

        model.add(Dense(4 * 41 * 256, use_bias=False, input_shape=(self.noise,)))
        model.add(BatchNormalization(momentum=self.momentum))
        model.add(LeakyReLU())

        model.add(Reshape((4, 41, 256)))

        # (4, 41) -> (11, 84)
        model.add(Conv2DTranspose(128, (5, 4), strides=(2, 2), padding='valid', use_bias=False))
        model.add(BatchNormalization(momentum=self.momentum))
        model.add(LeakyReLU())

        # (11, 84) -> (25, 171)
        model.add(Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='valid', use_bias=False))
        model.add(BatchNormalization(momentum=self.momentum))
        model.add(LeakyReLU())

        # (25, 171) -> (25, 342)
        model.add(Conv2DTranspose(self.channels, (5, 5), strides=(1, 2), padding='same', use_bias=False,
                                         activation='tanh'))
        assert model.output_shape == (None, 25, 342, self.channels)

        return model

    def make_discriminator(self):
        '''
        Build the discriminator: two strided convolution blocks (batch norm,
        LeakyReLU, dropout) followed by a single dense unit.  The output is a
        raw logit; the loss functions apply the sigmoid (from_logits=True).
        :return: the Sequential discriminator model
        '''

        model = Sequential()
        model.add(Conv2D(64, (5, 5), strides=(2, 2), padding='same',
                                input_shape=[25, 342, self.channels]))
        model.add(BatchNormalization(momentum=self.momentum))
        model.add(LeakyReLU(alpha=self.alpha))
        model.add(Dropout(self.dropout))

        model.add(Conv2D(128, (5, 5), strides=(1, 2), padding='same'))
        model.add(BatchNormalization(momentum=self.momentum))
        model.add(LeakyReLU(alpha=self.alpha))
        model.add(Dropout(self.dropout))

        model.add(Flatten())
        model.add(Dense(1))
        assert model.output_shape == (None, 1)

        return model

    def make_fakedata(self, noise_shape=100):
        '''
        Draw ``noise_shape`` standard-normal noise vectors of length
        ``self.noise`` and run them through the generator's ``predict``.
        :return: Generated signal, Noise np.array
        '''

        noise = np.random.normal(0, 1, (noise_shape, self.noise))
        gen_imgs = self.generator.predict(noise)
        return gen_imgs, noise

    def discriminator_loss(self, real_output, fake_output):
        '''
        Binary cross-entropy (on logits) of the discriminator: real outputs
        are scored against a target of one, fake outputs against a target of
        zero.
        :return: sum of the real and the fake loss
        '''
        cross_entropy = BinaryCrossentropy(from_logits=True)
        real_loss = cross_entropy(tf.ones_like(real_output), real_output)  # target 1 for real
        fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)  # target 0 for fake
        total_loss = real_loss + fake_loss
        return total_loss

    def generator_loss(self, fake_output):
        '''
        Binary cross-entropy (on logits) of the fake outputs against a target
        of one, i.e. the generator is rewarded when fakes are scored as real.
        :return: Generator loss value
        '''

        cross_entropy = BinaryCrossentropy(from_logits=True)
        return cross_entropy(tf.ones_like(fake_output), fake_output)

    @tf.function
    def train_step(self, images):
        '''
        One simultaneous update of generator and discriminator on a batch.
        Two gradient tapes record the same forward pass so that each network's
        gradients can be taken independently.
        :param images: batch of real samples
        :return: Discriminator and Generator loss
        '''

        noise = tf.random.normal([self.batchsize, self.noise])

        with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
            generated_images = self.generator(noise, training=True)

            real_output = self.discriminator(images, training=True)
            fake_output = self.discriminator(generated_images, training=True)

            gen_loss = self.generator_loss(fake_output)
            disc_loss = self.discriminator_loss(real_output, fake_output)

        grad_gen = gen_tape.gradient(gen_loss, self.generator.trainable_variables)
        grad_disc = disc_tape.gradient(disc_loss, self.discriminator.trainable_variables)

        # Each network is stepped by its own optimizer instance.
        self.optimiser.apply_gradients(zip(grad_gen, self.generator.trainable_variables))
        self.disc_optimiser.apply_gradients(zip(grad_disc, self.discriminator.trainable_variables))

        return disc_loss, gen_loss

    def _save_sample_plot(self, dataset, epoch, trial_num=30, channel=0):
        '''
        Plot one generated sample next to one real trial, save the figure as
        ``<self.dir>/<epoch>.png`` and then display it.
        '''
        generated_signal, _ = self.make_fakedata(noise_shape=100)
        # Clamp so that datasets with fewer than trial_num + 1 trials still work.
        trial_num = min(trial_num, len(dataset) - 1)
        real_signal = np.expand_dims(dataset[trial_num], axis=0)

        fig, axs = plt.subplots(1, 2)
        fig.suptitle('Comparison of Generated vs. Real Signal (Spectrogram) for one trial, one channel')
        fig.tight_layout()
        axs[0].imshow(generated_signal[0, :, :, channel], aspect='auto')
        axs[0].set_title('Generated Signal', size=10)
        axs[0].set_xlabel('Time Sample')
        axs[0].set_ylabel('Frequency Sample')
        axs[1].imshow(real_signal[0, :, :, channel], aspect='auto')
        # This panel shows the real trial; it used to be titled 'Fake Signal'.
        axs[1].set_title('Real Signal', size=10)
        axs[1].set_xlabel('Time Sample')
        axs[1].set_ylabel('Frequency Sample')

        # Save before showing: once plt.show() returns the figure may already
        # be torn down, which made the saved image blank.
        os.makedirs(self.dir, exist_ok=True)
        fig.savefig("%s/%d.png" % (self.dir, epoch))
        plt.show()
        plt.close(fig)

    def train(self, dataset, epochs, sample_interval=100):

        '''
        Train for ``epochs`` passes over ``dataset``.

        The dataset is cast to float32, shuffled and batched with
        ``self.batchsize``; ``train_step`` is called once per batch.  Every
        ``sample_interval`` epochs the mean losses of that epoch are printed
        and a comparison plot is saved and shown.  After the last epoch the
        per-epoch loss history is plotted.
        :param dataset: NumPy array of real samples (first axis = trials)
        :param epochs: number of passes over the dataset
        :param sample_interval: epochs between progress reports/plots
        '''

        g_tot, d_tot = [], []

        data = tf.data.Dataset.from_tensor_slices(dataset.astype('float32'))\
                .shuffle(dataset.shape[0]).batch(self.batchsize)

        for epoch in range(epochs):
            # Fresh lists every epoch so the reported value is the mean of
            # this epoch only, not a running mean over all epochs so far.
            gen_loss, disc_loss = [], []

            for image_batch in data:
                disc_loss_batch, gen_loss_batch = self.train_step(image_batch)

                gen_loss.append(gen_loss_batch)
                disc_loss.append(disc_loss_batch)

            g_loss = float(sum(gen_loss) / len(gen_loss))
            d_loss = float(sum(disc_loss) / len(disc_loss))

            g_tot.append(g_loss)
            d_tot.append(d_loss)

            if epoch % sample_interval == 0:
                print("epoch: {}, generator loss: {}, discriminator loss: {}".format
                    (epoch, g_loss, d_loss))
                self._save_sample_plot(dataset, epoch)

        # Plot the generator and discriminator losses for all the epochs
        plt.figure()
        plt.plot(g_tot, 'r')
        plt.plot(d_tot, 'b')
        plt.title('Loss history')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend(['Generator', 'Discriminator'])
        plt.grid()
        plt.show()
Exemple #8
0
class CriticNet():
    """Pair of Q-value critic networks, each with a soft-updated target copy.

    (The original header described this as a PPO critic; the code builds two
    state-action value networks plus target networks.)
    """

    def __init__(self, in_dim, out_dim, lr_, tau_, discount_factor):
        """Build the four networks and the two optimizers.

        Args:
            in_dim: shape of the observation input.
            out_dim: size of the action input.
            lr_: learning rate of both Adam optimizers.
            tau_: interpolation factor used by ``target_update``.
            discount_factor: stored on the instance; not used in this class.
        """
        self.obs_dim = in_dim
        self.act_dim = out_dim
        self.lr = lr_
        self.discount_factor = discount_factor
        self.tau = tau_

        # initialize critic networks and their targets
        self.network_1, self.network_2 = self.create_network(
        ), self.create_network()
        self.target_network_1, self.target_network_2 = self.create_network(
        ), self.create_network()

        self.optimizer1, self.optimizer2 = Adam(self.lr), Adam(self.lr)

        # each target starts as an exact copy of its online network
        self.target_network_1.set_weights(self.network_1.get_weights())
        self.target_network_2.set_weights(self.network_2.get_weights())

        # minimum of the two losses from the latest ``train`` call
        self.critic_loss = None

    def create_network(self):
        """Create one Keras critic model.

        Observation and action inputs are concatenated and passed through
        Dense(24) and Dense(16) blocks (batch norm, then ReLU) and a Dense(1)
        output that is batch-normalised with a linear activation.

        Returns:
            A ``Model`` taking ``[observations, actions]``.
        """
        # input layer (observations and actions)
        input_obs = Input(shape=self.obs_dim)
        input_act = Input(shape=(self.act_dim, ))
        inputs = [input_obs, input_act]
        concat = Concatenate(axis=-1)(inputs)

        # NOTE(review): the l2 kernel regularizers below only populate
        # ``model.losses``; ``train`` minimises the plain MSE and never adds
        # them -- confirm whether the penalty is meant to be active.

        # hidden layer 1
        h1_ = Dense(24,
                    kernel_initializer=GlorotNormal(),
                    kernel_regularizer=l2(0.01))(concat)
        h1_b = BatchNormalization()(h1_)
        h1 = Activation('relu')(h1_b)

        # hidden layer 2
        h2_ = Dense(16,
                    kernel_initializer=GlorotNormal(),
                    kernel_regularizer=l2(0.01))(h1)
        h2_b = BatchNormalization()(h2_)
        h2 = Activation('relu')(h2_b)

        # output layer (single Q-value)
        output_ = Dense(1,
                        kernel_initializer=GlorotNormal(),
                        kernel_regularizer=l2(0.01))(h2)
        output_b = BatchNormalization()(output_)
        output = Activation('linear')(output_b)

        return Model(inputs, output)

    @staticmethod
    def _fit_step(network, optimizer, obs, acts, target):
        """Apply one MSE gradient step to ``network`` and return the loss."""
        with tf.GradientTape() as tape:
            q_values = network([obs, acts], training=True)
            loss = tf.reduce_mean(tf.math.square(q_values - target))
        grads = tape.gradient(loss, network.trainable_variables)
        optimizer.apply_gradients(zip(grads, network.trainable_variables))
        return loss

    def _soft_update(self, online, target):
        """Set ``target`` weights to ``tau * online + (1 - tau) * target``."""
        blended = [
            self.tau * w + (1 - self.tau) * w_t
            for w, w_t in zip(online.get_weights(), target.get_weights())
        ]
        target.set_weights(blended)

    def train(self, obs, acts, target):
        """Train both critics on a sampled batch.

        Args:
            obs: batch of observations.
            acts: batch of actions.
            target: regression targets for the Q-values.

        Side effects:
            Prints both losses and stores the smaller one as a float on
            ``self.critic_loss``.
        """
        critic_loss_1 = self._fit_step(self.network_1, self.optimizer1,
                                       obs, acts, target)
        critic_loss_2 = self._fit_step(self.network_2, self.optimizer2,
                                       obs, acts, target)

        tf.print("critic loss :", critic_loss_1, critic_loss_2)
        # tf.minimum instead of Python's min(), which relies on truth-testing
        # the result of a tensor comparison.
        self.critic_loss = float(tf.minimum(critic_loss_1, critic_loss_2))

    def target_update(self):
        """Soft-update both target networks towards their online networks."""
        self._soft_update(self.network_1, self.target_network_1)
        self._soft_update(self.network_2, self.target_network_2)

    def save_network(self, path):
        """Save the weights of all four networks as ``path + '_critic{1,2}[_t].h5'``."""
        self.network_1.save_weights(path + '_critic1.h5')
        self.target_network_1.save_weights(path + '_critic1_t.h5')
        self.network_2.save_weights(path + '_critic2.h5')
        self.target_network_2.save_weights(path + '_critic2_t.h5')

    def load_network(self, path):
        """Load the weights of all four networks from the files written by ``save_network``."""
        self.network_1.load_weights(path + '_critic1.h5')
        self.target_network_1.load_weights(path + '_critic1_t.h5')
        self.network_2.load_weights(path + '_critic2.h5')
        self.target_network_2.load_weights(path + '_critic2_t.h5')
class WasserGAN_GP():
    def __init__(self, channels=1, batchsize=32, task=1, subject=1):
        """Store hyper-parameters, create both optimisers and build both networks.

        Args:
            channels: size of the last axis of the signals.
            batchsize: batch size used when batching the dataset.
            task: task identifier kept on the instance.
            subject: subject identifier kept on the instance.
        """
        # --- data geometry ---------------------------------------------
        self.channels = channels
        self.freq_sample, self.time_sample = 25, 342
        self.eeg_shape = (self.freq_sample, self.time_sample, self.channels)

        # --- model / training hyper-parameters --------------------------
        self.noise = 100        # length of the generator's noise vector
        self.dropout = 0.25     # dropout rate in the critic
        self.alpha = 0.2        # LeakyReLU slope in the critic
        self.momentum = 0.8     # batch-norm momentum in the generator
        self.batchsize = batchsize
        self.critic_iter = 5    # critic updates per generator update
        self.gp_weight = 10     # multiplier of the gradient-penalty term

        # --- optimisers --------------------------------------------------
        # Adam for the generator and RMSprop for the critic; the original
        # author's note attributes these choices to the EEG GAN and
        # Wasserstein GAN papers respectively.
        self.gen_optimiser = Adam(0.0002, 0.2)
        self.critic_optimiser = RMSprop(0.0005)

        # --- networks ----------------------------------------------------
        self.generator = self.make_generator()
        self.critic = self.make_critic()

        # --- bookkeeping -------------------------------------------------
        self.dir = 'EEG_samples'   # directory name for generated samples
        self.subject = subject
        self.task = task

    def make_generator(self):
        '''
        Assemble the generator network.

        A bias-free dense layer projects the noise vector to 4 * 41 * 256
        units, which are reshaped to (4, 41, 256) and passed through three
        bias-free transposed convolutions; the last one uses tanh and
        produces (25, 342, channels).
        :return: the Sequential generator model
        '''
        net = Sequential()
        bn_momentum = self.momentum

        layer_stack = [
            # noise -> (4, 41, 256)
            Dense(4 * 41 * 256, use_bias=False, input_shape=(self.noise, )),
            BatchNormalization(momentum=bn_momentum),
            LeakyReLU(),
            Reshape((4, 41, 256)),
            # (4, 41) -> (11, 84)
            Conv2DTranspose(128, (5, 4), strides=(2, 2), padding='valid',
                            use_bias=False),
            BatchNormalization(momentum=bn_momentum),
            LeakyReLU(),
            # (11, 84) -> (25, 171)
            Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='valid',
                            use_bias=False),
            BatchNormalization(momentum=bn_momentum),
            LeakyReLU(),
            # (25, 171) -> (25, 342), tanh output
            Conv2DTranspose(self.channels, (5, 5), strides=(1, 2),
                            padding='same', use_bias=False,
                            activation='tanh'),
        ]
        for layer in layer_stack:
            net.add(layer)

        assert net.output_shape == (None, 25, 342, self.channels)
        return net

    def make_critic(self):
        '''
        Assemble the critic network.

        Two convolution blocks (Conv2D, batch norm, LeakyReLU, dropout) are
        followed by a flatten and a single dense unit, so the model outputs
        one unbounded score per input of shape (25, 342, channels).
        :return: the Sequential critic model
        '''
        net = Sequential()
        slope, drop_rate = self.alpha, self.dropout

        layer_stack = [
            # block 1
            Conv2D(64, (5, 5), strides=(2, 2), padding='same',
                   input_shape=[25, 342, self.channels]),
            BatchNormalization(),
            LeakyReLU(alpha=slope),
            Dropout(drop_rate),
            # block 2
            Conv2D(128, (5, 5), strides=(1, 2), padding='same'),
            BatchNormalization(),
            LeakyReLU(alpha=slope),
            Dropout(drop_rate),
            # score head
            Flatten(),
            Dense(1),
        ]
        for layer in layer_stack:
            net.add(layer)

        assert net.output_shape == (None, 1)
        return net

    def make_fakedata(self, noise_shape=100):
        '''
        Sample ``noise_shape`` standard-normal vectors of length
        ``self.noise`` and pass them through the generator in inference mode.
        :return: Generated signal, Noise np.array
        '''
        latent_shape = (noise_shape, self.noise)
        noise = np.random.normal(0, 1, latent_shape)
        generated = self.generator(noise, training=False)
        return generated, noise

    def critic_loss(self, f_logits, r_logits):
        '''
        Wasserstein loss of the critic: the mean score given to fake signals
        minus the mean score given to real signals.  Minimising it widens the
        gap between real and fake scores.
        :param f_logits: critic scores of fake signals
        :param r_logits: critic scores of real signals
        :return: Wasserstein Critic Loss Value
        '''
        mean_fake_score = reduce_mean(f_logits)
        mean_real_score = reduce_mean(r_logits)
        return mean_fake_score - mean_real_score

    def generator_loss(self, f_logits):
        '''
        Wasserstein loss of the generator: the negated mean critic score of
        the fake signals.
        :param f_logits: critic scores of fake signals
        :return: Wasserstein Generator Loss Value
        '''
        mean_fake_score = reduce_mean(f_logits)
        return -mean_fake_score

    def gradient_penalty(self, critic, real_signal, fake_signal):
        '''
        Gradient penalty used instead of weight clipping to push the critic
        towards being 1-Lipschitz.

        Random points on the straight lines between paired real and fake
        samples are scored by the critic; the penalty is the batch mean of
        ``(||grad||_2 - 1) ** 2``, where the gradient is taken w.r.t. the
        interpolated input and the norm is computed per sample.

        Fixes over the previous version: the interpolation moved away from
        the fake sample (``real + d * (real - fake)``), only the gradient of
        the first sample was used, and the raw norm was returned instead of
        its squared distance from 1.
        :param critic: callable mapping a batch of signals to critic scores
        :param real_signal: batch of real signals, rank 4
        :param fake_signal: batch of generated signals, same shape
        :return: scalar gradient penalty (to be scaled and added to the critic loss)
        '''

        # One mixing coefficient per sample, broadcast over the other axes.
        # tf.shape keeps this valid when the static batch size is unknown.
        batch = tf.shape(real_signal)[0]
        delta = tf.random.uniform([batch, 1, 1, 1], 0., 1.)
        inter = real_signal + delta * (fake_signal - real_signal)

        # `inter` is not a variable, so it has to be watched explicitly.
        with tf.GradientTape() as tape:
            tape.watch(inter)
            pred = critic(inter)

        grad = tape.gradient(pred, inter)
        # Per-sample L2 norm over all non-batch axes; the small constant
        # keeps the square root differentiable when the gradient is zero.
        gradient_l2_norm = tf.sqrt(
            tf.reduce_sum(tf.square(grad), axis=[1, 2, 3]) + 1e-12)

        return reduce_mean(tf.square(gradient_l2_norm - 1.))

    @tf.function
    def train_step(self, sig):
        '''
        One training step: ``self.critic_iter`` critic updates followed by a
        single generator update.

        Each critic update draws fresh noise, scores generated and real
        signals, and minimises the critic loss plus ``self.gp_weight`` times
        the gradient penalty.  The generator update draws new noise and
        minimises the generator loss.
        :param sig: batch of real signals
        :return: critic loss of the last critic update, generator loss
        '''
        n_samples = sig.shape[0]

        # --- critic updates ----------------------------------------------
        for _ in range(self.critic_iter):
            with tf.GradientTape() as critic_tape:
                latent = tf.random.normal([n_samples, self.noise])

                fake_batch = self.generator(latent, training=True)
                fake_scores = self.critic(fake_batch, training=True)
                real_scores = self.critic(sig, training=True)

                critic_loss = self.critic_loss(fake_scores, real_scores)
                # The penalty evaluates the critic in training mode as well.
                penalty = self.gradient_penalty(
                    partial(self.critic, training=True), sig, fake_batch)
                critic_loss += self.gp_weight * penalty

            critic_grads = critic_tape.gradient(
                critic_loss, self.critic.trainable_variables)
            self.critic_optimiser.apply_gradients(
                zip(critic_grads, self.critic.trainable_variables))

        # --- generator update --------------------------------------------
        latent = tf.random.normal([n_samples, self.noise])

        with tf.GradientTape() as generator_tape:
            fake_batch = self.generator(latent, training=True)
            fake_scores = self.critic(fake_batch, training=True)
            gen_loss = self.generator_loss(fake_scores)

        generator_grads = generator_tape.gradient(
            gen_loss, self.generator.trainable_variables)
        self.gen_optimiser.apply_gradients(
            zip(generator_grads, self.generator.trainable_variables))

        return critic_loss, gen_loss

    # training loop
    def train(self, dataset, epochs, sample_interval=100):
        '''
        Train the WGAN on ``dataset`` for ``epochs`` epochs.

        Every epoch iterates over the shuffled, batched dataset and calls the
        compiled ``train_step()`` on each batch. Every ``sample_interval``
        epochs the mean losses are printed and a generated-vs-real comparison
        figure is saved to ``self.dir`` and shown. After the last epoch the
        loss history is plotted and the generator is saved under
        ``<cwd>/EEG_Samples``.

        :param dataset: numpy array of real samples, first axis = trials; the
            plotting code indexes it as [trial, :, :, channel]
        :param epochs: number of passes over the dataset
        :param sample_interval: print/plot/save every this many epochs
        :return: None
        '''

        g_tot, d_tot = [], []

        # Allows us to 'unpack' our dataset using .from_tensor_slices,
        # shuffling it and also batching it.
        data = tf.data.Dataset.from_tensor_slices(dataset.astype('float32')) \
            .shuffle(dataset.shape[0]).batch(self.batchsize)

        # start training loop
        for epoch in range(epochs):
            # Fresh lists every epoch so the means below describe this epoch
            # only, instead of a running average over all epochs so far.
            gen_loss, disc_loss = [], []

            for image_batch in data:
                disc_loss_batch, gen_loss_batch = self.train_step(image_batch)

                # Turn into plain Python/numpy scalars.
                # NOTE(review): train_step returns the loss of the last
                # critic iteration only, so dividing by critic_iter rescales
                # the reported value -- confirm this is intended.
                disc_loss_batch = tf.reduce_mean(
                    disc_loss_batch).numpy() / float(self.critic_iter)
                gen_loss_batch = tf.reduce_mean(gen_loss_batch).numpy()

                gen_loss.append(gen_loss_batch)
                disc_loss.append(disc_loss_batch)

            g_loss = sum(gen_loss) / len(gen_loss)
            d_loss = sum(disc_loss) / len(disc_loss)

            g_tot.append(g_loss)
            d_tot.append(d_loss)

            if epoch % sample_interval == 0:
                print("epoch: {}, generator loss: {}, discriminator loss: {}".
                      format(epoch, g_loss, d_loss))

                # Generate a fake signal and pick one real trial to compare
                # it against. The trial index is clamped so datasets with 30
                # or fewer trials still work.
                generated_signal, _ = self.make_fakedata(noise_shape=100)
                trial_num, channel = min(30, dataset.shape[0] - 1), 0
                real_signal = np.expand_dims(dataset[trial_num], axis=0)

                # Plot the generated and the real sample side by side for
                # the selected channel.
                fig, axs = plt.subplots(1, 2)
                fig.suptitle(
                    'Comparison of Generated vs. Real Signal (Spectrogram) for one trial, one channel'
                )
                axs[0].imshow(generated_signal[0, :, :, channel],
                              aspect='auto')
                axs[0].set_title('Generated Signal', size=10)
                axs[0].set_xlabel('Time Sample')
                axs[0].set_ylabel('Frequency Sample')
                axs[1].imshow(real_signal[0, :, :, channel], aspect='auto')
                axs[1].set_title('Real Signal', size=10)
                axs[1].set_xlabel('Time Sample')
                axs[1].set_ylabel('Frequency Sample')
                # Lay out once the labels exist so they are accounted for.
                fig.tight_layout()

                # Save before showing: with an interactive backend the figure
                # can be torn down when the show() window is closed, which
                # would leave an empty image on disk.
                os.makedirs(self.dir, exist_ok=True)
                fig.savefig("%s/%d.png" % (self.dir, epoch))
                plt.show()
                plt.close(fig)

        # Plot the generator and discriminator losses for all the epochs
        plt.figure()
        plt.plot(g_tot, 'r')
        plt.plot(d_tot, 'b')
        plt.title('Loss history')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend(['Generator', 'Discriminator'])
        plt.grid()
        plt.show()

        # Save the generator, tagged with subject and task, so it can be
        # used to generate fake samples later. The target folder is created
        # if it does not exist yet.
        fp = os.path.join(os.getcwd(), 'EEG_Samples')
        os.makedirs(fp, exist_ok=True)
        sp = os.path.join(
            fp, 'Subject{}WGAN_Model_Data_For_Task{}.h5'.format(
                self.subject, self.task))
        self.generator.save(sp)
Exemple #10
0
class ActorNet():
	""" Deterministic actor network with a soft-updated target copy
		(DDPG-style actor-critic).

		in_dim    : observation shape passed to the Keras Input layer
		out_dim   : number of action dimensions
		act_range : scale applied to the tanh output
		lr_       : learning rate for the Adam optimizer
		tau_      : soft-update coefficient used by target_update()
	"""
	def __init__(self, in_dim, out_dim, act_range, lr_, tau_):
		self.obs_dim = in_dim
		self.act_dim = out_dim
		self.act_range = act_range
		self.lr = lr_
		self.tau = tau_

		# initialize actor network and target
		self.network = self.create_network()
		self.target_network = self.create_network()

		# initialize optimizer
		self.optimizer = Adam(self.lr)

		# start the target network from the same weights as the online one
		self.target_network.set_weights(self.network.get_weights())

	def create_network(self):
		""" Create an Actor Network Model using Keras:
			Dense(24) -> Dense(16) -> Dense(act_dim), each followed by
			batch normalization; the tanh output is scaled by act_range.
		"""
		# input layer(observations)
		input_ = Input(shape=self.obs_dim)

		# hidden layer 1
		h1_ = Dense(24,kernel_initializer=GlorotNormal())(input_)
		h1_b = BatchNormalization()(h1_)
		h1 = Activation('relu')(h1_b)

		# hidden_layer 2
		h2_ = Dense(16,kernel_initializer=GlorotNormal())(h1)
		h2_b = BatchNormalization()(h2_)
		h2 = Activation('relu')(h2_b)

		# output layer(actions), squashed to [-1, 1] then scaled
		output_ = Dense(self.act_dim,kernel_initializer=GlorotNormal())(h2)
		output_b = BatchNormalization()(output_)
		output = Activation('tanh')(output_b)
		scalar = self.act_range * np.ones(self.act_dim)
		out = Lambda(lambda i: i * scalar)(output)

		return Model(input_,out)

	def train(self, obs, critic):
		""" Update the actor's weights so that the critic's value of the
			actor's own actions increases.

			obs    : batch of observations
			critic : callable taking [obs, actions] and returning values
		"""
		with tf.GradientTape() as tape:
			# NOTE(review): the network is called without training=True,
			# so BatchNormalization presumably runs in inference mode
			# here -- confirm this is intended.
			actions = self.network(obs)
			# minimizing the negative value maximizes the critic's estimate
			actor_loss = -tf.reduce_mean(critic([obs,actions]))
			tf.print("actor loss :",actor_loss)
		actor_grad = tape.gradient(actor_loss,self.network.trainable_variables)
		self.optimizer.apply_gradients(zip(actor_grad,self.network.trainable_variables))

	def target_update(self):
		""" soft target update for training target actor network:
			target <- tau * online + (1 - tau) * target
		"""
		weights, weights_t = self.network.get_weights(), self.target_network.get_weights()
		blended = [
			self.tau*w + (1-self.tau)*w_t
			for w, w_t in zip(weights, weights_t)
		]
		self.target_network.set_weights(blended)

	def predict(self, obs):
		""" predict function for Actor Network; a leading batch axis is
			added to obs before it is passed to the network
		"""
		return self.network.predict(np.expand_dims(obs, axis=0))

	def target_predict(self, new_obs):
		"""  predict function for Target Actor Network; new_obs is passed
			through unchanged (no batch axis is added)
		"""
		return self.target_network.predict(new_obs)

	def save_network(self, path):
		""" save both networks' weights to <path>_actor.h5 / <path>_actor_t.h5
		"""
		self.network.save_weights(path + '_actor.h5')
		self.target_network.save_weights(path +'_actor_t.h5')

	def load_network(self, path):
		""" load both networks' weights written by save_network and print
			the online network's summary
		"""
		self.network.load_weights(path + '_actor.h5')
		self.target_network.load_weights(path + '_actor_t.h5')
		# summary() prints by itself and returns None, so it is not wrapped
		# in print(), which would emit an extra "None" line
		self.network.summary()
Exemple #11
0
    def gan(self):
        """Build a combined GAN training graph and return its feed function.

        Both models are updated simultaneously in a single ``sess.run``
        pass. The returned ``gan_feed(sess, batch_image, z_input)`` runs one
        training step and returns ``[dloss, gloss]``.
        """
        # Graph inputs shaped like the models' own inputs (batch axis dropped).
        noise = Input(shape=self.generator.input_shape[1:])
        real_data = Input(shape=self.discriminator.input_shape[1:])

        fake_score = self.discriminator(self.generator(noise))
        real_score = self.discriminator(real_data)

        def safe_log(value):
            # small offset keeps the log finite when a score reaches 0
            return K.log(value+1e-11)

        # single side label smoothing: replace 1.0 with 0.9
        fake_term = safe_log(1-fake_score)
        smoothed_real_term = .1 * safe_log(1-real_score) + .9 * safe_log(real_score)
        dloss = - K.mean(fake_term + smoothed_real_term)
        gloss = - K.mean(safe_log(fake_score))

        # NOTE(review): b1 is defined but never passed to the optimizer --
        # confirm whether beta1 was meant to be set.
        lr, b1 = 1e-4, .2  # otherwise won't converge.
        optimizer = tf.train.AdamOptimizer(lr)

        # One optimizer instance drives both updates.
        d_grads_and_vars = optimizer.compute_gradients(
            dloss, self.discriminator.trainable_weights)
        update_wd = optimizer.apply_gradients(d_grads_and_vars)

        g_grads_and_vars = optimizer.compute_gradients(
            gloss, self.generator.trainable_weights)
        update_wg = optimizer.apply_gradients(g_grads_and_vars)

        def get_internal_updates(model):
            # get all internal update ops (like moving averages) of a model,
            # one entry per input tensor of each inbound node
            return [
                model.get_updates_for(tensor)
                for node in model.inbound_nodes
                for tensor in node.input_tensors
            ]

        # those updates includes batch norm.
        other_parameter_updates = [
            get_internal_updates(self.discriminator),
            get_internal_updates(self.generator),
        ]

        print('other_parameter_updates for the models(mainly for batch norm):')
        print(other_parameter_updates)

        train_step = [update_wd, update_wg, other_parameter_updates]
        losses = [dloss, gloss]

        learning_phase = K.learning_phase()

        def gan_feed(sess,batch_image,z_input):
            # actual GAN trainer: runs the updates and fetches both losses
            feed = {
                noise: z_input,
                real_data: batch_image,
                # Keras layers need to know whether this run is training
                # or testing (batch norm and dropout behave differently)
                learning_phase: True,
            }
            _, loss_values = sess.run([train_step, losses], feed_dict=feed)
            return loss_values #[dloss,gloss]

        return gan_feed
class StyleGAN():
    """StyleGAN-like generator/discriminator pair trained with WGAN losses.

    The constructor loads (or creates and stores) the learned-constant seed
    tensor from ``path``, builds the generator, the discriminator and the
    stacked generator->discriminator model, and creates one Adam optimizer
    per network. ``self.reals``, ``self.z`` and ``self.noise`` are expected
    to be filled in by the caller before ``train_on_batch_*`` is used.
    """

    def __init__(self, batch_size, img_height, img_width, channels, path):
        self.lamda = 10
        self.reals = None
        self.z = None
        self.noise = None
        self.fakes = None
        self.batch_size = batch_size
        self.epoch = 0
        self.path = path

        self.const = None
        self.load_const(path)

        self.img_height = img_height
        self.img_width = img_width
        self.channels = channels

        self.const_tensor = replicate(self.const, self.batch_size)
        self.Generator, self.Discriminator, self.Stacked_model = \
            self._get_model()

        self.optimizer_D = Adam(learning_rate=0.0001, beta_1=0, beta_2=0.9)
        self.optimizer_G = Adam(learning_rate=0.0001, beta_1=0, beta_2=0.9)

    def _Get_Generator(self):
        """Build the generator: an 8-layer mapping network z -> w followed
        by a synthesis stack whose per-block RGB outputs are summed."""
        z = Input(shape=(512, ))

        # Mapping network: eight Dense(512, relu) layers in sequence.
        w = z
        for _ in range(8):
            w = Dense(512, activation='relu')(w)

        noise_inp = Input(shape=(self.img_height, self.img_width, 1))

        # First block works directly on the replicated constant tensor.
        x = self.const_tensor
        y_s, y_b = A_block(w, 512)
        noise = B_block(noise_inp, 512, 4)
        hidden = Add()([x, noise])
        hidden = AdaIN()([hidden, y_b, y_s])
        hidden = Activation('relu')(hidden)

        # Remaining synthesis blocks; each also emits an RGB contribution.
        x_out = []
        for filters in (256, 128, 64, 32, 16, 8):
            hidden, rgb = G_block(hidden, w, noise_inp, filters)
            x_out.append(rgb)

        x_out = Add()(x_out)
        x_out = Activation('tanh')(x_out)
        return Model([z, noise_inp], x_out)

    def _Get_Discriminator(self):
        """Build the discriminator: a 1x1 convolution followed by six
        D_blocks and a single linear output unit."""
        x = Input(shape=(self.img_height, self.img_width, self.channels))
        hidden = Conv2D(16, (1, 1), padding='same')(x)
        # The first D_block consumes the 1x1 conv output. Previously it was
        # fed `x`, which silently dropped that convolution from the model.
        # NOTE: this changes the discriminator's weight layout, so
        # Discriminator.h5 files saved before this fix will not load.
        for filters in (16, 32, 64, 128, 256, 512):
            hidden = D_block(hidden, filters)
        hidden = Flatten()(hidden)
        x_out = Dense(1)(hidden)
        return Model(x, x_out)

    def _get_model(self):
        """Build and return (generator, discriminator, stacked model); the
        discriminator is marked non-trainable before it is stacked."""
        G = self._Get_Generator()
        D = self._Get_Discriminator()
        D.trainable = False
        z = Input(shape=(512, ))
        noise_inp = Input(shape=(self.img_height, self.img_width, 1))
        hidden = G([z, noise_inp])
        x_out = D(hidden)
        GD = Model([z, noise_inp], x_out)
        GD.summary()
        return G, D, GD

    def train_on_batch_G(self):
        """Run one generator update on ``self.z`` / ``self.noise`` and
        return the loss as a float."""
        with tf.GradientTape() as tape:
            logits = self.Stacked_model([self.z, self.noise], training=True)
            loss_value = WGAN_loss_G(logits)
        # Only the generator's weights are differentiated and updated.
        grads = tape.gradient(loss_value, self.Generator.trainable_weights)

        self.optimizer_G.apply_gradients(
            zip(grads, self.Generator.trainable_weights))
        return float(loss_value)

    def train_on_batch_D(self):
        """Run one discriminator update and return the loss as a float.

        Fresh fakes are generated into ``self.fakes`` first; the loss helper
        receives this whole object."""
        with tf.GradientTape() as tape:
            self.fakes = self.Generator([self.z, self.noise], training=True)
            loss_value = WGAN_loss_D(self)
        grads = tape.gradient(loss_value, self.Discriminator.trainable_weights)

        self.optimizer_D.apply_gradients(
            zip(grads, self.Discriminator.trainable_weights))
        return float(loss_value)

    def save_model(self, path):
        """Back up any existing checkpoint files in ``path`` to ``*.bak``,
        then write the model details and both networks' weights."""
        array = np.array(
            [self.epoch, self.img_height, self.img_width, self.batch_size],
            dtype=int)

        print("Backup...", end='')
        # Each file is backed up only if it exists. os.replace overwrites a
        # previous backup, which os.rename refuses to do on Windows.
        for stem in ('Model', 'Generator', 'Discriminator'):
            current = os.path.join(path, stem + '.h5')
            if os.path.isfile(current):
                os.replace(current, os.path.join(path, stem + '.bak'))
        print("Done!")

        print("Saving...", end='')
        with h5py.File(os.path.join(path, 'Model.h5'), 'w') as f:
            f.create_dataset("model_details", data=array)
        self.Generator.save_weights(os.path.join(path, 'Generator.h5'))
        self.Discriminator.save_weights(os.path.join(path, 'Discriminator.h5'))
        print("Done!")

    def load_model(self, path):
        """Restore epoch, image size, batch size and both networks' weights
        from the files written by ``save_model``."""
        with h5py.File(os.path.join(path, 'Model.h5'), 'r') as f:
            data = f['model_details']
            # Store plain ints rather than h5py/numpy scalars.
            self.epoch = int(data[0])
            self.img_height, self.img_width = int(data[1]), int(data[2])
            self.batch_size = int(data[3])
        self.Generator.load_weights(os.path.join(path, 'Generator.h5'))
        self.Discriminator.load_weights(os.path.join(path, 'Discriminator.h5'))

    def save_const(self, path):
        """Write ``self.const`` flattened to ``<path>/key_const.bin`` as
        text, one value per line."""
        const = np.reshape(self.const, (4 * 4 * 512))
        print(os.path.join(path, 'key_const.bin'))
        np.savetxt(os.path.join(path, 'key_const.bin'), const, delimiter=',')

    def load_const(self, path):
        """Load the (4, 4, 512) constant from ``<path>/key_const.bin``; if
        the file is missing, draw a new one from a standard normal and
        save it."""
        const_file = os.path.join(path, 'key_const.bin')
        if os.path.exists(const_file):
            const = np.loadtxt(const_file, delimiter=',')
            self.const = np.reshape(const, (4, 4, 512))
        else:
            self.const = np.random.normal(size=(4, 4, 512))
            self.save_const(path)
Exemple #13
0
class CriticNet():
	""" Critic Network for DDPG: a Q-value approximator over
		(observation, action) pairs with a soft-updated target copy.

		in_dim          : observation shape passed to the Keras Input layer
		out_dim         : number of action dimensions
		lr_             : learning rate for the Adam optimizer
		tau_            : soft-update coefficient used by target_update()
		discount_factor : stored on the instance; not used inside this class
	"""
	def __init__(self, in_dim, out_dim, lr_, tau_, discount_factor):
		self.obs_dim = in_dim
		self.act_dim = out_dim
		self.lr = lr_
		self.discount_factor = discount_factor
		self.tau = tau_

		# initialize critic network and target
		self.network = self.create_network()
		self.target_network = self.create_network()

		self.optimizer = Adam(self.lr)

		# start the target network from the same weights as the online one
		self.target_network.set_weights(self.network.get_weights())

		# last training loss as a Python float, set by train()
		self.critic_loss = None

	def create_network(self):
		""" Create a Critic Network Model using Keras
			as a Q-value approximator function
		"""
		# input layer(observations and actions)
		input_obs = Input(shape=self.obs_dim)
		input_act = Input(shape=(self.act_dim,))
		inputs = [input_obs,input_act]
		concat = Concatenate(axis=-1)(inputs)

		# hidden layer 1
		h1_ = Dense(300, kernel_initializer=GlorotNormal(), kernel_regularizer=l2(0.01))(concat)
		h1_b = BatchNormalization()(h1_)
		h1 = Activation('relu')(h1_b)

		# hidden_layer 2
		h2_ = Dense(400, kernel_initializer=GlorotNormal(), kernel_regularizer=l2(0.01))(h1)
		h2_b = BatchNormalization()(h2_)
		h2 = Activation('relu')(h2_b)

		# output layer(single Q-value)
		output_ = Dense(1, kernel_initializer=GlorotNormal(), kernel_regularizer=l2(0.01))(h2)
		output_b = BatchNormalization()(output_)
		output = Activation('linear')(output_b)

		return Model(inputs,output)

	def Qgradient(self, obs, acts):
		""" Gradient of the Q-values with respect to the actions
		"""
		acts = tf.convert_to_tensor(acts)
		with tf.GradientTape() as tape:
			# acts is a plain tensor, so it must be watched explicitly
			tape.watch(acts)
			q_values = self.network([obs,acts])
			q_values = tf.squeeze(q_values)
		return tape.gradient(q_values, acts)

	def train(self, obs, acts, target):
		"""Train Q-network for critic on sampled batch by minimizing the
			mean squared difference between Q(obs, acts) and target
		"""
		with tf.GradientTape() as tape:
			q_values = self.network([obs, acts], training=True)
			td_error = q_values - target
			# NOTE(review): the l2 kernel_regularizer penalties are
			# presumably collected in self.network.losses and are not
			# added to this loss -- confirm whether that is intended.
			critic_loss = tf.reduce_mean(tf.math.square(td_error))
			tf.print("critic loss :",critic_loss)

		# keep the latest loss available as a plain float
		self.critic_loss = float(critic_loss)

		critic_grad = tape.gradient(critic_loss, self.network.trainable_variables)  # compute critic gradient
		self.optimizer.apply_gradients(zip(critic_grad, self.network.trainable_variables))

	def predict(self, obs):
		"""Predict Q-value from approximation function(Q-network);
			obs is forwarded unchanged to the Keras model
		"""
		return self.network.predict(obs)

	def target_predict(self, new_obs):
		"""Predict target Q-value from approximation function(Q-network)
		"""
		return self.target_network.predict(new_obs)

	def target_update(self):
		""" soft target update for training target critic network:
			target <- tau * online + (1 - tau) * target
		"""
		weights, weights_t = self.network.get_weights(), self.target_network.get_weights()
		blended = [
			self.tau*w + (1-self.tau)*w_t
			for w, w_t in zip(weights, weights_t)
		]
		self.target_network.set_weights(blended)

	def save_network(self, path):
		""" save both networks' weights to <path>_critic.h5 / <path>_critic_t.h5
		"""
		self.network.save_weights(path + '_critic.h5')
		self.target_network.save_weights(path + '_critic_t.h5')

	def load_network(self, path):
		""" load both networks' weights written by save_network and print
			the online network's summary
		"""
		self.network.load_weights(path + '_critic.h5')
		self.target_network.load_weights(path + '_critic_t.h5')
		# summary() prints by itself and returns None, so it is not wrapped
		# in print(), which would emit an extra "None" line
		self.network.summary()