Example no. 1
def train(model, preprocess, num_epochs, batch_size, num_steps, lr):
    optimizer = Adam(lr=lr, clipnorm=0.01)
    now = time.time()
    loss = tf.keras.losses.SparseCategoricalCrossentropy()
    cnt = 0
    for epoch in range(num_epochs):
        l_sum = 0
        n = 0
        data_iter = preprocess.get_data_iter(batch_size=batch_size,
                                             num_steps=num_steps)
        for x, y in data_iter:
            with tf.GradientTape() as tape:
                x = tf.one_hot(x, depth=len(preprocess.idx_to_char))
                whole_sequence, _, _ = model(x)
                l = loss(y, whole_sequence)
            # Compute and apply the gradients outside the tape context; a
            # non-persistent tape is enough since it is only used once.
            grads = tape.gradient(l, model.trainable_variables)
            optimizer.apply_gradients(zip(grads,
                                          model.trainable_variables))
            l_sum += np.array(l).item() * len(y)
            n += len(y)
            cnt += 1

        # Report perplexity and a sample prediction once per epoch rather than
        # once per batch.
        print('epoch %d, perplexity %f, time %.2f sec' %
              (epoch + 1, math.exp(l_sum / n), time.time() - now))
        print(
            predict_rnn('分开', 50, model, len(preprocess.idx_to_char),
                        preprocess.idx_to_char, preprocess.char_to_idx))
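The perplexity reported above is just the exponential of the average per-character cross-entropy accumulated in l_sum / n. A minimal standalone sketch of that bookkeeping, with made-up batch losses:

import math

# two batches of 32 characters with average losses 1.2 and 0.9
l_sum = 1.2 * 32 + 0.9 * 32
n = 2 * 32
perplexity = math.exp(l_sum / n)   # exp(1.05) ~= 2.86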
Example no. 2
class ActorCritic:
    def __init__(self, n_actions, p_lr=.001, c_lr=.001):
        self.policy = Policy(n_actions=n_actions)
        self.critic = Critic()
        self.p_optim = Adam(learning_rate=p_lr)
        self.c_optim = Adam(learning_rate=c_lr)

    @tf.function
    def train_step(self, env, initial_state: tf.Tensor, gamma: float,
                   max_steps_per_episode: int) -> tf.Tensor:

        episode_reward = tf.constant(0.0)
        # tf.range (rather than Python range) lets AutoGraph build a graph loop
        # that can break on the tensor-valued `done` flag.
        for t in tf.range(max_steps_per_episode):
            with tf.GradientTape() as tape, tf.GradientTape() as tape2:
                results = run_episode_step(env, initial_state,
                                           gamma, max_steps_per_episode)

                next_state, done, action_probs, value, next_value, reward = results

                done, action_probs, value, next_value, reward = [
                    tf.expand_dims(x, 1) for x in [done, action_probs, value,
                                                   next_value, reward]
                ]

                # Both losses must be computed inside the tape contexts so that
                # their operations are recorded for differentiation.
                actor_loss = compute_loss(action_probs, value, next_value,
                                          reward, done)
                critic_loss = huber_loss(
                    value, reward + tf.cast(gamma, tf.float32) * next_value)

            actor_grads = tape.gradient(actor_loss,
                                        self.policy.trainable_variables)
            critic_grads = tape2.gradient(critic_loss,
                                          self.critic.trainable_variables)
            self.p_optim.apply_gradients(
                zip(actor_grads, self.policy.trainable_variables))
            self.c_optim.apply_gradients(
                zip(critic_grads, self.critic.trainable_variables))
            episode_reward += tf.reduce_sum(reward)

            if tf.reduce_all(tf.cast(done, tf.bool)):
                break

        return episode_reward
Example no. 3
def pre_train(generator, train_dataset, valid_dataset, steps, evaluate_every=1, lr_rate=1e-4):
    loss_mean = Mean()
    pre_train_loss = MeanSquaredError()
    pre_train_optimizer = Adam(lr_rate)

    now = time.perf_counter()

    step = 0
    for lr, hr in train_dataset.take(steps):
        step = step+1

        with tf.GradientTape() as tape:
            lr = tf.cast(lr, tf.float32)
            hr = tf.cast(hr, tf.float32)

            sr = generator(lr, training=True)
            loss_value = pre_train_loss(hr, sr)

        gradients = tape.gradient(loss_value, generator.trainable_variables)
        pre_train_optimizer.apply_gradients(zip(gradients, generator.trainable_variables))
        loss_mean(loss_value)

        if step % evaluate_every == 0:
            loss_value = loss_mean.result()
            loss_mean.reset_states()

            psnr_value = evaluate(generator, valid_dataset)

            duration = time.perf_counter() - now
            print(
                f'{step}/{steps}: loss = {loss_value.numpy():.3f}, PSNR = {psnr_value.numpy():.3f} ({duration:.2f}s)')

            now = time.perf_counter()
Example no. 4
    def train(self,X,Y,learningRate,indexes=None):

        #very sorry L2 is currently out of order I will fix this later

        #apply L2 regularization to avoid overfitting
        #this is really really important
        #regularizer=l2(L2val)#just to be clear this is tf.keras.regularizers.l2
        #regularizer(self.weights)

        #compute gradients of weights and biases
        with GradientTape() as g:
            myTrainableVariables=self.getTrainableVariables()
            g.watch(myTrainableVariables)

            #calculate error
            if self.debug:
                print("EXECUTING")
            guess=self.evaluate(X)
            #calculate error using squared error
            if self.debug:
                print("TRAINING")
            if self.errorFunction.multipleLabels:
                error=self.errorFunction.execute(guess,Y)
            else:
                error=self.errorFunction.execute(guess,Y,indexes)

        # Note: a fresh Adam optimizer is created on every call, so its moment
        # estimates do not persist across training steps.
        optimizer=Adam(learningRate)
        grads=g.gradient(error,myTrainableVariables)
        optimizer.apply_gradients(zip(grads,myTrainableVariables))
        return error
Example no. 5
class Collection_Critic(tf.keras.Model):
    def __init__(self, critics):
        super().__init__(name="Coll_Critic")
        self.crit = critics
        self.optimizer = Adam(learning_rate=0.0001, beta_1=0, beta_2=0.9)
        self.n = 0

    def update_current_n_layer(self):
        self.n += 1

    def start_fading(self, n):
        self.crit[len(self.crit) - 1 - n].activate_fade_in()
        self.update_current_n_layer()

    def stop_fading(self, n):
        self.crit[len(self.crit) - 1 - n].disactivate_fade_in()

    def call(self, input_tensor):
        x = input_tensor
        for i in range(len(self.crit) - 1 - self.n, len(self.crit)):
            x = self.crit[i](x)
        return x

    def compute_loss(self, y_true, y_pred):
        """ Wasserstein loss
        """
        return backend.mean(y_true * y_pred)

    def backPropagate(self, gradients, trainable_variables):
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))
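The Wasserstein (critic) loss above relies on the usual WGAN label convention: one class of samples is labelled +1 and the other -1, so that mean(y_true * y_pred) reduces to a difference of the critic's average scores. A minimal standalone sketch under that assumption, with made-up scores:

import numpy as np

def wasserstein_loss(y_true, y_pred):
    return np.mean(y_true * y_pred)

real_scores = np.array([0.8, 0.6])    # critic scores on real images
fake_scores = np.array([0.1, -0.2])   # critic scores on generated images

# Labelling real as -1 and fake as +1, minimising the summed loss drives the
# critic to score real images higher than generated ones.
critic_loss = (wasserstein_loss(-np.ones(2), real_scores) +
               wasserstein_loss(np.ones(2), fake_scores))   # -> -0.75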
Example no. 6
    def train(self,X,Y,Yi,learningRate,L2val):

        #very sorry L2 is currently out of order I will fix this later

        #apply L2 regularization to avoid overfitting
        #this is really really important
        #regularizer=l2(L2val)#just to be clear this is tf.keras.regularizers.l2
        #regularizer(self.weights)

        #compute gradients of weights and biases
        with GradientTape() as g:
            # Watching the full variable set once is enough; there is no need to
            # re-watch it per layer.
            g.watch(self.getTrainableVariables())

            #calculate error
            guess=self.evaluate([[constant(j) for j in i] for i in X])#convert everything in x to tensorflow format
            #calculate error using squared error
            error=0
            for i in range(len(Y)):
                error+=(guess[i][Yi[i]]-Y[i][Yi[i]])**2
            error=error/len(Y)

        optimizer=Adam(learningRate)
        grads=g.gradient(error,self.getTrainableVariables())
        optimizer.apply_gradients(zip(grads,self.getTrainableVariables()),)
        return error
Example no. 7
    def fit(self, batch, epochs=10, batch_size=32, verbose=True, **kwargs):
        """
        Fit model on given batch

        Parameters: 
            batch: List of tuple: (states, game result (value), node values (probs))
            epochs: Number of epochs to train with (optional, default=10)
            batch_size: (optional, default=32)
        """
        optimizer = Adam(**kwargs)
        for e in range(epochs):
            batch_sample = random.sample(batch, batch_size)
            states, true_values, true_probs = self.transform_batch(
                batch_sample)

            with tf.GradientTape() as tape:
                pred_values, pred_probs = self(states)

                # Add a square to give more importance to the value loss
                value_loss = tf.math.square(
                    tf.keras.losses.mean_squared_error(true_values,
                                                       pred_values))

                # categorical_crossentropy already carries the minus sign
                # (-sum(p_true * log p_pred)), so it is added to the total loss
                # rather than negated.
                prob_loss = tf.keras.losses.categorical_crossentropy(
                    true_probs, pred_probs)
                total_loss = value_loss + prob_loss

            gradients = tape.gradient(total_loss, self.trainable_variables)
            optimizer.apply_gradients(zip(gradients, self.trainable_variables))

        tf.print("Probs loss:", sum(prob_loss), "Value loss:", sum(value_loss),
                 "Total loss:", sum(total_loss))
Example no. 8
class REINFORCEAgent:
    def __init__(self, state_size, action_size):
        # Define the state size and the action size
        self.state_size = state_size
        self.action_size = action_size

        # REINFORCE hyperparameters
        self.discount_factor = 0.99
        self.learning_rate = 0.001

        self.model = REINFORCE(self.action_size)
        self.optimizer = Adam(lr=self.learning_rate)
        self.states, self.actions, self.rewards = [], [], []

    # Select an action with the policy network
    def get_action(self, state):
        policy = self.model(state)[0]
        policy = np.array(policy)
        return np.random.choice(self.action_size, 1, p=policy)[0]

    # Compute the discounted returns
    def discount_rewards(self, rewards):
        discounted_rewards = np.zeros_like(rewards)
        running_add = 0
        for t in reversed(range(0, len(rewards))):
            running_add = running_add * self.discount_factor + rewards[t]
            discounted_rewards[t] = running_add
        return discounted_rewards
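    # Worked example (hypothetical numbers): with discount_factor = 0.99 and
    # rewards [1.0, 1.0, 1.0], the loop fills the returns back to front:
    #   G_2 = 1.0
    #   G_1 = 1.0 + 0.99 * 1.0  = 1.99
    #   G_0 = 1.0 + 0.99 * 1.99 = 2.9701
    # so discount_rewards([1.0, 1.0, 1.0]) -> [2.9701, 1.99, 1.0]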

    # Store the states, actions and rewards of one episode
    def append_sample(self, state, action, reward):
        self.states.append(state[0])
        self.rewards.append(reward)
        act = np.zeros(self.action_size)
        act[action] = 1
        self.actions.append(act)

    # Update the policy network
    def train_model(self):
        discounted_rewards = np.float32(self.discount_rewards(self.rewards))
        discounted_rewards -= np.mean(discounted_rewards)
        discounted_rewards /= np.std(discounted_rewards)

        # Compute the cross-entropy loss
        model_params = self.model.trainable_variables
        with tf.GradientTape() as tape:
            tape.watch(model_params)
            policies = self.model(np.array(self.states))
            actions = np.array(self.actions)
            action_prob = tf.reduce_sum(actions * policies, axis=1)
            cross_entropy = -tf.math.log(action_prob + 1e-5)
            loss = tf.reduce_sum(cross_entropy * discounted_rewards)
            entropy = -policies * tf.math.log(policies)

        # Update the model in the direction that reduces the loss
        grads = tape.gradient(loss, model_params)
        self.optimizer.apply_gradients(zip(grads, model_params))
        self.states, self.actions, self.rewards = [], [], []
        return np.mean(entropy)
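The objective assembled in train_model above is the standard REINFORCE loss, loss = sum_t [-log pi(a_t | s_t)] * G_t, where the returns G_t are standardised to zero mean and unit variance before use, a common variance-reduction trick: the subtracted mean plays the role of a constant baseline and the division by the standard deviation only rescales the update.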
Example no. 9
def train(generator,
          discriminator,
          train_ds,
          valid_ds,
          steps=2000,
          lr_rate=1e-4):
    generator_optimizer = Adam(learning_rate=lr_rate)
    discriminator_optimizer = Adam(learning_rate=lr_rate)
    # Bind the feature extractor to a new name so the vgg() factory is not
    # shadowed (vgg = vgg() would raise UnboundLocalError here).
    vgg_model = vgg()

    pls_metric = tf.keras.metrics.Mean()
    dls_metric = tf.keras.metrics.Mean()

    step = 0

    for lr, hr in train_ds.take(steps):
        step += 1

        with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
            lr = tf.cast(lr, tf.float32)
            hr = tf.cast(hr, tf.float32)

            # Forward pass
            sr = generator(lr, training=True)
            hr_output = discriminator(hr, training=True)
            sr_output = discriminator(sr, training=True)

            # Compute losses
            con_loss = content_loss(vgg_model, hr, sr)
            gen_loss = generator_loss(sr_output)
            perc_loss = con_loss + 0.001 * gen_loss
            disc_loss = discriminator_loss(hr_output, sr_output)

        # Compute gradient of perceptual loss w.r.t. generator weights
        gradients_of_generator = gen_tape.gradient(
            perc_loss, generator.trainable_variables)
        # Compute gradient of discriminator loss w.r.t. discriminator weights
        gradients_of_discriminator = disc_tape.gradient(
            disc_loss, discriminator.trainable_variables)

        # Update weights of generator and discriminator
        generator_optimizer.apply_gradients(
            zip(gradients_of_generator, generator.trainable_variables))
        discriminator_optimizer.apply_gradients(
            zip(gradients_of_discriminator, discriminator.trainable_variables))

        pl, dl = perc_loss, disc_loss
        pls_metric(pl)
        dls_metric(dl)

        print(
            f'{step}/{steps}, perceptual loss = {pls_metric.result():.4f}, discriminator loss = {dls_metric.result():.4f}'
        )
        pls_metric.reset_states()
        dls_metric.reset_states()

    generator.save_weights('pre-trained/generator.h5')
    discriminator.save_weights('pre-trained/discriminator.h5')
Example no. 10
class TrainAgent:
    def __init__(self, env, episodes=1000, alpha=0.01, gamma=0.9, alpha_decay_rate=0.9):
        self.env = Environment(env=env)
        self.episodes = episodes
        self.lr = ExponentialDecay(alpha, episodes, alpha_decay_rate)
        self.optimizer = Adam(self.lr)
        self.action_count, self.states_count = self.env.spaces_count()
        self.gamma = gamma
        self._net = ReinforcePolicyNet(action_count=self.action_count, states_count=self.states_count)
        self._model = ReinforcePolicyModel(self._net)
        self._agent = ReinforcePolicyAgent(env=self.env, model=self._model, gamma=gamma)
        self.huber_loss = Huber(reduction=tf.keras.losses.Reduction.SUM)

    def compute_loss(self, action_prob, epi_return, values):
        """
            actually action prob is our policy that give each action a probability over a distribution.
            here the actor loss is -mean(pi(a|s) * Bt) which should be minimized, 'Bt ' refers to the Baseline that we use
            with the purpose of reducing the variance
        """
        advantage = epi_return - values
        prob = tf.math.log(action_prob + 1e-30)
        actor_loss = -tf.math.reduce_mean(prob * advantage)

        critic_loss = self.huber_loss(values, epi_return)

        return critic_loss + actor_loss
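    # A quick worked illustration (hypothetical numbers): with
    # epi_return = [2.0, 1.0] and values = [1.5, 1.2], the advantages are
    # [0.5, -0.2], so the log-probability of the first action is pushed up and
    # that of the second pushed down, while the critic is regressed towards the
    # observed returns via the Huber loss.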

    @tf.function
    def train_step(self, init_state: tf.Tensor):
        with tf.GradientTape() as tape:
            episode_return, action_probs, rewards, values = self._agent.run(max_steps=200, init_state=init_state)
            loss = self.compute_loss(action_prob=action_probs, epi_return=episode_return, values=values)

        grads = tape.gradient(loss, self._net.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self._net.trainable_variables))

        episode_rewards = tf.math.reduce_sum(rewards)
        return episode_rewards

    @staticmethod
    def plot_me(total, avg, cnt):
        plt.clf()
        plt.plot(cnt, total, label='rewards')
        plt.plot(cnt, avg, label='average reward')
        plt.legend()
        plt.pause(0.01)

    def run(self):
        e_r = []
        count = []
        avg_reward = []
        for episode in range(self.episodes):
            init_state = tf.constant(self.env.reset_env(), dtype=tf.float32)
            e_r.append(int(self.train_step(init_state)))
            count.append(episode)
            avg = sum(e_r) / len(count)
            avg_reward.append(avg)
            self.plot_me(e_r,avg_reward, count)
            print(f"episode {episode}/{self.episodes}, reward: {e_r[episode]}")
Example no. 11
def _train_vanilla_gan_on_mnist(args):
    model_name = args.model_name
    n_epochs = args.n_epochs
    batch_size = args.batch_size

    generator_model = GeneratorModelMNIST(**args)
    discriminator_model = DiscriminatorModelMNIST(**args)
    generator_optimizer = Adam(1e-4)
    discriminator_optimizer = Adam(1e-4)

    data_generator = get_mnist_dataset()

    noise_dim = args.generator_noise_dim
    num_examples_to_generate = args.num_examples_to_generate
    seed = tf.random.normal([num_examples_to_generate, noise_dim])

    plotting_callback = ml_utils.PlotAndSaveImages(test_input=seed,
                                                   model=generator_model,
                                                   model_name=model_name)

    gen_ckpt = ml_utils.SimpleModelCheckPoint(model_name="mnist_generator",
                                              model=generator_model)
    disc_ckpt = ml_utils.SimpleModelCheckPoint(
        model_name="mnist_discriminator", model=discriminator_model)

    num_iterations = len(data_generator)

    for epoch in range(n_epochs):
        start = time()
        for i, image_batch in enumerate(data_generator):
            input_noise = tf.random.uniform(shape=(image_batch.shape[0],
                                                   noise_dim))

            # A fresh pair of tapes is needed for every batch; a non-persistent
            # tape can only be used for a single gradient computation.
            with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
                generated_images = generator_model(input_noise, training=True)

                true_output = discriminator_model(image_batch, training=True)
                fake_output = discriminator_model(generated_images,
                                                  training=True)

                gen_loss = generator_loss(fake_output)
                disc_loss = discriminator_loss(true_output, fake_output)

            gen_gradients = gen_tape.gradient(
                gen_loss, generator_model.trainable_variables)
            disc_gradients = disc_tape.gradient(
                disc_loss, discriminator_model.trainable_variables)

            generator_optimizer.apply_gradients(
                zip(gen_gradients, generator_model.trainable_variables))
            discriminator_optimizer.apply_gradients(
                zip(disc_gradients,
                    discriminator_model.trainable_variables))

        logs = {"loss": gen_loss}
        gen_ckpt.on_epoch_end(epoch=epoch)
        disc_ckpt.on_epoch_end(epoch=epoch)
        plotting_callback.on_train_end()
        general_utils.smart_print(start, len(data_generator), i, epoch,
                                  n_epochs, gen_loss, disc_loss)
Example no. 12
class IVModel(Model):
    def __init__(self):
        super().__init__()

    def call(self, x):
        for layer in self.h_layers:
            x = layer(x)
        return x

    def _add_loss(self, loss):
        self.loss = loss

    def _set_lr(self, lr):
        if lr is not None:
            self.optimizer = Adam(lr=lr)

    def train_step(self, x_batch):
        total_increment = tf.squeeze(x_batch[:, -1, 0] - x_batch[:, 0, 0])
        with tf.GradientTape() as tape:
            int_var = self(x_batch)
            int_var = tf.squeeze(int_var)
            loss_value = self.loss(total_increment, int_var)
        grads = tape.gradient(loss_value, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        return loss_value

    def train(self, x_train, num_epochs, batch_size, lr=None, true_int_var=None, show_loss = True, show_hist=False):
        self._set_lr(lr)
        n_steps = x_train.shape[0] // batch_size
        x_train = create_train_dataset(x_train, batch_size)
        losses = []
        mses = []
        for epoch in range(num_epochs):
            with tqdm_notebook(total=n_steps, desc=f'Epoch {epoch+1} of {num_epochs}') as progress:
                for step, x_batch in enumerate(x_train):
                    progress.update()
                    loss_val = self.train_step(x_batch)
                    losses.append(loss_val.numpy())
            if show_loss:
                plot_loss(losses)
            int_var = self(x_batch).numpy().squeeze()
            if true_int_var is not None:
                mse_val = _mse_metric(true_int_var, int_var)
                mses.append(mse_val.numpy())
            if show_hist:
                plot_hist(x_batch, int_var, true_int_var)

        self.history = {'loss': losses, 'mse': mses}
        return self.history

    def predict_iv(self, x):
        iv = self(x).numpy()
        return iv.squeeze()

    def predict_z(self, x):
        iv = self.predict_iv(x)
        z = (x[:,-1] - x[:,0])/np.sqrt(iv)
        return z
Example no. 13
class DQN:
    def __init__(self, num_states, num_actions, model, target_model, buffer,
                 gamma, batch_size, learning_rate, min_experience):
        self.num_states = num_states
        self.num_actions = num_actions
        self.model = model
        self.target_model = target_model
        self.buffer = buffer
        self.gamma = gamma
        self.batch_size = batch_size
        self.optimizer = Adam(learning_rate=learning_rate)
        self.min_experience = min_experience

    def predict(self, state):
        return self.model(np.atleast_2d(state.astype('float32')))

    def update_model(self, target_model):
        if len(self.buffer.buffer['state']) < self.min_experience:
            return 0

        # Get mini batch
        index = np.random.randint(low=0,
                                  high=len(self.buffer.buffer['state']),
                                  size=self.batch_size)
        states = np.asarray([self.buffer.buffer['state'][i] for i in index])
        actions = np.asarray([self.buffer.buffer['action'][i] for i in index])
        rewards = np.asarray([self.buffer.buffer['reward'][i] for i in index])
        next_states = np.asarray(
            [self.buffer.buffer['next_state'][i] for i in index])
        dones = np.asarray([self.buffer.buffer['done'][i] for i in index])

        next_action_values = np.max(target_model.predict(next_states), axis=1)
        # print(next_action_values)
        # np.where(condition, a, b) selects elements of a where condition is
        # True and elements of b otherwise: for terminal transitions
        # (done = True) the target is just the reward, with no discounted value
        # bootstrapped from the next state.
        target_values = np.where(dones, rewards,
                                 rewards + self.gamma * next_action_values)
        # print(target_values)

        # Update neural network weights
        with tf.GradientTape() as tape:
            action_values = tf.math.reduce_sum(
                self.predict(states) * tf.one_hot(actions, self.num_actions),
                axis=1)
            # print(action_values)
            # The Q network is trained by minimising this loss function
            loss = tf.math.reduce_mean(tf.square(target_values -
                                                 action_values))
        # Gradient descent by differentiating loss function w.r.t. weights
        variables = self.model.trainable_variables
        gradients = tape.gradient(loss, variables)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, variables))
        return loss
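A minimal standalone sketch of the target computation used in update_model above, with made-up numbers:

import numpy as np

gamma = 0.99
rewards = np.array([1.0, 0.5])
next_action_values = np.array([2.0, 3.0])   # max_a' Q_target(s', a')
dones = np.array([False, True])

# Non-terminal transitions bootstrap from the target network; terminal ones
# use the reward alone.
targets = np.where(dones, rewards, rewards + gamma * next_action_values)
# -> array([2.98, 0.5])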
Example no. 14
class Critic(tf.keras.Model):
    def __init__(self,model_parameters=None):
        super().__init__(name = "critic")
        if model_parameters is None:
            model_parameters = {
                'lr': 0.0001,
                'beta1': 0,
                'batch_size': 64,
                'latent_dim': 128,
                'image_size': 152
            }
        self.layers_blocks = list()
        self.model_parameters = model_parameters
        # Integer division: Conv2D filter counts must be integers.
        dim = model_parameters['batch_size'] // 2
        init = RandomNormal(stddev=0.02)
        #Layers
        self.conv_1 = Conv2D(dim, (5, 5), strides=(2, 2), padding='same', kernel_initializer=init, input_shape=[model_parameters['image_size'], model_parameters['image_size'], 3])
        self.leaky_1 = LeakyReLU(alpha=0.2)
        
        number_of_layers_needed = int(math.log(model_parameters['image_size'],2))-3
        for i in range(number_of_layers_needed):
            dim *= 2
            self.layers_blocks.append([
               Conv2D(dim, (5, 5), strides=(2, 2), padding='same', kernel_initializer=init),
               LayerNormalization(),
               LeakyReLU(alpha=0.2)
            ])

        self.flat = Flatten()
        self.logits = Dense(1)  # This neuron tells us how real or fake the input is
        
        self.optimizer = Adam(learning_rate=model_parameters['lr'],beta_1=model_parameters['beta1'],beta_2=0.9)

    def call(self, input_tensor, training = True):
        ## Definition of Forward Pass
        x = self.leaky_1(self.conv_1(input_tensor))
        for i in range(len(self.layers_blocks)):
            layers_block = self.layers_blocks[i]
            for layer in layers_block:
                x = layer(x, training = training)
        x = self.flat(x)
        return self.logits(x)

    def compute_loss(self,y_true,y_pred):
        """ Wasserstein loss
        """
        return backend.mean(y_true * y_pred) 

    def backPropagate(self,gradients,trainable_variables):
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))

    def save_optimizer(self):
        weights = self.optimizer.get_weights()
        data_access.store_weights_in_file('c_optimizer_weights',weights)
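With the default image_size of 152, number_of_layers_needed = int(log2(152)) - 3 = 7 - 3 = 4, so after conv_1 halves the input to 76x76 the four extra stride-2 blocks take the feature map through 76 -> 38 -> 19 -> 10 -> 5 (with 'same' padding) before Flatten and the final Dense(1).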
Example no. 15
def lipschitz_lb(f, X1, X2, iterations=1000, verbose=True):

    optimizer = Adam(lr=0.0001)

    X1 = tf.Variable(X1, name='x1', dtype='float32')
    X2 = tf.Variable(X2, name='x2', dtype='float32')
    
    max_L = None

    if verbose:
        pb = Progbar(iterations, stateful_metrics=['LC'])
    
    for _ in range(iterations):
        with tf.GradientTape() as tape:
            y1 = f(X1)
            y2 = f(X2)
            
            # The definition of the margin is not entirely symmetric: the top
            # class must remain the same when measuring both points. We assume
            # X1 is the reference point for determining the top class.
            original_predictions = tf.cast(
                tf.equal(y1, tf.reduce_max(y1, axis=1, keepdims=True)), 
                'float32')
            
            # This takes the logit at the top class for both X1 and X2.
            y1_j = tf.reduce_sum(
                y1 * original_predictions, axis=1, keepdims=True)
            y2_j = tf.reduce_sum(
                y2 * original_predictions, axis=1, keepdims=True)
            
            margin1 = y1_j - y1
            margin2 = y2_j - y2

            axes = tuple((tf.range(len(X1.shape) - 1) + 1).numpy())
            
            L = tf.abs(margin1 - margin2) / (tf.sqrt(
                tf.reduce_sum((X1 - X2)**2, axis=axes)) + EPS)[:,None]

            loss = -tf.reduce_max(L, axis=1)
            
        grad = tape.gradient(loss, [X1, X2])

        optimizer.apply_gradients(zip(grad, [X1, X2]))
        
        if max_L is None:
            max_L = L
        else:
            max_L = tf.maximum(max_L, L)

        if verbose:
            pb.add(1, [('LC', tf.reduce_max(max_L))])
        
    return tf.reduce_max(max_L)
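The quantity tracked in the loop above is the finite-difference ratio L(X1, X2) = |m(X1) - m(X2)| / (||X1 - X2||_2 + EPS), where m(x) = y_j(x) - y_k(x) is the margin of the top class j (fixed by the prediction at X1) over every other class k. Since any such ratio lower-bounds the Lipschitz constant of the margin function, performing gradient ascent on it over the pair (X1, X2) and keeping the running maximum max_L yields the lower bound that is finally returned.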
Example no. 16
class ReinforceBaseLine(Reinforce):
    def __init__(self, env, actor_lr, critic_lr, policy, critic, gamma,
                 max_episodes, max_eps_steps):
        super().__init__(env, actor_lr, policy, gamma, max_episodes,
                         max_eps_steps)
        self.critic = critic
        self.critic_optimizer = Adam(critic_lr)

    def _run_episode(self):
        state = tf.constant(self.env.reset(), dtype=tf.float32)
        rewards = tf.TensorArray(tf.float32, 0, True)
        action_probs = tf.TensorArray(tf.float32, 0, True)
        state_values = tf.TensorArray(tf.float32, 0, True)

        state_shape = state.shape

        for step in tf.range(self.max_eps_steps):
            action, action_logits_step = self.get_action(state)
            action_probs_step = tf.nn.softmax(action_logits_step)[0, action]
            state, reward, done = self.tf_env_step(action)
            value = self.critic(tf.expand_dims(state, 0))

            self.steps_taken += 1

            action_probs = action_probs.write(step, action_probs_step)
            rewards = rewards.write(step, reward)
            state_values = state_values.write(step, value)

            state.set_shape(state_shape)

            if tf.cast(done, tf.bool):
                break
        return action_probs.stack(), rewards.stack(), state_values.stack()

    def train(self):
        with tf.GradientTape() as tape, tf.GradientTape() as tape2:
            action_probs, rewards, values = self._run_episode()
            discounted_rewards = compute_discounted_rewards(
                rewards, self.gamma)
            policy_loss = _compute_policy_loss(action_probs,
                                               discounted_rewards, values)
            critic_loss = huber_loss(values, discounted_rewards)
        policy_grads = tape.gradient(policy_loss,
                                     self.policy.trainable_variables)
        critic_grads = tape2.gradient(critic_loss,
                                      self.critic.trainable_variables)
        self.policy_optimizer.apply_gradients(
            zip(policy_grads, self.policy.trainable_variables))
        self.critic_optimizer.apply_gradients(
            zip(critic_grads, self.critic.trainable_variables))

        return rewards
Example no. 17
class ContinuousA2CAgent:
    def __init__(self, action_size, max_action):
        self.render = False

        # Define the action size
        self.action_size = action_size
        self.max_action = max_action

        # Actor-critic hyperparameters
        self.discount_factor = 0.99
        self.learning_rate = 0.001

        # Create the policy network and the value network
        self.model = ContinuousA2C(self.action_size)
        # Set up the optimizer; clipnorm keeps the gradients from growing too large
        self.optimizer = Adam(lr=self.learning_rate, clipnorm=1.0)

    # Sample an action stochastically from the policy network's output
    def get_action(self, state):
        mu, sigma, _ = self.model(state)
        dist = tfd.Normal(loc=mu[0], scale=sigma[0])
        action = dist.sample([1])[0]
        action = np.clip(action, -self.max_action, self.max_action)
        return action

    # Update the policy network and the value network at every time step
    def train_model(self, state, action, reward, next_state, done):
        model_params = self.model.trainable_variables
        with tf.GradientTape() as tape:
            mu, sigma, value = self.model(state)
            _, _, next_value = self.model(next_state)
            target = reward + (1 - done) * self.discount_factor * next_value[0]

            # Compute the policy network loss
            advantage = tf.stop_gradient(target - value[0])
            dist = tfd.Normal(loc=mu, scale=sigma)
            action_prob = dist.prob([action])[0]
            cross_entropy = -tf.math.log(action_prob + 1e-5)
            actor_loss = tf.reduce_mean(cross_entropy * advantage)

            # Compute the value network loss
            critic_loss = 0.5 * tf.square(tf.stop_gradient(target) - value[0])
            critic_loss = tf.reduce_mean(critic_loss)

            # Combine them into a single loss
            loss = 0.1 * actor_loss + critic_loss

        # Update the model in the direction that reduces the loss
        grads = tape.gradient(loss, model_params)
        self.optimizer.apply_gradients(zip(grads, model_params))
        return loss, sigma
Example no. 18
class Learner:
    def __init__(self, config: MuZeroConfig, storage: SharedStorage,
                 replay_buffer: ReplayBuffer):
        self.config = config
        self.storage = storage
        self.replay_buffer = replay_buffer
        self.summary = create_summary(name="learner")
        self.metrics_loss = Mean('learner-loss', dtype=tf.float32)
        self.network = Network(self.config)
        self.lr_schedule = ExponentialDecay(
            initial_learning_rate=self.config.lr_init,
            decay_steps=self.config.lr_decay_steps,
            decay_rate=self.config.lr_decay_rate)
        self.optimizer = Adam(learning_rate=self.lr_schedule)

    def start(self):
        while self.network.training_steps() < self.config.training_steps:
            if ray.get(self.replay_buffer.size.remote()) > 0:

                self.train()

                if self.network.training_steps(
                ) % self.config.checkpoint_interval == 0:
                    weights = self.network.get_weights()
                    self.storage.update_network.remote(weights)

                if self.network.training_steps(
                ) % self.config.save_interval == 0:
                    self.network.save()

        print("Finished")

    def train(self):
        batch = ray.get(self.replay_buffer.sample_batch.remote())

        with tf.GradientTape() as tape:
            loss = self.network.loss_function(batch)

        grads = tape.gradient(loss, self.network.get_variables())
        self.optimizer.apply_gradients(zip(grads,
                                           self.network.get_variables()))

        self.metrics_loss(loss)
        with self.summary.as_default():
            tf.summary.scalar('loss', self.metrics_loss.result(),
                              self.network.training_steps())
        self.metrics_loss.reset_states()

        self.network.update_training_steps()
Example no. 19
class Generator(tf.keras.Model):
    def __init__(self,
                 latent_dim=256,
                 batch_size=64,
                 channels=[32, 64, 64, 128, 128]):
        super().__init__(name="Generator")
        cc = channels[-1]
        self.batch_size = batch_size
        self.latent_dim = latent_dim
        self.inp = InputLayer(input_shape=(self.latent_dim, ))
        self.dense_1 = Dense(8 * batch_size * 5 * 5, name='Generator_Dense_1')
        self.relu = ReLU()
        self.reshape_1 = Reshape((5, 5, 8 * batch_size))
        self.reses = list()
        for ch in reversed(channels[:-1]):
            self.reses.append([ResidualBlock(cc, ch), UpSampling2D()])
            cc = ch
        self.res_block_n = ResidualBlock(cc, cc)
        self.toRGB = Conv2D(3, (3, 3),
                            activation='tanh',
                            padding='same',
                            use_bias=False,
                            name='Generator_To_RGB')
        self.optimizer = Adam(learning_rate=0.0002, beta_1=0.5, beta_2=0.9)

    def call(self, input_tensor, training=True):
        x = self.inp(input_tensor)
        x = self.dense_1(x)
        x = self.relu(x)
        x = self.reshape_1(x)
        for i in range(len(self.reses)):
            x = self.reses[i][0](x, training=training)
            x = self.reses[i][1](x, training=training)
            if (i == 1):
                x = Cropping2D(cropping=((1, 0), (1, 0)))(x)
        x = self.res_block_n(x, training=training)
        x = self.toRGB(x)
        return x

    def backPropagate(self, gradients, trainable_variables):
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))

    def set_seed(self):
        self.seed = tf.random.normal([self.batch_size, self.latent_dim])
        data_access.store_seed_in_file('seed', self.seed)

    def load_seed(self):
        self.seed = data_access.load_seed_from_file('seed')
Example no. 20
class Generator(tf.keras.Model):
    
    def __init__(self, random_noise_size = 128,batch_s = 64):
        super().__init__(name='generator')
        #layers
        init = RandomNormal(stddev=0.02)
        dim = 4 * batch_s
        self.dense_1 = Dense(7*7*dim, use_bias = False, input_shape = (random_noise_size,))
        self.batchNorm1 = BatchNormalization()
        self.leaky_1 = LeakyReLU(alpha=0.2)
        self.reshape_1 = Reshape((7,7,dim))
        
        self.up_2 = UpSampling2D((1,1), interpolation='nearest')
        self.conv2 = Conv2D(dim // 2, (5, 5), strides = (1,1), padding = "same", use_bias = False, kernel_initializer=init)
        self.batchNorm2 = BatchNormalization()
        self.leaky_2 = LeakyReLU(alpha=0.2)
        
        self.up_3 = UpSampling2D((2,2), interpolation='nearest')
        self.conv3 = Conv2D(dim // 4, (5, 5), strides = (1,1), padding = "same", use_bias = False, kernel_initializer=init)
        self.batchNorm3 = BatchNormalization()
        self.leaky_3 = LeakyReLU(alpha=0.2)
        
        self.up_4 = UpSampling2D((2,2), interpolation='nearest')
        self.conv4 = Conv2D(1, (5, 5), activation='tanh', strides = (1,1), padding = "same", use_bias = False, kernel_initializer=init)

        self.optimizer = Adam(learning_rate=0.0001,beta_1=0,beta_2=0.9)
        self.seed = tf.random.normal([batch_s, random_noise_size])
               
    def call(self, input_tensor):
        ## Definition of Forward Pass
        x = self.leaky_1(self.batchNorm1(self.reshape_1(self.dense_1(input_tensor))))
        x = self.leaky_2(self.batchNorm2(self.conv2(self.up_2(x))))
        x = self.leaky_3(self.batchNorm3(self.conv3(self.up_3(x))))
        x = self.conv4(self.up_4(x))
        return x
    
    def generate_noise(self,batch_size, random_noise_size):
        return tf.random.normal([batch_size, random_noise_size])

    def compute_loss(self,y_true,y_pred,class_wanted,class_prediction):
        """ Wasserstein loss - prob of classifier get it right
        """
        k = 10 # hiper-parameter
        kl = KLDivergence()
        return backend.mean(y_true * y_pred) + (k * kl(class_wanted,class_prediction))

    def backPropagate(self,gradients,trainable_variables):
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))
Example no. 21
class A2CAgent:
    def __init__(self, action_size):
        self.render = False

        # Define the action size
        self.action_size = action_size

        # Actor-critic hyperparameters
        self.discount_factor = 0.99
        self.learning_rate = 0.001

        # Create the policy network and the value network
        self.model = A2C(self.action_size)
        # Set up the optimizer; clipnorm keeps the gradients from growing too large
        self.optimizer = Adam(lr=self.learning_rate, clipnorm=5.0)

    # Sample an action stochastically from the policy network's output
    def get_action(self, state):
        policy, _ = self.model(state)
        policy = np.array(policy[0])
        return np.random.choice(self.action_size, 1, p=policy)[0]

    # Update the policy network and the value network at every time step
    def train_model(self, state, action, reward, next_state, done):
        model_params = self.model.trainable_variables
        with tf.GradientTape() as tape:
            policy, value = self.model(state)
            _, next_value = self.model(next_state)
            target = reward + (1 - done) * self.discount_factor * next_value[0]

            # Compute the policy network loss
            one_hot_action = tf.one_hot([action], self.action_size)
            action_prob = tf.reduce_sum(one_hot_action * policy, axis=1)
            cross_entropy = -tf.math.log(action_prob + 1e-5)
            advantage = tf.stop_gradient(target - value[0])
            actor_loss = tf.reduce_mean(cross_entropy * advantage)

            # Compute the value network loss
            critic_loss = 0.5 * tf.square(tf.stop_gradient(target) - value[0])
            critic_loss = tf.reduce_mean(critic_loss)

            # Combine them into a single loss
            loss = 0.2 * actor_loss + critic_loss

        # Update the model in the direction that reduces the loss
        grads = tape.gradient(loss, model_params)
        self.optimizer.apply_gradients(zip(grads, model_params))
        return np.array(loss)
Example no. 22
class DeepSARSAgent:
    def __init__(self, state_size, action_size):
        # Define the state size and the action size
        self.state_size = state_size
        self.action_size = action_size

        # DeepSARSA hyperparameters
        self.discount_factor = 0.99
        self.learning_rate = 0.001
        self.epsilon = 1.
        self.epsilon_decay = .9999
        self.epsilon_min = 0.01
        self.model = DeepSARSA(self.action_size)
        self.optimizer = Adam(lr=self.learning_rate)

    # Select an action with an epsilon-greedy policy
    def get_action(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        else:
            q_values = self.model(state)
            return np.argmax(q_values[0])

    # Update the model from a <s, a, r, s', a'> sample
    def train_model(self, state, action, reward, next_state, next_action,
                    done):
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

        # Trainable model parameters
        model_params = self.model.trainable_variables
        with tf.GradientTape() as tape:
            tape.watch(model_params)
            predict = self.model(state)[0]
            one_hot_action = tf.one_hot([action], self.action_size)
            predict = tf.reduce_sum(one_hot_action * predict, axis=1)

            # When done = True the episode has ended, so there is no next state
            next_q = self.model(next_state)[0][next_action]
            target = reward + (1 - done) * self.discount_factor * next_q

            # Compute the MSE loss
            loss = tf.reduce_mean(tf.square(target - predict))

        # Update the model in the direction that reduces the loss
        grads = tape.gradient(loss, model_params)
        self.optimizer.apply_gradients(zip(grads, model_params))
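The target used above is the on-policy SARSA target, target = r + (1 - done) * gamma * Q(s', a'), which bootstraps from the action a' that the agent actually selected in the next state; a Q-learning/DQN update would instead bootstrap from max_a' Q(s', a'), as in the DQN example earlier.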
Example no. 23
class DeepSARSA_Agent:
    def __init__(self, step_size, action_size):
        # Define action_size, step_size
        self.step_size = step_size
        self.action_size = action_size

        # DeepSARSA hyper-parameters
        self.discount_factor = 0.99
        self.learning_rate = 0.001
        self.epsilon = 1.
        self.epsilon_decay = 0.9999
        self.epsilon_min = 0.1
        self.model = DeepSARSA(self.action_size)
        self.optimizer = Adam(self.learning_rate)

    # choose action based on epsilon-greedy policy
    def get_action(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        else:
            q_value = self.model(state)  # q_value.shape = (1, action_size)
            return np.argmax(q_value[0])

    # update model from <s,a,r,s',a'>
    def train_model(self, state, action, reward, next_state, next_action,
                    DONE):
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

        # train parameters
        model_params = self.model.trainable_variables
        with tf.GradientTape() as tape:
            tape.watch(model_params)
            predict = self.model(state)[0]
            one_hot_action = tf.one_hot([action], self.action_size)
            predict = tf.reduce_sum(one_hot_action * predict, axis=1)

            # if DONE is True the episode has ended, so there is no next state/action
            next_q = self.model(next_state)[0][next_action]
            target = reward + (1 - DONE) * self.discount_factor * next_q

            # Calculate MSE loss function
            loss = tf.reduce_mean(tf.square(target - predict))

        # model update
        gradients = tape.gradient(loss, model_params)
        self.optimizer.apply_gradients(zip(gradients, model_params))
Example no. 24
class Generator(tf.keras.Model):
    
    def __init__(self, random_noise_size = 100):
        super().__init__(name='generator')
        #layers
        
        self.dense_1 = Dense(7*7*256, use_bias = False, input_shape = (random_noise_size,))
        self.batchNorm1 = BatchNormalization()
        self.leaky_1 = LeakyReLU()
        self.reshape_1 = Reshape((7,7,256))
        
        #self.conv2 = Conv2DTranspose(128, (5, 5), strides = (1,1), padding = "same", use_bias = False)
        self.up_2 = UpSampling2D((1,1), interpolation='nearest')
        self.conv2 = Conv2D(128, (3, 3), strides = (1,1), padding = "same", use_bias = False)
        self.batchNorm2 = BatchNormalization()
        self.leaky_2 = LeakyReLU()
        
        #self.conv3 = Conv2DTranspose(64, (5, 5), strides = (2,2), padding = "same", use_bias = False)
        self.up_3 = UpSampling2D((2,2), interpolation='nearest')
        self.conv3 = Conv2D(64, (3, 3), strides = (1,1), padding = "same", use_bias = False)
        self.batchNorm3 = BatchNormalization()
        self.leaky_3 = LeakyReLU()
        
        #self.conv4 = Conv2DTranspose(1, (5, 5), strides = (2,2), padding = "same", use_bias = False, activation = "tanh")
        self.up_4 = UpSampling2D((2,2), interpolation='nearest')
        self.conv4 = Conv2D(1, (3, 3), strides = (1,1), padding = "same", use_bias = False)

        self.optimizer = Adam(1e-4)
        
    def call(self, input_tensor):
        ## Definition of Forward Pass
        x = self.reshape_1(self.leaky_1(self.batchNorm1(self.dense_1(input_tensor))))
        x = self.leaky_2(self.batchNorm2(self.conv2(self.up_2(x))))
        x = self.leaky_3(self.batchNorm3(self.conv3(self.up_3(x))))
        return  self.conv4(self.up_4(x))
    
    def generate_noise(self,batch_size, random_noise_size):
        return tf.random.normal([batch_size, random_noise_size])

    def objective(self,dx_of_gx):
        # Labels are ones here because the generator is trained as if its outputs were real images.
        cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits = True)
        return cross_entropy(tf.ones_like(dx_of_gx), dx_of_gx) 
    
    def backPropagate(self,gradients,trainable_variables):
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))
Example no. 25
def train(text_vectors, images, model, epochs, batch_size=128, lr=1e-4):
    loss_fn = tf.keras.losses.binary_crossentropy
    g_optimizer = Adam(lr)
    d_optimizer = Adam(lr)

    # Rescale -1 to 1
    images = images / 127.5 - 1.
    # images = np.expand_dims(images, axis=3)

    # Adversarial ground truths
    # valid = np.ones((batch_size, 1))
    # fake = np.zeros((batch_size, 1))

    for epoch in range(epochs):
        # idx = np.random.permutation(len(text_vectors))
        # img_batch = images[idx[:batch_size]]
        dataset = tf.data.Dataset.from_tensor_slices((images, text_vectors))
        dataset = dataset.shuffle(buffer_size=100)
        dataset = dataset.batch(batch_size)
        # img_dataset = tf.data.Dataset.from_tensor_slices(images)
        # img_dataset = img_dataset.batch(batch_size)
        # text_dataset = tf.data.Dataset.from_tensor_slices(text_vectors)
        # text_dataset = text_dataset.batch(batch_size)
        for data in dataset:
            fake_captions = derangement(data[1])
            with tf.GradientTape() as g_tape, tf.GradientTape() as d_tape:
                fake_image_pred, real_image_pred, fake_caption_pred = model(
                    data, fake_captions)
                fake_image_loss, real_image_loss, fake_caption_loss = \
                    loss_fn(tf.zeros_like(fake_image_pred), fake_image_pred), \
                    loss_fn(tf.ones_like(real_image_pred), real_image_pred), \
                    loss_fn(tf.zeros_like(fake_caption_pred), fake_caption_pred)
                d_loss = (fake_image_loss + real_image_loss +
                          fake_caption_loss) / 3
                g_loss = loss_fn(tf.ones_like(fake_image_pred),
                                 fake_image_pred)
            g_trainable_variables = model.text_encoder.trainable_variables + model.generator.trainable_variables
            # Generator gradients come from the generator tape/loss and the
            # discriminator gradients from the discriminator tape/loss.
            g_grads = g_tape.gradient(g_loss, g_trainable_variables)
            d_grads = d_tape.gradient(d_loss,
                                      model.discriminator.trainable_variables)
            g_optimizer.apply_gradients(zip(g_grads, g_trainable_variables))
            d_optimizer.apply_gradients(
                zip(d_grads, model.discriminator.trainable_variables))

        # Plot the progress
        print("%d [D loss: %f] [G loss: %f]" % (epoch, d_loss[0], g_loss[0]))
Example no. 26
def train(model, content_ds, style_ds, loss, n_epochs=10, save_path=None):
    save_interval = 100
    optimizer = Adam(lr=1e-4, decay=5e-5)
    n_batches = len(content_ds) // content_ds.batch_size
    process = psutil.Process(os.getpid())
    alpha = 1.0

    for e in range(1, n_epochs + 1):
        losses = {"total": 0.0, "content": 0.0, "style": 0.0, "color": 0.0}

        pbar = tqdm(total=n_batches, ncols=50)
        for i in range(n_batches):
            # Get batch
            content, style = content_ds.get_batch(), style_ds.get_batch()
            if content is None or style is None:
                break

            # Train on batch
            # total_loss, content_loss, weighted_style_loss, weighted_color_loss
            with tf.GradientTape() as tape:
                prediction = model([content, style, alpha])
                loss_values = loss([content, style], prediction)

            grads = tape.gradient(loss_values[0], model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

            for key, lss in zip(losses.keys(), loss_values):
                losses[key] = (losses[key] * i + lss) / (i + 1)

            string = "".join([
                f"{key} loss: {value:.3f}\t" for key, value in losses.items()
            ])
            pbar.set_description(f"Epoch {e}/{n_epochs}\t" + string +
                                 f"memory: {process.memory_info().rss}\t")
            pbar.update(1)

            if i % save_interval == 0:
                if save_path:
                    model.save(save_path)
        # Use a name that does not shadow the time module, and forward slashes
        # so the save path also works outside Windows.
        timestamp = datetime.datetime.now()
        print(timestamp.date(), timestamp.hour, timestamp.minute)
        model.save(
            f'saved/models/epoch{e}_{timestamp.date()}_{timestamp.hour}_{timestamp.minute}.h5'
        )
Example no. 27
class Critic(tf.keras.Model):
    def __init__(self):
        super().__init__(name="critic")

        init = RandomNormal(stddev=0.2)
        #Layers
        self.conv_1 = SpectralNormalization(
            Conv2D(64, (3, 3),
                   strides=(2, 2),
                   padding='same',
                   kernel_initializer=init,
                   input_shape=[28, 28, 1]))
        self.leaky_1 = LeakyReLU(alpha=0.2)
        self.dropout_1 = Dropout(0.3)

        self.conv_2 = SpectralNormalization(
            Conv2D(128, (3, 3),
                   strides=(2, 2),
                   padding='same',
                   kernel_initializer=init))
        self.leaky_2 = LeakyReLU(alpha=0.2)
        self.dropout_2 = Dropout(0.3)

        self.flat = Flatten()
        self.logits = Dense(
            1)  # This neuron tells us if the input is fake or real

        self.optimizer = Adam(learning_rate=0.0001, beta_1=0, beta_2=0.9)

    def call(self, input_tensor):
        ## Definition of Forward Pass
        x = self.dropout_1(self.leaky_1(self.conv_1(input_tensor)))
        x = self.dropout_2(self.leaky_2(self.conv_2(x)))
        x = self.flat(x)
        return self.logits(x)

    def compute_loss(self, y_true, y_pred):
        """ Wasserstein loss
        """
        return backend.mean(y_true * y_pred)

    def backPropagate(self, gradients, trainable_variables):
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))
Example no. 28
class Collection_Generator(tf.keras.Model):
    def __init__(self, generators):
        super().__init__(name="Coll_Generator")
        self.gens = generators
        self.optimizer = Adam(learning_rate=0.0001, beta_1=0, beta_2=0.9)
        self.n = 0

    def update_current_n_layer(self):
        self.n += 1

    def start_fading(self, n):
        self.gens[n].activate_fade_in()
        self.update_current_n_layer()

    def stop_fading(self, n):
        self.gens[n].disactivate_fade_in()

    def call(self, input_tensor):
        x = input_tensor
        for i in range(self.n + 1):
            x = self.gens[i](x)
        return x

    def set_seed(self):
        self.seed = tf.random.normal([16, 100])
        data_access.store_seed_in_file('seed', self.seed)

    def load_seed(self):
        self.seed = data_access.load_seed_from_file('seed')

    def generate_noise(self, batch_size, random_noise_size):
        return tf.random.normal([batch_size, random_noise_size])

    def backPropagate(self, gradients, trainable_variables):
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))

    def compute_loss(self, y_true, y_pred, class_wanted, class_prediction):
        """ Wasserstein loss - prob of classifier get it right
        """
        k = 10  # hiper-parameter
        return backend.mean(
            y_true * y_pred
        )  # + (k * categorical_crossentropy(class_wanted,class_prediction))
Example no. 29
class IntrinsicCuriosityModule:
    def __init__(self,
                 state_shape,
                 action_num,
                 latent_shape,
                 alpha=1e-4,
                 beta=0.2):
        self.icm = get_intrinsic_curiosity_module(state_shape, action_num,
                                                  latent_shape)
        self.beta = beta
        self.optimizer = Adam(learning_rate=alpha)

    def learn(self, states, actions, next_states):
        with tf.GradientTape() as tape:
            forward_losses, pred_actions = self(states, actions, next_states)
            forward_loss = K.mean(forward_losses)
            # MeanSquaredError is a class: instantiate it before calling it on
            # the true and predicted actions.
            inv_loss = MeanSquaredError()(actions, pred_actions)
            loss = (self.beta * forward_loss + (1 - self.beta) * inv_loss)
        grads = tape.gradient(loss, self.icm.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.icm.trainable_weights))
        return loss

    def __call__(self, state, action, next_state):
        # Expected shapes
        # state: [None, state_shape]
        # action: [None, action]
        # next_state: [None, state_shape]
        forward_loss, pred_action = self.icm(state, action, next_state)
        return forward_loss, pred_action

    @staticmethod
    def save_module(state_features_filepath, forward_model_filepath,
                    inverse_model_filepath):
        pass

    @staticmethod
    def load_module(state_features_filepath, forward_model_filepath,
                    inverse_model_filepath):
        pass
Example no. 30
    def __init__(self, num_nets, state_dim, action_dim, learning_rate):
        """
        :param num_nets: number of networks in the ensemble
        :param state_dim: state dimension
        :param action_dim: action dimension
        :param learning_rate:
        """

        self.sess = tf.Session()
        self.num_nets = num_nets
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.learning_rate = learning_rate
        K.set_session(self.sess)

        # Log variance bounds
        self.max_logvar = tf.Variable(-3 * np.ones([1, self.state_dim]),
                                      dtype=tf.float32)
        self.min_logvar = tf.Variable(-7 * np.ones([1, self.state_dim]),
                                      dtype=tf.float32)
        # Define ops for model output and optimization
        self.inputs = list()
        self.losses = list()
        self.means = list()
        self.logvars = list()
        self.models = list()
        self.outputs = list()
        self.targets = list()
        self.optimizations = list()
        for _ in range(self.num_nets):
            model, inp = self.create_network()
            self.inputs.append(inp)
            self.models.append(model)
            output = self.get_output(model.output)
            mean, logvar = output
            self.means.append(mean)
            self.logvars.append(logvar)
            self.outputs.append(output)
            target = tf.placeholder(tf.float32, shape=(None, self.state_dim))
            self.targets.append(target)
            var = tf.exp(logvar)
            inv_var = tf.divide(1, var)
            norm_output = mean - target
            # Calculate loss: Mahalanobis distance + log(det(cov))
            loss = tf.multiply(tf.multiply(norm_output, inv_var), norm_output)
            loss = tf.reduce_sum(loss, axis=1)
            loss += tf.math.log(tf.math.reduce_prod(var, axis=1))
            self.losses.append(loss)
            optimizer = Adam(lr=learning_rate)
            weights = model.trainable_weights
            gradients = tf.gradients(loss, weights)
            optimize = optimizer.apply_gradients(zip(gradients, weights))
            self.optimizations.append(optimize)
        self.sess.run(tf.global_variables_initializer())
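The per-network loss assembled above is, up to a constant factor and offset, the negative log-likelihood of the target under a diagonal Gaussian with predicted mean mu and log-variance l: loss = sum_d (mu_d - t_d)^2 * exp(-l_d) + sum_d l_d, i.e. the Mahalanobis distance of the target plus log det of the predicted covariance. The max_logvar / min_logvar variables are presumably used inside get_output to clamp the predicted log-variance to that range.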