Example #1
def training_step(batch_size=256, done_is_dead=False):
    upper = replay_buffer_cap if replay_buffer_populated else replay_buffer_pos
    idx = np.arange(upper)
    np.random.shuffle(idx)
    batch_idx = idx[:batch_size]
    states = replay_buffer["states"][batch_idx]
    actions = replay_buffer["actions"][batch_idx]
    rewards = replay_buffer["rewards"][batch_idx]
    next_states = replay_buffer["next_states"][batch_idx]
    done = replay_buffer["done"][batch_idx]

    next_actions, next_actions_log_prob = sample_action(next_states)
    next_critic_input = tf.concat((next_states, next_actions), axis=-1)
    next_min_q = tf.minimum(critic_1_target(next_critic_input)[:, 0], critic_2_target(next_critic_input)[:, 0])
    q_target = rewards + gamma * (next_min_q - alpha * next_actions_log_prob) * (1.0 - done if done_is_dead else 1.0)

    critic_input = tf.concat((states, actions), axis=-1)
    with tf.GradientTape() as c_tape:
        critic_loss = MSE(critic_1(critic_input)[:, 0], q_target) + MSE(critic_2(critic_input)[:, 0], q_target)
    c1_grad, c2_grad = c_tape.gradient(critic_loss, [critic_1.trainable_weights, critic_2.trainable_weights])
    critic_opt.apply_gradients(zip(c1_grad, critic_1.trainable_weights))
    critic_opt.apply_gradients(zip(c2_grad, critic_2.trainable_weights))

    polyak(critic_1_target, critic_1)
    polyak(critic_2_target, critic_2)

    with tf.GradientTape() as a_tape:
        new_actions, new_actions_log_prob = sample_action(states)
        new_critic_input = tf.concat((states, new_actions), axis=-1)
        new_min_q = tf.minimum(critic_1(new_critic_input)[:, 0], critic_2(new_critic_input)[:, 0])
        actor_loss = -(new_min_q - alpha * new_actions_log_prob)
    a_grad = a_tape.gradient(actor_loss, actor.trainable_weights)
    aopt.apply_gradients(zip(a_grad, actor.trainable_weights))
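The snippet above relies on a polyak helper for the target-network update that is not shown. A minimal sketch under the usual Polyak-averaging convention (the signature and tau value are assumptions, not part of the original):

# Hypothetical helper: blends the online network's weights into the target network.
def polyak(target_net, online_net, tau=0.005):
    for t_var, o_var in zip(target_net.trainable_weights, online_net.trainable_weights):
        t_var.assign(tau * o_var + (1.0 - tau) * t_var)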
Example #2
    def call(self, model, obs):
        """
            model = [y, x_ae, x_adv, y_adv_real, weights, evals, evecs, phi]
        """
        y = tf.identity(model[0])
        x_ae = tf.identity(model[1])
        x_adv = tf.identity(model[2])
        weights = model[4]
        pred_horizon = -1

        # Autoencoder reconstruction
        self.loss_recon = tf.reduce_mean(MSE(obs, x_ae))

        # DMD reconstruction in the latent space
        self.loss_dmd = self.dmdloss(y)

        # Future state prediction
        self.loss_pred = tf.reduce_mean(
            MSE(obs[:, :pred_horizon, :], x_adv[:, :pred_horizon, :]))

        # Regularization on weights
        self.loss_reg = tf.add_n([tf.nn.l2_loss(w) for w in weights])

        # Total loss
        self.total_loss = self.a1 * self.loss_recon + self.a2 * self.loss_dmd + \
                          self.a3 * self.loss_pred + self.a4 * self.loss_reg

        return self.total_loss
Example #3
    def _learn_tf(self, experiences, gamma):
        states, actions, rewards, next_states, dones = experiences

        # ---------------------------- update critic ---------------------------- #
        with tf.GradientTape(persistent=True) as tape:
            # Get predicted next-state actions and Q values from target models
            actions_next = self.actor_target.model(next_states)
            actions_next += tf.clip_by_value(
                tf.random.normal(shape=tf.shape(actions_next),
                                 mean=0.0,
                                 stddev=1e-3,
                                 dtype=tf.float64), -1e-3, 1e-3)
            Q1 = self.critic_target.model([next_states, actions_next])
            Q2 = self.critic2_target.model([next_states, actions_next])
            Q_targets_next = tf.math.minimum(Q1, Q2)
            # Compute Q targets for current states (y_i)
            Q_targets = rewards + (gamma * Q_targets_next * (1 - dones))
            # Compute critic loss
            Q1_expected = self.critic_local.model([states, actions])
            Q2_expected = self.critic2_local.model([states, actions])
            critic_loss = MSE(Q1_expected, Q_targets) + MSE(
                Q2_expected, Q_targets)

        # Minimize the loss
        critic1_grad = tape.gradient(
            critic_loss, self.critic_local.model.trainable_variables)
        critic2_grad = tape.gradient(
            critic_loss, self.critic2_local.model.trainable_variables)
        self.critic_optimizer.apply_gradients(
            zip(critic1_grad, self.critic_local.model.trainable_variables))
        self.critic_optimizer.apply_gradients(
            zip(critic2_grad, self.critic2_local.model.trainable_variables))

        if self.train_step % self.actor_update_freq == 0:
            # ---------------------------- update actor ---------------------------- #
            with tf.GradientTape() as tape:
                # Compute actor loss
                actions_pred = self.actor_local.model(states)
                actor_loss = -tf.reduce_mean(
                    self.critic_local.model([states, actions_pred]))

            # Minimize the loss
            actor_grad = tape.gradient(
                actor_loss, self.actor_local.model.trainable_variables)
            self.actor_optimizer.apply_gradients(
                zip(actor_grad, self.actor_local.model.trainable_variables))

            # ----------------------- update target networks ----------------------- #
            self.soft_update(self.critic_local.model, self.critic_target.model,
                             self.tau)
            self.soft_update(self.critic2_local.model,
                             self.critic2_target.model, self.tau)
            self.soft_update(self.actor_local.model, self.actor_target.model,
                             self.tau)

        # ----------------------- decay noise ----------------------- #
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
Example #4
def generate_image_adversary(model, image, label, eps=2 / 255.0):
    # cast the image
    image = tf.cast(image, tf.float32)

    # record our gradients
    with tf.GradientTape() as tape:
        # explicitly indicate that our image should be tracked for
        # gradient updates
        tape.watch(image)

        # use our model to make predictions on the input image and
        # then compute the loss
        pred = model(image)
        loss = MSE(label, pred)

    # calculate the gradients of loss with respect to the image, then
    # compute the sign of the gradient
    gradient = tape.gradient(loss, image)
    signedGrad = tf.sign(gradient)

    # construct the image adversary
    adversary = (image + (signedGrad * eps)).numpy()

    # return the image adversary to the calling function
    return adversary
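A hypothetical call of the FGSM helper above, assuming an MNIST-style Keras model trained against one-hot targets (the data and model names are illustrative, not from the original):

# Illustrative usage; x_test, y_test, and model are assumed to exist.
image = tf.convert_to_tensor(x_test[:1], dtype=tf.float32)   # a single input example
label = tf.one_hot(y_test[:1], depth=10)                     # one-hot target matching the MSE loss
adv = generate_image_adversary(model, image, label, eps=2 / 255.0)
print("max perturbation:", np.abs(adv - image.numpy()).max())  # bounded by eps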
Example #5
    def __init__(self, n_rec, state_dim, action_dim, sess):
        self.n_rec = n_rec
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.sess = sess

        self.user_ph = Input(shape=(self.state_dim, ), name='user')
        self.item_ph = Input(shape=(self.action_dim, ), name='item')
        self.true_ph = Input(shape=(1, ), name='true')

        net = Dense(20)(Concatenate()([self.user_ph, self.item_ph]))
        net = Dense(1)(net)
        self.rank_op = net

        self.loss = MSE(self.rank_op, self.true_ph)

        self.model = Model(inputs=[self.user_ph, self.item_ph],
                           outputs=self.rank_op)

        self.model.compile(loss=MSE, optimizer="adam")

        self.lr = 1e-4
        self.batch = 32
        self.n_iter = 10

        self.memory = []

        self.last_action = None
        self.last_user = None
Example #6
    def learn(self, experiences, gamma):
        """Update Q parameters using given batch of experience tuples.
        """
        states, actions, rewards, next_states, dones = experiences

        # ---------------------------- update critic ---------------------------- #
        with tf.GradientTape() as tape:
            # Get max predicted Q values from target models
            Q_targets_next = tf.reduce_max(self.q_target.model(next_states),
                                           axis=-1)
            # Compute Q targets for current states (y_i)
            Q_targets = rewards + (gamma * Q_targets_next * (1 - dones))
            # Get expected Q values from local model
            Q_expected = self.q_local.model(states)
            idx = tf.cast(actions, tf.int32)
            Q_expected = tf.gather_nd(
                Q_expected,
                tf.stack([tf.range(Q_expected.shape[0]), idx], axis=1))
            # Calculate the loss
            loss = MSE(Q_targets, Q_expected)

        # Minimize the loss
        grad = tape.gradient(loss, self.q_local.model.trainable_variables)
        self.q_optimizer.apply_gradients(
            zip(grad, self.q_local.model.trainable_variables))

        # ----------------------- update target networks ----------------------- #
        self.soft_update(self.q_local.model, self.q_target.model, self.tau)
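Several of the reinforcement-learning snippets on this page call a soft_update method that is not shown. A minimal sketch of what such a helper typically looks like (an assumption, not taken from the original code):

    def soft_update(self, local_model, target_model, tau):
        # Polyak averaging: target <- tau * local + (1 - tau) * target
        for local_var, target_var in zip(local_model.trainable_variables,
                                         target_model.trainable_variables):
            target_var.assign(tau * local_var + (1.0 - tau) * target_var)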
Example #7
    def loss():

        loss = 0
        image_batch, targets_init_batch, targets_time_batch, actions_time_batch, \
        mask_time_batch, dynamic_mask_time_batch = batch

        # make initial step from the real observation: representation + prediction networks
        representation_batch, value_batch, policy_batch = network.initial_model(np.array(image_batch))

        # Only update the element with a policy target
        target_value_batch, _, target_policy_batch = zip(*targets_init_batch)
        mask_policy = list(map(lambda l: bool(l), target_policy_batch))
        target_policy_batch = list(filter(lambda l: bool(l), target_policy_batch))
        policy_batch = tf.boolean_mask(policy_batch, mask_policy)

        # Compute the loss of the first pass
        value_support_size = len(value_batch[0])
        loss += tf.math.reduce_mean(loss_value(target_value_batch, value_batch, value_support_size))
        loss += tf.math.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=policy_batch, labels=target_policy_batch))

        # Recurrent steps, from action and previous hidden state.
        for actions_batch, targets_batch, mask, dynamic_mask in zip(actions_time_batch, targets_time_batch,
                                                                    mask_time_batch, dynamic_mask_time_batch):
            target_value_batch, target_reward_batch, target_policy_batch = zip(*targets_batch)

            # Only execute BPTT for elements with an action
            representation_batch = tf.boolean_mask(representation_batch, dynamic_mask)
            target_value_batch = tf.boolean_mask(target_value_batch, mask)
            target_reward_batch = tf.boolean_mask(target_reward_batch, mask)
            # Creating conditioned_representation: concatenate representations with actions batch
            actions_batch = tf.one_hot(actions_batch, network.action_size)

            # Recurrent step from conditioned representation: recurrent + prediction networks
            conditioned_representation_batch = tf.concat((representation_batch, actions_batch), axis=1)
            representation_batch, reward_batch, value_batch, policy_batch = network.recurrent_model(
                conditioned_representation_batch)

            # Only execute BPTT for elements with a policy target
            target_policy_batch = [policy for policy, b in zip(target_policy_batch, mask) if b]
            mask_policy = list(map(lambda l: bool(l), target_policy_batch))
            target_policy_batch = tf.convert_to_tensor([policy for policy in target_policy_batch if policy])
            policy_batch = tf.boolean_mask(policy_batch, mask_policy)

            # Compute the partial loss
            l = (tf.math.reduce_mean(loss_value(target_value_batch, value_batch, network.value_support_size)) +
                 MSE(target_reward_batch, tf.squeeze(reward_batch)) +
                 tf.math.reduce_mean(
                     tf.nn.softmax_cross_entropy_with_logits(logits=policy_batch, labels=target_policy_batch)))

            # Scale the gradient of the loss by the average number of actions unrolled
            gradient_scale = 1. / len(actions_time_batch)
            loss += scale_gradient(l, gradient_scale)

            # Half the gradient of the representation
            representation_batch = scale_gradient(representation_batch, 0.5)

        return loss
Example #8
 def train_step(image):
     with tf.GradientTape() as tape:
         pred_image = model(image)
         loss = MSE(image, pred_image)
     # compute and apply gradients after leaving the tape context
     gradients = tape.gradient(loss, model.trainable_variables)
     optimizer.apply_gradients(zip(gradients, model.trainable_variables))
     train_loss(loss)
Example #9
    def naive_smoothness(self, input_data):
        """Computes the smoothness of the network in comparison with the Taylor network at selected points

        Args:
            input_data (np.ndarray(n_samples, n_input)): The data to compare the networks on
        Returns:
            float: smoothness_value
        """
        return np.mean(
            MSE(self.network(input_data), self.__call__(input_data)).numpy())
Example #10
    def pre_train(self,
                  epochs=80,
                  info_step=10,
                  lr=2e-3,
                  W_a=0.3,
                  W_x=1,
                  W_d=0,
                  min_dist=0.5,
                  max_dist=20):

        optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
        if self.sparse:
            self.adj_n = tfp.math.dense_to_sparse(self.adj_n)

        # Training
        for epoch in range(1, epochs + 1):
            with tf.GradientTape(persistent=True) as tape:
                z = self.encoder([self.X, self.adj_n])
                X_out = self.decoderX(z)
                A_out = self.decoderA(z)
                if W_d:
                    Dist_loss = tf.reduce_mean(
                        dist_loss(z, min_dist, max_dist=max_dist))
                A_rec_loss = tf.reduce_mean(MSE(self.adj, A_out))
                X_rec_loss = tf.reduce_mean(MSE(self.X, X_out))
                loss = W_a * A_rec_loss + W_x * X_rec_loss
                if W_d:
                    loss += W_d * Dist_loss

            vars = self.trainable_weights
            grads = tape.gradient(loss, vars)
            optimizer.apply_gradients(zip(grads, vars))

            if epoch % info_step == 0:
                if W_d:
                    print("Epoch", epoch, " X_rec_loss:", X_rec_loss.numpy(),
                          "  A_rec_loss:", A_rec_loss.numpy(), "  Dist_loss:",
                          Dist_loss.numpy())
                else:
                    print("Epoch", epoch, " X_rec_loss:", X_rec_loss.numpy(),
                          "  A_rec_loss:", A_rec_loss.numpy())
        print("Pre_train Finish!")
Example #11
    def MSE(x: tf.Tensor, x_decoded: tf.Tensor) -> tf.Tensor:
        """MSE-loss optimized for variational inference.

        MSE = E_q(z|x) log p(x|z)
        Here in conjunction to the variational loss:
        MSE = E log p(x|z)
        """
        cross_ent = MSE(x, x_decoded)
        cross_ent = tf.reshape(cross_ent, [tf.shape(x)[0], -1])
        logpx_z = -tf.reduce_sum(cross_ent, axis=1)
        return -tf.reduce_mean(logpx_z)
Example #12
def train_q_network(agent, state, action, reward, next_state, not_done, optimizer):
    """Trains the Q-network."""

    q_target = tf.cast(reward, tf.float32) + tf.cast(not_done, tf.float32) * GAMMA * agent.max_q(next_state)

    with tf.GradientTape() as tape:
        q_vals = agent.q_val(state, action)
        loss = MSE(q_target, q_vals)

    # compute gradients after leaving the tape context, then apply them
    gradients = tape.gradient(loss, agent.model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, agent.model.trainable_variables))
    return loss
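train_q_network assumes an agent object exposing max_q, q_val, and an underlying model; a minimal sketch of such a wrapper (hypothetical, for illustration only):

# Hypothetical agent wrapper matching the interface used by train_q_network.
class QAgent:
    def __init__(self, model):
        self.model = model  # Keras network mapping states to per-action Q-values

    def max_q(self, states):
        # largest Q-value per state, used for the bootstrap target
        return tf.reduce_max(self.model(states), axis=-1)

    def q_val(self, states, actions):
        # Q-value of the action actually taken in each state
        q = self.model(states)
        idx = tf.stack([tf.range(tf.shape(q)[0]), tf.cast(actions, tf.int32)], axis=1)
        return tf.gather_nd(q, idx)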
Example #13
def custom_loss(s_pred, t_pred, labels, epoch):
    w = 50 * np.exp(-5 * (1 - (epoch / 80)) ** 2)
    bce = BinaryCrossentropy()
    # ignores values set to -1
    mask = np.where(labels[:, 0] >= 0, 1, 0)
    ll = bce(y_pred=s_pred, y_true=labels, sample_weight=mask)
    ll = ll / len(s_pred[-1])
    if t_pred is not None:
        lu = MSE(s_pred, t_pred) / len(s_pred[-1])
        return tf.math.reduce_mean(ll) + w * tf.math.reduce_mean(lu)
    else:
        return tf.math.reduce_mean(ll)
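A hypothetical call of custom_loss with dummy student/teacher predictions and partially unlabeled targets (all names and shapes are illustrative):

# Rows whose labels are set to -1 are excluded via the sample-weight mask.
s_pred = tf.random.uniform((8, 3))
t_pred = tf.random.uniform((8, 3))
labels = np.random.randint(0, 2, size=(8, 3)).astype(np.float32)
labels[:2] = -1.0        # the first two samples carry no label
loss = custom_loss(s_pred, t_pred, labels, epoch=10)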
Example #14
    def loss(y_true, y_pred):

        A, k, S, B = prms

        #Set S values to lie within a range given by parameters
        def S_cost(Ss, dev_pts):
            Cost = 0
            for i in range(len(Ss)):
                dif_sq = (S[i] - Ss[i])**2
                Cost += cond(dif_sq > dev_pts[i], lambda: 30 * (dif_sq + 0.8),
                             lambda: 0.)
            return Cost

        #Set each function to have a specific orientation
        def sign_cost(Ss, posit):
            Cost = 0
            for i in range(len(Ss)):
                Cost += cond(A[i] * k[i] * posit[i] > 0., lambda: tf_abs(A[i]),
                             lambda: 0.)
            return Cost

        def ChemicalRequire(Peak, Ss):
            """Add chemical requirements: 
            Req. 1: The first 3 curves are CO2 absorption; the 4th and 5th correspond to loss of it.
            Req. 2: The first curve should start at 0.
            Next versions: this should be modified so the user can add these requirements without having to look at this code.
            """
            ########## All of the below code is hard-coded.
            #Sum all N-3 curves (all but 3 last).
            First3Sum = sum([tf_abs(A[i]) for i in range(len(posit))]) - sum(
                [tf_abs(A[-i]) for i in range(1, 4)])

            #TotalSum - Last3Sum = Peak
            Diff = tf_abs(First3Sum - Peak)
            #Next 2 curves also equal Peak
            Dif2 = tf_abs(A[-2] + A[-3] - Peak)

            Req1Cost = cond(Diff + Dif2 > 1e-6, lambda: 50.0 * (Diff + Dif2),
                            lambda: 0.)
            ########## All of the above code is hard-coded. Fix for next versions

            #Req 2: First curve starts at 0. This is achieved summing all Ai such that ki>0, + B = 0
            SumOffA = B
            for i in range(len(Ss)):
                SumOffA += A[i] * cond(k[i] > 0., lambda: 1., lambda: 0.)
            Req2Cost = cond(
                tf_abs(SumOffA) > 1e-3, lambda: 10.0 * tf_abs(SumOffA),
                lambda: 0.)

            return Req1Cost + Req2Cost

        return (MSE(y_true, y_pred) + S_cost(Ss, dev_pts) +
                sign_cost(Ss, posit) + 1e-3 * ChemicalRequire(peak, Ss))
Example #15
    def alt_train(self,
                  epochs=100,
                  lr=5e-4,
                  W_a=0.3,
                  W_x=1,
                  W_c=1.5,
                  info_step=8,
                  n_update=8,
                  centers=None):

        self.cluster_model.get_layer(name='clustering').clusters = centers

        # Training
        optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
        for epoch in range(0, epochs):

            if epoch % n_update == 0:
                q = self.cluster_model([self.X, self.adj_n])
                p = self.target_distribution(q)

            with tf.GradientTape(persistent=True) as tape:
                z = self.encoder([self.X, self.adj_n])
                q_out = self.cluster_model([self.X, self.adj_n])
                X_out = self.decoderX(z)
                A_out = self.decoderA(z)
                A_rec_loss = tf.reduce_mean(MSE(self.adj, A_out))
                X_rec_loss = tf.reduce_mean(MSE(self.X, X_out))
                cluster_loss = tf.reduce_mean(KLD(q_out, p))
                tot_loss = W_a * A_rec_loss + W_x * X_rec_loss + W_c * cluster_loss

            vars = self.trainable_weights
            grads = tape.gradient(tot_loss, vars)
            optimizer.apply_gradients(zip(grads, vars))

            if epoch % info_step == 0:
                print("Epoch", epoch, " X_rec_loss: ", X_rec_loss.numpy(),
                      " A_rec_loss: ", A_rec_loss.numpy(), " cluster_loss: ",
                      cluster_loss.numpy())
Example #16
 def loss_function(y_true, y_pred):
     if isinstance(transform, str) and transform.lower() == 'disc':
         return losses.discriminative_instance_loss(y_true, y_pred)
     if isinstance(transform,
                   str) and transform.lower() == 'watershed-cont':
         return MSE(y_true, y_pred)
     if focal:
         return losses.weighted_focal_loss(y_true,
                                           y_pred,
                                           gamma=gamma,
                                           n_classes=n_classes)
     return losses.weighted_categorical_crossentropy(y_true,
                                                     y_pred,
                                                     n_classes=n_classes)
Example #17
def create_model(albert_config, is_training, a_input_ids, a_input_mask, a_segment_ids,
        b_input_ids, b_input_mask, b_segment_ids, labels, num_labels, use_one_hot_embeddings):
  """Creates a classification model."""
  #import pdb
  #pdb.set_trace()
  a_model = modeling.AlbertModel(
      config=albert_config,
      is_training=is_training,
      input_ids=a_input_ids,
      input_mask=a_input_mask,
      token_type_ids=a_segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  b_model = modeling.AlbertModel(
      config=albert_config,
      is_training=is_training,
      input_ids=b_input_ids,
      input_mask=b_input_mask,
      token_type_ids=b_segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  # In the demo, we are doing a simple classification task on the entire
  # segment.
  #
  # If you want to use the token-level output, use model.get_sequence_output()
  # instead.
  if FLAGS.use_pooled_output:
    tf.logging.info("using pooled output")
    a_output_layer = a_model.get_pooled_output()
    b_output_layer = b_model.get_pooled_output()
  else:
    tf.logging.info("using meaned output")
    a_output_layer = tf.reduce_mean(a_model.get_sequence_output(), axis=1)
    b_output_layer = tf.reduce_mean(b_model.get_sequence_output(), axis=1)

  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      a_output_layer = tf.nn.dropout(a_output_layer, keep_prob=0.9, name='a_dropout')
      b_output_layer = tf.nn.dropout(b_output_layer, keep_prob=0.9, name='b_dropout')

    from tensorflow.math import l2_normalize, reduce_sum
    a_l2_norm = l2_normalize(a_output_layer, axis=-1)
    b_l2_norm = l2_normalize(b_output_layer, axis=-1)
    predictions = reduce_sum(a_l2_norm * b_l2_norm, axis=-1)  # cosine similarity, shape [batch_size]

    from tensorflow.keras.losses import MSE
    loss = MSE(labels, predictions)

    return (a_output_layer, loss, predictions)
Example #18
 def anomaly(self, x: np.ndarray, thresh=None, mode='normal') -> np.ndarray:
     """
     Detect anomalies.
     Normal mode determines the mse between reconstructions and inputs.
     Latent mode determines the likelihood of the encodings.
     Applies a threshold if specified.
     """
     if mode in ['normal', 'decoding', 'reconstruction', 'rec', 'x']:
         y = self.predict(x)
         l = MSE(x.reshape(len(x), -1), y.reshape(len(y), -1))
     elif mode in ['latent', 'encoding', 'enc', 'z']:
         z = self.encode(x)
         l = lognormpdf(z)
     else:
         raise ValueError(f"Unknown anomaly mode: {mode}")
     return l if thresh is None else l > thresh
Example #19
def MSEloss(netinput, netoutput):
    """Function to compute the MSEloss for the reconstruction loss of a minibatch.
    
    
    Arguments:
    ------------------------------------------------------------------
    - netinput: `tf.Tensor`, Tensor containing the network reconstruction target of the minibatch for the cells.
    - netoutput: `tf.Tensor`, Tensor containing the reconstructed target of the minibatch for the cells.
    
    Returns:
    ------------------------------------------------------------------
    - mse_loss: `tf.Tensor`, The loss computed for the minibatch, averaged over genes and cells.
    """

    return tf.math.reduce_mean(MSE(netinput, netoutput))
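A short usage sketch with random tensors (illustrative shapes, not from the original):

# Average reconstruction error over a 32-cell, 2000-gene minibatch.
netinput = tf.random.normal((32, 2000))
netoutput = tf.random.normal((32, 2000))
mse_loss = MSEloss(netinput, netoutput)   # scalar tf.Tensor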
Example #20
    def _learn_tf(self, experiences, gamma):
        states, actions, rewards, next_states, dones = experiences
        rewards = tf.expand_dims(rewards, 1)
        dones = tf.expand_dims(dones, 1)

        # ---------------------------- update critic ---------------------------- #
        with tf.GradientTape() as tape:
            tape.watch(self.critic_local.model.trainable_variables)
            # Get predicted next-state actions and Q values from target models
            actions_next = self.actor_target.model(next_states)
            Q_targets_next = self.critic_target.model(
                [next_states, actions_next])
            # Compute Q targets for current states (y_i)
            Q_targets = rewards + (gamma * Q_targets_next * (1 - dones))
            Q_targets = tf.stop_gradient(Q_targets)
            # Compute critic loss
            Q_expected = self.critic_local.model([states, actions])
            critic_loss = MSE(Q_expected, Q_targets)

        # Minimize the loss
        critic_grad = tape.gradient(
            critic_loss, self.critic_local.model.trainable_variables)

        self.critic_optimizer.apply_gradients(
            zip(critic_grad, self.critic_local.model.trainable_variables))

        # ---------------------------- update actor ---------------------------- #
        with tf.GradientTape() as tape:
            tape.watch(self.actor_local.model.trainable_variables)
            # Compute actor loss
            actions_pred = self.actor_local.model(states)
            actor_loss = -tf.reduce_mean(
                self.critic_local.model([states, actions_pred]))

        # Minimize the loss
        actor_grad = tape.gradient(actor_loss,
                                   self.actor_local.model.trainable_variables)

        self.actor_optimizer.apply_gradients(
            zip(actor_grad, self.actor_local.model.trainable_variables))

        # ----------------------- update target networks ----------------------- #
        self.soft_update(self.critic_local.model, self.critic_target.model,
                         self.tau)
        self.soft_update(self.actor_local.model, self.actor_target.model,
                         self.tau)
Example #21
    def train(self):
        """
        Performs one step of model training.
        """
        batch_size = min(self.batch_size, len(self.memory))
        minibatch = random.sample(self.memory, batch_size)

        state = [mb[0] for mb in minibatch]
        action = [mb[1] for mb in minibatch]
        reward = [mb[2] for mb in minibatch]
        next_state = [mb[3] for mb in minibatch]
        done = [mb[4] for mb in minibatch]

        states = convert_to_tensor(state, dtype=float32)
        actions = convert_to_tensor(action, dtype=float32)
        rewards = convert_to_tensor(reward, dtype=float32)
        next_states = convert_to_tensor(next_state, dtype=float32)
        dones = convert_to_tensor(done, dtype=float32)

        with GradientTape() as tape:
            target_actions = self.target_actor(next_states)
            critic_value_ = squeeze(
                self.target_critic([next_states, target_actions]), 1)
            critic_value = squeeze(self.critic([states, actions]), 1)
            target = rewards + self.discount_factor * critic_value_ * (1 - dones)
            critic_loss = MSE(target, critic_value)

        critic_network_gradient = tape.gradient(
            critic_loss, self.critic.trainable_variables)
        self.critic.optimizer.apply_gradients(
            zip(critic_network_gradient, self.critic.trainable_variables))

        with GradientTape() as tape:
            new_policy_actions = self.actor(states)
            actor_loss = -self.critic([states, new_policy_actions])
            actor_loss = reduce_mean(actor_loss)

        actor_network_gradient = tape.gradient(actor_loss,
                                               self.actor.trainable_variables)
        self.actor.optimizer.apply_gradients(
            zip(actor_network_gradient, self.actor.trainable_variables))
Example #22
def get_loss(loss_name: str) -> Loss:
    """
    Get an object in tensorflow.keras.losses by name.
    :param loss_name:
        str
        Supported names (case-insensitive):
            'sigmoid'
            'mse'

    :return:
        Loss
        A loss object.
    """
    losses = {
        'sigmoid': BinaryCrossentropy(),
        'mse': MSE(),
    }

    loss_name = loss_name.strip().lower()

    try:
        return losses[loss_name]
    except KeyError as keyerr:
        raise SuiValueError(f'{keyerr} is not a valid loss name.')
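A hypothetical call of get_loss (not part of the original). Note that the 'mse' entry only works if MSE here is bound to the MeanSquaredError class; the functional tf.keras.losses.MSE alias cannot be instantiated without arguments:

# Whitespace and case in the name are normalized by get_loss.
loss_fn = get_loss('  Sigmoid ')
value = loss_fn(tf.constant([1.0, 0.0]), tf.constant([0.9, 0.2]))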
Example #23
def compute_euclidean_distance(fake_outputs, ground_truth) -> float:
    return MSE(ground_truth, fake_outputs)
Example #24
    def train_step2(self, inp):
        loss = 0
        loss_srn = 0

        input_z, input_s_d, input_s_t = inp

        with tf.GradientTape() as tape1:

            pred_s_d, pred_s_t = self.srn(input_z)

            #             loss_srn_d = tf.reduce_mean(categorical_crossentropy(input_s_d, pred_s_d,) )
            loss_srn_d = tf.reduce_mean(MSE(
                input_s_d,
                pred_s_d,
            ))
            loss_srn_t = tf.reduce_mean(MSE(
                input_s_t,
                pred_s_t,
            ))
            loss_srn = loss_srn_d + loss_srn_t

        variables = self.srn.trainable_variables
        gradients = tape1.gradient(loss_srn, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))

        with tf.GradientTape() as tape2:

            z0_, log_q_z0, w_, u_, b_ = self.recnet(
                (input_z, pred_s_d, pred_s_t))[2:]

            w_ = tf.reshape(w_, (-1, self.ld, self.flow_depth))
            u_ = tf.reshape(u_, (-1, self.ld, self.flow_depth))
            b_ = tf.reshape(b_, (-1, 1, self.flow_depth))
            z_ = [z0_]

            log_dets = []

            for i in range(self.flow_depth):
                z_i, log_det_i = self.pf_layers[i](z_[-1], w_[:, :, i],
                                                   u_[:, :, i], b_[:, :, i])

                z_.append(z_i)
                log_dets.append(log_det_i)

            z_pred = self.gm1(z_[-1])
            s_pred = self.gm2(z_[-1]), self.gm3(z_[-1])

            ##  Rename ##

            #             s = (pred_s_d, pred_s_t)  # predicted s from rec net
            s = (input_s_d, input_s_t)  # cheat and use labels

            log_dets = tf.reduce_sum(log_dets, axis=0)  # sum over flow len

            args = z_pred, input_z, s_pred, s, z0_, log_q_z0, log_dets, z_[-1]

            ##  Compute loss / self.metrics ##
            loss = self.loss_fn(*args, beta=self.beta, gamma=self.gamma)
            metric_res = [(name, fn(*args)) for name, fn in self.metrics]

        metric_res += [("loss_srn_d", loss_srn_d), ("loss_srn_t", loss_srn_t),
                       ("loss_srn", loss_srn)]

        variables = self.recnet.trainable_variables + self.gm1.trainable_variables + self.gm2.trainable_variables + self.gm3.trainable_variables
        gradients = tape2.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))

        return loss, metric_res
Example #25
        x = self.dense2(x)
        return x


network = Vgg16(10)
# network.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = (x_train.reshape(-1, 28, 28, 1) / 255).astype(np.float32)
# x_train, x_test = x_train / 255.0, x_test / 255.0
y_train = np.eye(10)[y_train].astype(np.float32)
x_train = x_train[:100]
y_train = y_train[:100]

input_layer = tf.keras.layers.Input(shape=(28, 28, 1))
output_layer = network(input_layer)
training_model = Model(inputs=input_layer, outputs=output_layer)
optim = Adam()

for i in range(100):
    with tf.GradientTape(watch_accessed_variables=False) as tape:
        tape.watch(training_model.trainable_variables)
        preds = training_model(x_train)
        loss = MSE(preds, y_train)
        cost = tf.reduce_mean(loss)
    # compute and apply gradients after leaving the tape context
    grads = tape.gradient(loss, training_model.trainable_variables)
    optim.apply_gradients(zip(grads, training_model.trainable_variables))
    print(cost)
Example #26
 def MSE_with_sample_type_indicator(y_true, y_pred):
     return reduce_sum(y_true[:, 0] *
                       MSE(y_true=y_true[:, 1:], y_pred=y_pred[:, 1:]))
Example #27
    def train_one_step(self, tx, rx):
        with tf.GradientTape(watch_accessed_variables=False) as polluter_tape, \
                tf.GradientTape(watch_accessed_variables=False) as cleaner_tape, \
                tf.GradientTape(watch_accessed_variables=False) as d1_tape, \
                tf.GradientTape(watch_accessed_variables=False) as d2_tape:
            # do watch tf model
            polluter_tape.watch(self.polluter.trainable_variables)
            cleaner_tape.watch(self.cleaner.trainable_variables)
            d1_tape.watch(self.polluter_critic.trainable_variables)
            d2_tape.watch(self.cleaner_critic.trainable_variables)

            # -----------------------------------------Step 1: train two critics----------------------------------------
            fake_dirty_wave = self.polluter(tx, training=True)

            critic_on_fake = self.polluter_critic(fake_dirty_wave,
                                                  training=True)
            critic_on_real = self.polluter_critic(rx, training=True)
            critic_loss = tf.reduce_mean(MSE(critic_on_fake, tf.zeros_like(critic_on_fake)) + \
                                         MSE(critic_on_real, tf.ones_like(critic_on_real)), keepdims=True)

            gradient_of_polluter_critic = d1_tape.gradient(
                critic_loss, self.polluter_critic.trainable_variables)

            self.polluter_critic_optimizer.apply_gradients(
                zip(gradient_of_polluter_critic,
                    self.polluter_critic.trainable_variables))

            fake_clean_wave = self.cleaner(rx, training=True)
            critic_on_fake2 = self.cleaner_critic(fake_clean_wave,
                                                  training=True)
            critic_on_real2 = self.cleaner_critic(tx, training=True)
            critic_loss2 = tf.reduce_mean(MSE(critic_on_fake2, tf.zeros_like(critic_on_fake2)) + \
                                          MSE(critic_on_real2, tf.ones_like(critic_on_real2)), keepdims=True)

            gradient_of_cleaner_critic = d2_tape.gradient(
                critic_loss2, self.cleaner_critic.trainable_variables)
            self.cleaner_critic_optimizer.apply_gradients(
                zip(gradient_of_cleaner_critic,
                    self.cleaner_critic.trainable_variables))

            # -----------------------------------------Step 2: train polluter-------------------------------------------
            # let polluter pollute the tx signal
            dirty_wave = self.polluter(tx, training=True)

            # the generated dirty_wave should be close to the real rx signal in l2-distance sense.
            polluter_l2_loss = tf.reduce_mean((rx - dirty_wave)**2)

            # score on fake "dirty wave"
            critic_on_fake_dirty_wave = self.polluter_critic(dirty_wave,
                                                             training=True)

            # the polluter should "fool" the polluter critic
            polluter_critic_loss = tf.reduce_mean(MSE(
                critic_on_fake_dirty_wave,
                tf.ones_like(critic_on_fake_dirty_wave)),
                                                  keepdims=True)

            # let the cleaner clean the dirty wave
            after_clean = self.cleaner(dirty_wave, training=True)

            # the cyclic consistency loss
            polluter_cyclic_loss = tf.reduce_mean((tx - after_clean)**2,
                                                  keepdims=True)

            # total loss
            total_polluter_loss = self.alpha * polluter_l2_loss + \
                                  self.beta * polluter_critic_loss + self.gamma * polluter_cyclic_loss

            # update gradient
            gradients_of_polluter = polluter_tape.gradient(
                total_polluter_loss, self.polluter.trainable_variables)
            self.polluter_optimizer.apply_gradients(
                zip(gradients_of_polluter, self.polluter.trainable_variables))

            # -----------------------------------------Step 3: train cleaner--------------------------------------------
            # let the cleaner clean the rx signal
            clean_wave = self.cleaner(rx, training=True)

            # the generated clean wave should be close to the real tx signal in l2-distance sense.
            cleaner_l2_loss = tf.reduce_mean((tx - clean_wave)**2,
                                             keepdims=True)

            # score on fake "clean wave"
            critic_on_fake_clean_wave = self.cleaner_critic(clean_wave,
                                                            training=True)

            # the cleaner should "fool" the cleaner critic
            cleaner_critic_loss = tf.reduce_mean(MSE(
                critic_on_fake_clean_wave,
                tf.ones_like(critic_on_fake_clean_wave)),
                                                 keepdims=True)

            # let the polluter pollute the clean wave
            after_pollute = self.polluter(clean_wave, training=True)

            # the cyclic consistency loss
            cleaner_cyclic_loss = tf.reduce_mean((rx - after_pollute)**2,
                                                 keepdims=True)

            # total loss
            total_cleaner_loss = self.alpha * cleaner_l2_loss + \
                                 self.beta * cleaner_critic_loss + self.gamma * cleaner_cyclic_loss

            gradients_of_cleaner = cleaner_tape.gradient(
                total_cleaner_loss, self.cleaner.trainable_variables)
            self.cleaner_optimizer.apply_gradients(
                zip(gradients_of_cleaner, self.cleaner.trainable_variables))

            # -----------------------------------------Step 4: print some info------------------------------------------
            if self.counter % 20 == 0:
                print("[info]: counter: " + str(self.counter) +
                      " polluter_critic_loss: " + str(critic_loss) +
                      " cleaner_critic_loss: " + str(critic_loss2) +
                      " total_polluter_loss: " + str(total_polluter_loss) +
                      " total_cleaner_loss: " + str(total_cleaner_loss))
Example #28
 def MSE_with_sti_and_hsm(y_true, y_pred):
     return reduce_sum(
         tf.sort(y_true[:, 0] *
                 MSE(y_true=y_true[:, 1:], y_pred=y_pred[:, 1:]),
                 direction='DESCENDING')[:num_back])
Example #29
def dip_loss(y_true, y_pred):
    mse_real = MSE(tf.math.real(y_true), tf.math.real(y_pred))
    mse_imag = MSE(tf.math.imag(y_true), tf.math.imag(y_pred))
    mse_total = mse_real + mse_imag
    return mse_total
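A minimal sketch of calling dip_loss on complex-valued tensors (illustrative shapes and values):

# Per-sample loss for a batch of 4 complex vectors of length 8.
y_true = tf.complex(tf.random.normal((4, 8)), tf.random.normal((4, 8)))
y_pred = tf.complex(tf.random.normal((4, 8)), tf.random.normal((4, 8)))
loss = dip_loss(y_true, y_pred)   # shape (4,): real-part MSE plus imaginary-part MSE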
Example #30
 def _semantic_loss(y_pred, y_true):
     if n_classes > 1:
         return panoptic_weight * losses.weighted_categorical_crossentropy(
             y_true, y_pred, n_classes=n_classes)
     return panoptic_weight * MSE(y_true, y_pred)