Example #1
def _calc_logpz(z, z_mu, z_log_sigma, **_):
    # Calculate log densities for sampled latent vectors in a standard
    # Gaussian distribution (zero mean, unit variance)
    logpz = _calc_gaussian_log_density(z, K.zeros_like(z_mu),
                                       K.zeros_like(z_log_sigma))
    logpz = K.sum(logpz, axis=1)
    return logpz
Example #2
def masked_loss(y_true, y_pred):
    max_args = argmax(y_true)
    mask = cast(not_equal(max_args, zeros_like(max_args)), dtype='float32')
    loss = switch(mask,
                  categorical_crossentropy(y_true, y_pred, from_logits=True),
                  zeros_like(mask, dtype=floatx()))
    return sum(loss) / (cast(sum(mask), dtype='float32') + epsilon())
Example #3
def masked_accuracy(y_true, y_pred):
    max_args = argmax(y_true)
    mask = cast(not_equal(max_args, zeros_like(max_args)), dtype='float32')
    points = switch(
        mask,
        cast(equal(argmax(y_true, -1), argmax(y_pred, -1)), dtype='float32'),
        zeros_like(mask, dtype=floatx()))
    return sum(points) / cast(sum(mask), dtype='float32')
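Examples #2 and #3 call backend functions by their bare names, which suggests they were name-imported from keras.backend, and they appear to treat class index 0 as the padding/ignore label (both are assumptions on my part). A minimal sketch of the implied imports and of plugging them into model.compile, where `model` is a hypothetical Keras model with one-hot targets:

from keras.backend import (argmax, cast, categorical_crossentropy, epsilon,
                           equal, floatx, not_equal, sum, switch, zeros_like)
# note: importing `sum` this way shadows the built-in, as the snippets imply

model.compile(optimizer='adam', loss=masked_loss, metrics=[masked_accuracy])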
Example #4
def exact_matched_accuracy(y_true, y_pred, mask_id):
    true_ids = bk.argmax(y_true, axis=-1)
    pred_ids = bk.argmax(y_pred, axis=-1)

    maskBool = bk.not_equal(true_ids, mask_id)
    maskInt64 = bk.cast(maskBool, 'int64')

    diff = (true_ids - pred_ids) * maskInt64
    matches = bk.cast(bk.not_equal(diff, bk.zeros_like(diff)), 'int64')
    matches = bk.sum(matches, axis=-1)
    matches = bk.cast(bk.equal(matches, bk.zeros_like(matches)), bk.floatx())
    return bk.mean(matches)
Example #5
def adversarial_loss(net_d,
                     real,
                     fake_abgr,
                     distorted,
                     gan_training="mixup_LSGAN",
                     **weights):
    """ Adversarial Loss Function from Shoanlu GAN """
    alpha = Lambda(lambda x: x[:, :, :, :1])(fake_abgr)
    fake_bgr = Lambda(lambda x: x[:, :, :, 1:])(fake_abgr)
    fake = alpha * fake_bgr + (1 - alpha) * distorted

    if gan_training == "mixup_LSGAN":
        dist = Beta(0.2, 0.2)
        lam = dist.sample()
        mixup = lam * concatenate([real, distorted]) + (1 - lam) * concatenate(
            [fake, distorted])
        pred_fake = net_d(concatenate([fake, distorted]))
        pred_mixup = net_d(mixup)
        loss_d = calc_loss(pred_mixup, lam * K.ones_like(pred_mixup), "l2")
        loss_g = weights['w_D'] * calc_loss(pred_fake, K.ones_like(pred_fake),
                                            "l2")
        mixup2 = lam * concatenate(
            [real, distorted]) + (1 - lam) * concatenate([fake_bgr, distorted])
        pred_fake_bgr = net_d(concatenate([fake_bgr, distorted]))
        pred_mixup2 = net_d(mixup2)
        loss_d += calc_loss(pred_mixup2, lam * K.ones_like(pred_mixup2), "l2")
        loss_g += weights['w_D'] * calc_loss(pred_fake_bgr,
                                             K.ones_like(pred_fake_bgr), "l2")
    elif gan_training == "relativistic_avg_LSGAN":
        real_pred = net_d(concatenate([real, distorted]))
        fake_pred = net_d(concatenate([fake, distorted]))
        loss_d = K.mean(K.square(real_pred - K.ones_like(fake_pred))) / 2
        loss_d += K.mean(K.square(fake_pred - K.zeros_like(fake_pred))) / 2
        loss_g = weights['w_D'] * K.mean(
            K.square(fake_pred - K.ones_like(fake_pred)))

        fake_pred2 = net_d(concatenate([fake_bgr, distorted]))
        loss_d += K.mean(
            K.square(real_pred - K.mean(fake_pred2, axis=0) -
                     K.ones_like(fake_pred2))) / 2
        loss_d += K.mean(
            K.square(fake_pred2 - K.mean(real_pred, axis=0) -
                     K.zeros_like(fake_pred2))) / 2
        loss_g += weights['w_D'] * K.mean(
            K.square(real_pred - K.mean(fake_pred2, axis=0) -
                     K.zeros_like(fake_pred2))) / 2
        loss_g += weights['w_D'] * K.mean(
            K.square(fake_pred2 - K.mean(real_pred, axis=0) -
                     K.ones_like(fake_pred2))) / 2
    else:
        raise ValueError(
            "Receive an unknown GAN training method: {gan_training}")
    return loss_d, loss_g
Example #6
    def recursion(self, input_energy, mask=None, go_backwards=False,
                  return_sequences=True, return_logZ=True, input_length=None):
        """Forward (alpha) or backward (beta) recursion
        If `return_logZ = True`, compute logZ, the normalization constant:
        \[ Z = \sum_{y_1, y_2, y_3} \exp(-E)  \quad (E \text{ is the total energy})
             = \sum_{y_1, y_2, y_3} \exp(-(u_1' y_1 + y_1' W y_2 + u_2' y_2 + y_2' W y_3 + u_3' y_3))
             = \sum_{y_2, y_3} \Big( \exp(-(u_2' y_2 + y_2' W y_3 + u_3' y_3))
               \sum_{y_1} \exp(-(u_1' y_1 + y_1' W y_2)) \Big) \]
        Denote:
            \[ S(y_2) := \sum_{y_1} \exp(-(u_1' y_1 + y_1' W y_2)), \]
            \[ Z = \sum_{y_2, y_3} \exp(\log S(y_2) - (u_2' y_2 + y_2' W y_3 + u_3' y_3)), \]
            \[ \log S(y_2) = \operatorname{logsumexp}_{y_1}(-(u_1' y_1 + y_1' W y_2)) \]
        Note that:
              yi's are one-hot vectors
              u1, u3: boundary energies have been merged
        If `return_logZ = False`, compute the Viterbi best-path lookup table.
        """
        chain_energy = self.chain_kernel
        # shape=(1, F, F): F=num of output features. 1st F is for t-1, 2nd F for t
        chain_energy = K.expand_dims(chain_energy, 0)
        # shape=(B, F), dtype=float32
        prev_target_val = K.zeros_like(input_energy[:, 0, :])

        if go_backwards:
            input_energy = K.reverse(input_energy, 1)
            if mask is not None:
                mask = K.reverse(mask, 1)

        initial_states = [prev_target_val, K.zeros_like(prev_target_val[:, :1])]
        constants = [chain_energy]

        if mask is not None:
            mask2 = K.cast(K.concatenate([mask, K.zeros_like(mask[:, :1])], axis=1),
                           K.floatx())
            constants.append(mask2)

        def _step(input_energy_i, states):
            return self.step(input_energy_i, states, return_logZ)

        target_val_last, target_val_seq, _ = K.rnn(_step, input_energy,
                                                   initial_states,
                                                   constants=constants,
                                                   input_length=input_length,
                                                   unroll=self.unroll)

        if return_sequences:
            if go_backwards:
                target_val_seq = K.reverse(target_val_seq, 1)
            return target_val_seq
        else:
            return target_val_last
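The docstring above describes the alpha recursion in formula form. As a sanity check, here is a small standalone NumPy sketch (my own illustration, not part of the layer) showing that the recursive log-sum-exp matches brute-force enumeration of logZ for a tiny chain with merged boundary energies:

import numpy as np
from itertools import product

def brute_force_logz(u, w):
    # u: (T, F) unary energies, w: (F, F) chain energies
    t_len, n_lab = u.shape
    energies = [sum(u[t, y[t]] for t in range(t_len)) +
                sum(w[y[t], y[t + 1]] for t in range(t_len - 1))
                for y in product(range(n_lab), repeat=t_len)]
    return np.log(np.sum(np.exp(-np.array(energies))))

def forward_logz(u, w):
    alpha = -u[0]                                   # log-space scores over y_1
    for t in range(1, u.shape[0]):
        # log-sum-exp over the previous label, then add the new unary energy
        alpha = np.log(np.exp(alpha[:, None] - w).sum(axis=0)) - u[t]
    return np.log(np.exp(alpha).sum())

u, w = np.random.rand(3, 4), np.random.rand(4, 4)
assert np.isclose(brute_force_logz(u, w), forward_logz(u, w))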
Example #7
    def get_updates(self, params, loss):
        grads = self.get_gradients(loss, params)
        shapes = [K.shape(p) for p in params]
        alphas = [
            K.variable(K.ones(shape) * self.init_alpha) for shape in shapes
        ]
        old_grads = [K.zeros(shape) for shape in shapes]
        prev_weight_deltas = [K.zeros(shape) for shape in shapes]
        self.weights = alphas + old_grads
        self.updates = []

        for param, grad, old_grad, prev_weight_delta, alpha in zip(
                params, grads, old_grads, prev_weight_deltas, alphas):
            # equation 4
            new_alpha = K.switch(
                K.greater(grad * old_grad, 0),
                K.minimum(alpha * self.scale_up, self.max_alpha),
                K.switch(K.less(grad * old_grad, 0),
                         K.maximum(alpha * self.scale_down, self.min_alpha),
                         alpha))

            # equation 5
            new_delta = K.switch(
                K.greater(grad, 0), -new_alpha,
                K.switch(K.less(grad, 0), new_alpha, K.zeros_like(new_alpha)))

            # equation 7
            weight_delta = K.switch(K.less(grad * old_grad, 0),
                                    -prev_weight_delta, new_delta)

            # equation 6
            new_param = param + weight_delta

            # reset gradient_{t-1} to 0 if gradient sign changed (so that we do
            # not "double punish", see paragraph after equation 7)
            grad = K.switch(K.less(grad * old_grad, 0), K.zeros_like(grad),
                            grad)

            # Apply constraints
            #if param in constraints:
            #    c = constraints[param]
            #    new_param = c(new_param)

            self.updates.append(K.update(param, new_param))
            self.updates.append(K.update(alpha, new_alpha))
            self.updates.append(K.update(old_grad, grad))
            self.updates.append(K.update(prev_weight_delta, weight_delta))

        return self.updates
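The equations the comments refer to are not reproduced in the snippet, so here is a rough scalar restatement of what the vectorised K.switch logic above does (my paraphrase of Rprop with backtracking, not taken from the original source):

def rprop_scalar_step(w, g, g_prev, step, dw_prev,
                      scale_up=1.2, scale_down=0.5,
                      max_step=50.0, min_step=1e-6):
    # step-size adaptation ("equation 4")
    if g * g_prev > 0:
        step = min(step * scale_up, max_step)
    elif g * g_prev < 0:
        step = max(step * scale_down, min_step)
    # proposed move against the gradient sign ("equation 5")
    dw = -step if g > 0 else (step if g < 0 else 0.0)
    # on a sign change, backtrack the previous move and zero the stored
    # gradient so it is not "double punished" ("equation 7")
    if g * g_prev < 0:
        dw, g = -dw_prev, 0.0
    # apply the move ("equation 6") and return the new state
    return w + dw, g, step, dw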
Example #8
 def get_psp(self, output_spikes):
     new_spiketimes = tf.where(k.greater(output_spikes, 0),
                               k.ones_like(output_spikes) * self.time,
                               self.last_spiketimes)
     new_spiketimes = tf.where(k.less(output_spikes, 0),
                               k.zeros_like(output_spikes) * self.time,
                               new_spiketimes)
     assign_new_spiketimes = tf.assign(self.last_spiketimes, new_spiketimes)
     with tf.control_dependencies([assign_new_spiketimes]):
         last_spiketimes = self.last_spiketimes + 0  # Dummy op
         # psp = k.maximum(0., tf.divide(self.dt, last_spiketimes))
         psp = tf.where(k.greater(last_spiketimes, 0),
                        k.ones_like(output_spikes) * self.dt,
                        k.zeros_like(output_spikes))
     return psp
Example #9
    def call(self, u_vecs):
        if self.share_weights:
            u_hat_vecs = K.conv1d(u_vecs, self.W)
        else:
            u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

        batch_size = K.shape(u_vecs)[0]
        input_num_capsule = K.shape(u_vecs)[1]
        u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                            self.num_capsule, self.dim_capsule))
        u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
        #final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]

        b = K.zeros_like(u_hat_vecs[:,:,:,0]) #shape = [None, num_capsule, input_num_capsule]
        for i in range(self.routings):
            c = softmax(b, 1)
            # o = K.batch_dot(c, u_hat_vecs, [2, 2])
            o = tf.einsum('bin,binj->bij', c, u_hat_vecs)
            if K.backend() == 'theano':
                o = K.sum(o, axis=1)
            if i < self.routings - 1:
                o = K.l2_normalize(o, -1)
                # b = K.batch_dot(o, u_hat_vecs, [2, 3])
                b = tf.einsum('bij,binj->bin', o, u_hat_vecs)
                if K.backend() == 'theano':
                    b = K.sum(b, axis=1)

        return self.activation(o)
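The bare softmax(b, 1) call above is presumably a small axis-aware helper rather than K.softmax (an assumption; the helper is not shown in the snippet). A typical backend-only definition would be:

import keras.backend as K

def softmax(x, axis=-1):
    # numerically stable softmax along an arbitrary axis
    ex = K.exp(x - K.max(x, axis=axis, keepdims=True))
    return ex / K.sum(ex, axis=axis, keepdims=True)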
Example #10
 def get_updates(self, loss, params):
   sync_cond = K.equal((self.iterations + 1) // self.sync_period *
                       self.sync_period, (self.iterations + 1))
   if TF_KERAS:
     slow_params = [K.variable(K.get_value(p), name='sp_{}'.format(i))
                    for i, p in enumerate(params)]
     self.updates = self.optimizer.get_updates(loss, params)
     slow_updates = []
     for p, sp in zip(params, slow_params):
       sp_t = sp + self.slow_step * (p - sp)
       slow_updates.append(K.update(sp, K.switch(
           sync_cond,
           sp_t,
           sp,
       )))
       slow_updates.append(K.update_add(p, K.switch(
           sync_cond,
           sp_t - p,
           K.zeros_like(p),
       )))
   else:
     slow_params = {p.name: K.variable(K.get_value(
         p), name='sp_{}'.format(i)) for i, p in enumerate(params)}
     update_names = ['update', 'update_add', 'update_sub']
     original_updates = [getattr(K, name) for name in update_names]
     setattr(K, 'update', lambda x, new_x: ('update', x, new_x))
     setattr(K, 'update_add', lambda x, new_x: ('update_add', x, new_x))
     setattr(K, 'update_sub', lambda x, new_x: ('update_sub', x, new_x))
     self.updates = self.optimizer.get_updates(loss, params)
     for name, original_update in zip(update_names, original_updates):
       setattr(K, name, original_update)
     slow_updates = []
     for i, update in enumerate(self.updates):
       if isinstance(update, tuple):
         name, x, new_x, adjusted = update + (update[-1],)
         update_func = getattr(K, name)
         if name == 'update_add':
           adjusted = x + new_x
         if name == 'update_sub':
           adjusted = x - new_x
         if x.name not in slow_params:
           self.updates[i] = update_func(x, new_x)
         else:
           slow_param = slow_params[x.name]
           slow_param_t = slow_param + \
               self.slow_step * (adjusted - slow_param)
           slow_updates.append(K.update(slow_param, K.switch(
               sync_cond,
               slow_param_t,
               slow_param,
           )))
           self.updates[i] = K.update(x, K.switch(
               sync_cond,
               slow_param_t,
               adjusted,
           ))
     slow_params = list(slow_params.values())
   self.updates += slow_updates
   self.weights = self.optimizer.weights + slow_params
   return self.updates
Example #11
    def decorator(self, x):

        # Only call layer if there are input spikes. This is to prevent
        # accumulation of bias.
        self.impulse = tf.cond(k.any(k.not_equal(x, 0)), lambda: call(self, x),
                               lambda: k.zeros_like(self.mem))
        return self.update_neurons()
Example #12
def dummy_weighted_categorical_loss(y_true, y_pred):
    y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
    y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
    loss = y_true * K.log(y_pred)
    loss = -K.sum(loss, -1)
    condition = K.greater(K.sum(y_true), 0)
    return K.switch(condition, loss, K.zeros_like(loss))
Example #13
    def call(self, inputs):
        if self.share_weights:
            hat_inputs = K.conv1d(inputs, self.kernel)
        else:
            hat_inputs = K.local_conv1d(inputs, self.kernel, [1], [1])

        batch_size = K.shape(inputs)[0]
        input_num_capsule = K.shape(inputs)[1]
        hat_inputs = K.reshape(hat_inputs,
                               (batch_size, input_num_capsule,
                                self.num_capsule, self.dim_capsule))
        hat_inputs = K.permute_dimensions(hat_inputs, (0, 2, 1, 3))

        b = K.zeros_like(hat_inputs[:, :, :, 0])
        for i in range(self.routings):
            c = softmax(b, 1)
            o = self.activation(keras.backend.batch_dot(c, hat_inputs, [2, 2]))
            if i < self.routings - 1:
                b = keras.backend.batch_dot(o, hat_inputs, [2, 3])
                if K.backend() == 'theano':
                    o = K.sum(o, axis=1)

        return o
Example #14
 def call(self, inputs):
     mu, std = inputs
     var_dist = tfp.MultivariateNormalDiag(loc=mu, scale_diag=std)
     pri_dist = tfp.MultivariateNormalDiag(loc=K.zeros_like(mu),
                                           scale_diag=K.ones_like(std))
     kl_loss = self.lamb_kl * K.mean(tfp.kl_divergence(var_dist, pri_dist))
     return kl_loss
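Since the prior above is a standard normal, the KL term also has a closed form, 0.5 * sum(std^2 + mu^2 - 1 - 2*log(std)). A small backend-only helper like the hypothetical one below could serve as a cross-check of the tfp-based computation:

from keras import backend as K

def kl_diag_gaussian_vs_standard_normal(mu, std):
    # KL( N(mu, diag(std^2)) || N(0, I) ), summed over the latent dimensions
    return 0.5 * K.sum(K.square(std) + K.square(mu) - 1.0 - 2.0 * K.log(std),
                       axis=-1)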
Example #15
    def call(self, inputs):
        """Following the routing algorithm from Hinton's paper,
        but replace b = b + <u,v> with b = <u,v>.

        This change can improve the feature representation of Capsule.

        However, you can replace
            b = K.batch_dot(outputs, hat_inputs, [2, 3])
        with
            b += K.batch_dot(outputs, hat_inputs, [2, 3])
        to realize a standard routing.
        """

        if self.share_weights:
            hat_inputs = K.conv1d(inputs, self.kernel)
        else:
            hat_inputs = K.local_conv1d(inputs, self.kernel, [1], [1])

        batch_size = K.shape(inputs)[0]
        input_num_capsule = K.shape(inputs)[1]
        hat_inputs = K.reshape(hat_inputs,
                               (batch_size, input_num_capsule,
                                self.num_capsule, self.dim_capsule))
        hat_inputs = K.permute_dimensions(hat_inputs, (0, 2, 1, 3))

        b = K.zeros_like(hat_inputs[:, :, :, 0])
        for i in range(self.routings):
            c = softmax(b, 1)
            o = self.activation(caps_batch_dot(c, hat_inputs))
            if i < self.routings - 1:
                b = caps_batch_dot(o, hat_inputs)
                if K.backend() == 'theano':
                    o = K.sum(o, axis=1)

        return o
Example #16
def yolo_det_loss(y, p):

    # X/Y values
    y_xy = y[..., 0:2]
    p_xy = p[..., 0:2]

    # Width/Height values
    y_wh = y[..., 2:4]
    p_wh = p[..., 2:4]

    # Object confidence
    y_conf = y[..., 4]
    p_conf = p[..., 4]

    # Intersection over Union
    intersect_wh = K.maximum(K.zeros_like(p_wh),
                             (p_wh + y_wh) / 2 - K.square(p_xy - y_xy))
    I = intersect_wh[..., 0] * intersect_wh[..., 1]
    true_area = y_wh[..., 0] * y_wh[..., 1]
    pred_area = p_wh[..., 0] * p_wh[..., 1]
    U = pred_area + true_area - I
    iou = I / U

    # Calculate individual errors
    e_xy = K.sum(K.sum(K.square(y_xy - p_xy), axis=-1) * y_conf, axis=-1)
    e_wh = K.sum(K.sum(K.square(K.sqrt(y_wh) - K.sqrt(p_wh)), axis=-1) *
                 y_conf,
                 axis=-1)
    e_conf = K.sum(K.square(y_conf * iou - p_conf), axis=-1)

    # Sum all errors
    e = e_xy + e_wh + 10 * e_conf
    return e
Example #17
def heteroscedastic_crossentropy(y_true, logits_log_var):
    def monte_carlo(T, logits, gaussian):
        T_softmax = K.zeros_like(logits)
        n_classes = logits.shape[-1]

        for i in range(T):
            # (?, K) <- (K, ?) <- (K, ?, 1)
            noise = K.transpose(K.squeeze(gaussian.sample(n_classes),
                                          axis=-1))  # draw a sample per logit
            #noise = gaussian.sample() # draw sample from multivariate, for all logits at once
            T_softmax += K.softmax(logits + noise)
        # (?, K)
        return (1 / T) * T_softmax

    n_classes = logits_log_var.shape[-1] - 1  #10
    #n_classes = logits_log_var.shape[-1] // 2 #10
    std = K.sqrt(K.exp(logits_log_var[:, n_classes:]))

    # get T softmax monte carlo simulations
    y_hat = monte_carlo(
        T=100,  # number of simulations
        logits=logits_log_var[:, :n_classes],  # logits
        gaussian=tf.distributions.Normal(loc=K.zeros_like(std),
                                         scale=std))  # log_var to std

    y_hat = K.clip(y_hat, 1e-11, 1 - 1e-11)  # prevent nans
    #beta = 1.
    #gamma = .1
    #H = -K.sum(y_hat * K.log(y_hat), -1) # entropy term to punish confident predictions
    nll = -K.sum(y_true * K.log(y_hat), -1)  # negative log likelihood
    return nll
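In plain terms (my paraphrase of the heteroscedastic classification loss above, not the author's code): the logits are corrupted T times with Gaussian noise scaled by the predicted std, the T softmaxes are averaged, and the negative log-likelihood of that average is returned. A NumPy-only restatement of the Monte-Carlo part:

import numpy as np

def mc_softmax(logits, std, t_samples=100, rng=np.random.default_rng(0)):
    # draw (t_samples, batch, classes) noise, corrupt the logits, average the softmaxes
    eps = rng.standard_normal((t_samples,) + logits.shape)
    z = logits + std * eps
    z = z - z.max(axis=-1, keepdims=True)          # for numerical stability
    p = np.exp(z) / np.exp(z).sum(axis=-1, keepdims=True)
    return p.mean(axis=0)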
Example #18
 def accfun(y0, y1):
     x_pos = K.ones_like(x_r)
     x_neg = K.zeros_like(x_r)
     loss_r = K.mean(tf.keras.metrics.binary_accuracy(x_pos, x_r))
     loss_f = K.mean(tf.keras.metrics.binary_accuracy(x_neg, x_f))
     loss_p = K.mean(tf.keras.metrics.binary_accuracy(x_neg, x_p))
     return (1.0 / 3.0) * (loss_r + loss_p + loss_f)
Example #19
def zero_loss(y_true, y_pred):
    """
    args:
        y_true():
        y_pred():
    """
    return K.zeros_like(y_true)
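A common way such a no-op loss gets used (an assumption, not stated in the snippet) is as a placeholder for an output whose real penalty is added elsewhere, for example through a custom layer's add_loss; the model and output names below are hypothetical:

model.compile(optimizer='adam',
              loss={'reconstruction': 'mse', 'kl_term': zero_loss})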
Example #20
    def _get_weight_matrix(self, freq_true: tf.Tensor,
                           freq_pred: tf.Tensor) -> tf.Tensor:
        """ Calculate a continuous, dynamic weight matrix based on current Euclidean distance.

        Parameters
        ----------
        freq_true: :class:`tf.Tensor`
            The real and imaginary DFT frequencies for the true batch of images
        freq_pred: :class:`tf.Tensor`
            The real and imaginary DFT frequencies for the predicted batch of images

        Returns
        -------
        :class:`tf.Tensor`
            The weights matrix for prioritizing hard frequencies
        """
        weights = K.square(freq_pred - freq_true)
        weights = K.sqrt(weights[..., 0] + weights[..., 1])
        weights = K.pow(weights, self._alpha)

        if self._log_matrix:  # adjust the spectrum weight matrix by logarithm
            weights = K.log(weights + 1.0)

        if self._batch_matrix:  # calculate the spectrum weight matrix using batch-based statistics
            weights = weights / K.max(weights)
        else:
            weights = weights / K.max(K.max(weights, axis=-2),
                                      axis=-2)[..., None, None, :]

        weights = K.switch(tf.math.is_nan(weights), K.zeros_like(weights),
                           weights)
        weights = K.clip(weights, min_value=0.0, max_value=1.0)

        return weights
Example #21
    def get_updates(self, params, loss):
        grads = self.get_gradients(loss, params)
        shapes = [K.get_variable_shape(p) for p in params]
        alphas = [
            K.variable(K.ones(shape) * self.init_alpha) for shape in shapes
        ]
        old_grads = [K.zeros(shape) for shape in shapes]
        self.weights = alphas + old_grads
        self.updates = []

        for p, grad, old_grad, alpha in zip(params, grads, old_grads, alphas):
            grad = K.sign(grad)
            new_alpha = K.switch(
                K.greater(grad * old_grad, 0),
                K.minimum(alpha * self.scale_up, self.max_alpha),
                K.switch(K.less(grad * old_grad, 0),
                         K.maximum(alpha * self.scale_down, self.min_alpha),
                         alpha))

            grad = K.switch(K.less(grad * old_grad, 0), K.zeros_like(grad),
                            grad)
            new_p = p - grad * new_alpha

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)
            self.updates.append(K.update(p, new_p))
            self.updates.append(K.update(alpha, new_alpha))
            self.updates.append(K.update(old_grad, grad))

        return self.updates
Example #22
 def lossfun(self, y_real, y_fake_f, y_fake_p):
     y_pos = K.ones_like(y_real)
     y_neg = K.zeros_like(y_real)
     loss_real = tf.keras.losses.binary_crossentropy(y_pos, y_real)
     loss_fake_f = tf.keras.losses.binary_crossentropy(y_neg, y_fake_f)
     loss_fake_p = tf.keras.losses.binary_crossentropy(y_neg, y_fake_p)
     return K.mean(loss_real + loss_fake_f + loss_fake_p)
Example #23
    def _init_cel(self, A_graph, b_graph, c_graph, y):
        # Sanity Checks
        y = tf.check_numerics(y, 'Problem with input y')

        # Find intersection points between Ax-b and the line joining the c and y
        Ac = tf.reduce_sum(A_graph * tf.expand_dims(c_graph, axis=-2), axis=-1)
        bMinusAc = b_graph - Ac
        yMinusc = y - c_graph
        ADotyMinusc = tf.reduce_sum((A_graph * tf.expand_dims(yMinusc, -2)),
                                    axis=-1)
        intersection_alphas = bMinusAc / (ADotyMinusc + K.epsilon())

        # Enforce intersection_alpha > 0 because the point must lie on the ray from c to y
        less_equal_0 = K.less_equal(intersection_alphas,
                                    K.zeros_like(intersection_alphas))
        candidate_alpha = K.switch(
            less_equal_0,
            K.ones_like(intersection_alphas) *
            tf.constant(np.inf, dtype='float32'), intersection_alphas)

        # Find the intersection point closest to the interior point to get the projection point
        intersection_alpha = K.min(candidate_alpha, axis=-1, keepdims=True)

        # If it is an interior point, y itself is the projection point
        is_interior_point = K.greater_equal(intersection_alpha,
                                            K.ones_like(intersection_alpha))
        alpha = K.switch(is_interior_point, K.ones_like(intersection_alpha),
                         intersection_alpha)

        # Return z = \alpha.y + (1 - \alpha).c
        z = alpha * y + ((1 - alpha) * c_graph)

        return z
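A tiny 2-D NumPy sanity check of the ray-intersection logic above (my own illustration with a hypothetical unit box, not part of the original code): starting from the interior point c, the exterior point y is pulled back onto the boundary of {x : Ax <= b}.

import numpy as np

a_mat = np.array([[1., 0.], [-1., 0.], [0., 1.], [0., -1.]])  # |x1| <= 1, |x2| <= 1
b_vec = np.ones(4)
c_pt = np.zeros(2)                     # interior point
y_pt = np.array([3., 0.])              # point outside the box

alphas = (b_vec - a_mat @ c_pt) / (a_mat @ (y_pt - c_pt) + 1e-12)
alphas[alphas <= 0] = np.inf           # the intersection must lie on the ray towards y
alpha = min(alphas.min(), 1.0)         # alpha >= 1 would mean y is already feasible
z_pt = alpha * y_pt + (1 - alpha) * c_pt
print(z_pt)                            # -> [1. 0.]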
Example #24
    def call(self, u_vecs):
        if self.share_weights:
            u_hat_vecs = K.conv1d(u_vecs, self.W)
        else:
            u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

        batch_size = K.shape(u_vecs)[0]
        input_num_capsule = K.shape(u_vecs)[1]
        u_hat_vecs = K.reshape(u_hat_vecs,
                               (batch_size, input_num_capsule,
                                self.num_capsule, self.dim_capsule))
        u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
        # final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]

        b = K.zeros_like(
            u_hat_vecs[:, :, :,
                       0])  # shape = [None, num_capsule, input_num_capsule]
        for i in range(self.routings):
            b = K.permute_dimensions(
                b, (0, 2, 1))  # shape = [None, input_num_capsule, num_capsule]
            c = K.softmax(b)
            c = K.permute_dimensions(c, (0, 2, 1))
            b = K.permute_dimensions(b, (0, 2, 1))
            outputs = self.activation(K.batch_dot(c, u_hat_vecs, [2, 2]))
            if i < self.routings - 1:
                b = K.batch_dot(outputs, u_hat_vecs, [2, 3])

        return outputs
Example #25
 def dropped_mask():
     drop_mask = K.switch(
         K.random_uniform(K.shape(inputs)) < self.drop_rate,
         K.ones_like(inputs, K.floatx()),
         K.zeros_like(inputs, K.floatx()),
     )
     return target_mask * drop_mask
Example #26
def saliency_map(input_x, input_y, logits, conv_output):
    """
    Compute the saliency map for a text input
    """
    # shape output (batch_size, sequence_length, num_filters)
    # shape grads_val (batch_size, sequence_length, num_filters)
    _, grads_val = get_gradients(input_x, input_y, logits, conv_output)

    # get the maximum gradient for each gram of words
    # shape (batch_size, sequence_length)
    s_maps = K.max(grads_val, axis=2)

    # Process s_maps
    new_s_maps = K.zeros_like(s_maps)
    for i in range(s_maps.shape[0]):
        # Distance from the mean
        s_map_ = s_maps[i] - K.mean(s_maps[i])

        # Keep only positive values
        s_map_ = K.maximum(s_map_, 0)

        # Normalize
        s_map_ = s_map_ / s_map_.max() if s_map_.max() != 0 else s_map_

        new_s_maps[i] = s_map_

    return new_s_maps
Example #27
    def make_readout_decode_model(self, max_output_len=32):
        src_seq_input = Input(shape=(None, ), dtype='int32')
        tgt_start_input = Input(shape=(1, ), dtype='int32')
        src_seq = src_seq_input
        enc_mask = Lambda(lambda x: K.cast(K.greater(x, 0), 'float32'))(
            src_seq)
        src_emb = self.i_word_emb(src_seq)
        if self.pos_emb:
            src_emb = add_layer([src_emb, self.pos_emb(src_seq)])

        src_emb = self.emb_dropout(src_emb)
        enc_output = self.encoder(src_emb, src_seq)

        tgt_emb = self.o_word_emb(tgt_start_input)
        tgt_seq = Lambda(lambda x: K.repeat_elements(x, max_output_len, 1))(
            tgt_start_input)
        rep_input = Lambda(lambda x: K.repeat_elements(x, max_output_len, 1))(
            tgt_emb)

        cell = ReadoutDecoderCell(self.o_word_emb, self.pos_emb, self.decoder,
                                  self.target_layer)
        final_output = InferRNN(cell, return_sequences=True)(rep_input,
          initial_state=[tgt_start_input, K.ones_like(tgt_start_input), K.zeros_like(tgt_seq)] + \
            [rep_input for _ in self.decoder.layers],
          constants=[enc_output, enc_mask])
        final_output = Lambda(lambda x: K.squeeze(x, -1))(final_output)
        self.readout_model = Model([src_seq_input, tgt_start_input],
                                   final_output)
Example #28
def grad_cam(input_x, input_y, logits, conv_output):
    """
    Compute the grad-cam for a text input
    """
    # shape output (batch_size, sequence_length, num_filters)
    # shape grads_val (batch_size, sequence_length, num_filters)
    output, grads_val = get_gradients(input_x, input_y, logits, conv_output)

    # get the maximum gradient for each gram of words
    # shape (batch_size, sequence_length)
    weights = K.max(grads_val, axis=2)

    # shape cam (batch_size, sequence_length)
    cams = tf.einsum('ijk,ij->ij', output, weights)

    # Process cam
    new_cams = K.zeros_like(cams)
    for i, cam in enumerate(cams):
        # Distance from the mean
        cam_ = cam - cam.mean()

        # Keep only positive values
        cam_ = K.maximum(cam_, 0)

        # Normalize
        cam_ = cam_ / cam_.max() if cam_.max() != 0 else cam_

        new_cams[i] = cam_

    return new_cams
Example #29
def masked_softmax(vector, mask):
    """
    `K.softmax(vector)` does not work if some elements of `vector` should be masked.  This performs
    a softmax on just the non-masked portions of `vector` (passing None in for the mask is also
    acceptable; you'll just get a regular softmax).

    We assume that both `vector` and `mask` (if given) have shape (batch_size, vector_dim).

    In the case that the input vector is completely masked, this function returns an array
    of ``0.0``. This behavior may cause ``NaN`` if this is used as the last layer of a model
    that uses categorical cross-entropy loss.
    """
    # We calculate masked softmax in a numerically stable fashion, as done
    # in https://github.com/rkadlec/asreader/blob/master/asreader/custombricks/softmax_mask_bricks.py
    if mask is not None:
        # Here we get normalized log probabilities for
        # enhanced numerical stability.
        mask = K.cast(mask, "float32")
        input_masked = mask * vector
        shifted = mask * (input_masked -
                          K.max(input_masked, axis=1, keepdims=True))
        # We add epsilon to avoid numerical instability when
        # the sum in the log yields 0.
        normalization_constant = K.log(
            K.sum(mask * K.exp(shifted), axis=1, keepdims=True) + K.epsilon())
        normalized_log_probabilities = mask * (shifted -
                                               normalization_constant)
        unmasked_probabilities = K.exp(normalized_log_probabilities)
        return switch(mask, unmasked_probabilities,
                      K.zeros_like(unmasked_probabilities))
    else:
        # There is no mask, so we use the provided ``K.softmax`` function.
        return K.softmax(vector)
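A quick usage check of masked_softmax (hedged: it assumes the function above and its switch helper are in scope, and that K.eval is available on the backend in use): masked positions receive probability 0.0 and the remaining entries renormalise among themselves.

from keras import backend as K

vec = K.constant([[1.0, 2.0, 3.0, 4.0]])
msk = K.constant([[1.0, 1.0, 0.0, 1.0]])
probs = masked_softmax(vec, msk)
# Expected: a softmax over positions 0, 1 and 3 only, with 0.0 at position 2.
print(K.eval(probs))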
Example #30
 def create_inital_state(inputs, hidden_size):
     # We are not using initial states, but need to pass something to the K.rnn function
     fake_state = K.zeros_like(inputs)  # <= (batch_size, enc_seq_len, latent_dim)
     fake_state = K.sum(fake_state, axis=[1, 2])  # <= (batch_size)
     fake_state = K.expand_dims(fake_state)  # <= (batch_size, 1)
     fake_state = K.tile(fake_state, [1, hidden_size])  # <= (batch_size, latent_dim)
     return fake_state