Example #1
    def angular_loss_2(y_true, y_pred):
        y_pred = K.clip(y_pred, _EPSILON, 1.0 - _EPSILON)
        loss = tf.convert_to_tensor(0, dtype=tf.float32)
        g = tf.constant(1.0, shape=[1], dtype=tf.float32)
        c = tf.constant(4.0, shape=[1], dtype=tf.float32)
        d = tf.constant(2.0, shape=[1], dtype=tf.float32)
        alpha = tf.constant(45.0, shape=[1], dtype=tf.float32)

        losses = []
        losses2 = []
        for i in range(0, batch_size, 3):
            try:
                xa = y_pred[i + 0]
                xp = y_pred[i + 1]
                xn = y_pred[i + 2]

                # f(a,p,n) = 4 tan^2(alpha) (xa + xp)^T xn - 2 (1 + tan^2(alpha)) xa^T xp
                # (note: tf.tan expects radians)
                fapn = c * (tf.tan(alpha)**2) * K.transpose(xa + xp) * xn - d * (
                    g + tf.tan(alpha)**2) * K.transpose(xa) * xp
                losses.append(fapn)

                losses2.append(K.transpose(xa) * xn - K.transpose(xa) * xp)

            except IndexError:
                # batch_size not divisible by 3: skip the incomplete triplet
                continue
        loss = K.sum(K.log(1 + 2 * K.sum([K.exp(v) for v in losses])))
        loss2 = K.sum(K.log(1 + 2 * K.sum([K.exp(v) for v in losses2])))
        loss = loss + 2 * loss2
        loss = loss / (batch_size / 3)
        zero = tf.constant(0.0, shape=[1], dtype=tf.float32)
        return tf.maximum(loss, zero)
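This appears to implement the angular loss of Wang et al. (2017), and it assumes `y_pred` is stacked as consecutive (anchor, positive, negative) rows with `batch_size` divisible by 3. A minimal sketch of building such a batch, using hypothetical NumPy embeddings:

import numpy as np

# Hypothetical embeddings: two triplets of 128-d vectors.
anchors   = np.random.rand(2, 128).astype('float32')
positives = np.random.rand(2, 128).astype('float32')
negatives = np.random.rand(2, 128).astype('float32')

# Interleave so rows follow the (anchor, positive, negative) pattern that
# angular_loss_2 expects: rows i, i+1, i+2 for i = 0, 3, 6, ...
batch = np.stack([anchors, positives, negatives], axis=1).reshape(-1, 128)
assert np.allclose(batch[0], anchors[0]) and np.allclose(batch[2], negatives[0])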
Example #2
def Kget_dists(X):
    """Keras code to compute the pairwise distance matrix for a set of
    vectors specifie by the matrix X.
    """
    x2 = K.expand_dims(K.sum(K.square(X), axis=1), 1)
    dists = x2 + K.transpose(x2) - 2 * K.dot(X, K.transpose(X))
    return dists
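Kget_dists relies on the identity ||x_i - x_j||^2 = ||x_i||^2 + ||x_j||^2 - 2 x_i.x_j. A quick NumPy check of the same expansion on hypothetical data:

import numpy as np

X = np.random.rand(4, 3).astype('float32')   # hypothetical: 4 vectors of dim 3

# Same expansion as Kget_dists.
x2 = np.sum(np.square(X), axis=1, keepdims=True)   # (4, 1)
dists = x2 + x2.T - 2.0 * X.dot(X.T)               # (4, 4)

# Direct computation for comparison.
direct = np.square(X[:, None, :] - X[None, :, :]).sum(-1)
assert np.allclose(dists, direct, atol=1e-5)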
Example #3
    def fallback_metric(self, y_true, y_pred):
        #grab the most confident prediction
        predictions = K.max(y_pred, axis=-1)

        #fill a tensor with our threshold_value
        threshold_tensor = tf.fill(tf.shape(predictions), self.threshold)

        #Are we confident in our prediction?
        threshold_high = predictions > threshold_tensor
        threshold_high = tf.cast(threshold_high, tf.int32)

        #Do we have low confidence in our prediction?
        threshold_low = predictions <= threshold_tensor
        threshold_low = tf.cast(threshold_low, tf.int32)

        idx_true = K.argmax(y_true, -1)
        idx_pred = K.argmax(y_pred, -1)

        #For our confident predictions, compare the top prediction to the label of the true value
        high_correct = math_ops.equal(idx_true, idx_pred)
        high_correct = tf.cast(high_correct, tf.int32)

        #For our less confident predictions, grab the top 2 most confident predictions
        _, max_pred = tf.math.top_k(y_pred, k=2)

        #Gather the lineages of those top 2 predictions using the transpose of the hierarchy's adjacency matrix, because the adjacency only points from ancestor to descendant
        lineages = tf.gather(K.transpose(self.hierarchy.A), max_pred)
        lineages = K.cast(lineages, tf.int32)

        #Intersect the lineages of the top 2 predictions (bitwise AND of their ancestor masks)
        fallback = tf.bitwise.bitwise_and(lineages[:, 0], lineages[:, 1])

        #Gather the lineage of the true value
        actual = tf.gather(K.transpose(self.hierarchy.A), K.argmax(y_true))
        actual = K.cast(actual, tf.int32)

        #Multiply the two together
        overlap_score = K.batch_dot(fallback, actual)

        #Are either of the top 2 predictions in the lineage of the true value? If so, overlap_score should be >1 and we count the result as correct
        low_correct = overlap_score > 1
        low_correct = tf.cast(low_correct, tf.int32)
        low_correct = tf.squeeze(low_correct)

        #results for the high confidence predictions
        high_accuracy = tf.math.multiply(threshold_high, high_correct)

        #results for the low confidence predictions
        low_accuracy = tf.math.multiply(threshold_low, low_correct)

        # total accuracy vector
        correct = high_accuracy + low_accuracy

        #return batch accuracy value
        return K.mean(K.cast(correct, tf.float32))
Example #4
 def call(self, inputs, **kwargs):
     """ student t-distribution, as same as used in t-SNE algorithm.
              q_ij = 1/(1+dist(x_i, u_j)^2), then normalize it.
     Arguments:
         inputs: the variable containing data, shape=(n_samples, n_features)
     Return:
         q: student's t-distribution, or soft labels for each sample. shape=(n_samples, n_clusters)
     """
     q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))
     q **= (self.alpha + 1.0) / 2.0
     q = K.transpose(K.transpose(q) / K.sum(q, axis=1))
     return q
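A quick NumPy check of the same soft-assignment computation, with hypothetical sample and cluster matrices; every output row should be a distribution over clusters:

import numpy as np

alpha = 1.0
inputs = np.random.rand(5, 10)     # hypothetical: 5 samples, 10 features
clusters = np.random.rand(3, 10)   # hypothetical: 3 cluster centers

# Student's t kernel on squared distances, then row-normalize.
dist2 = np.square(inputs[:, None, :] - clusters[None, :, :]).sum(axis=2)
q = 1.0 / (1.0 + dist2 / alpha)
q = q ** ((alpha + 1.0) / 2.0)
q = q / q.sum(axis=1, keepdims=True)
assert np.allclose(q.sum(axis=1), 1.0)   # soft labels sum to 1 per sample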
Example #5
 def _compute_carry_and_output(self, x, h_tm1, c_tm1, b):
     """Computes carry and output using split kernels."""
     x_i, x_f, x_c, x_o = x
     h_tm1_i, h_tm1_f, h_tm1_c, h_tm1_o = h_tm1
     b_i2, b_f2, b_c2, b_o2 = b
     i = self.recurrent_activation(
         x_i + K.bias_add(K.dot(h_tm1_i, K.transpose(self.recurrent_kernel[:, :self.units])), b_i2))
     f = self.recurrent_activation(x_f + K.bias_add(K.dot(
         h_tm1_f, K.transpose(self.recurrent_kernel[:, self.units:self.units * 2])), b_f2))
     c = f * c_tm1 + i * self.activation(x_c + K.bias_add(K.dot(
         h_tm1_c, K.transpose(self.recurrent_kernel[:, self.units * 2:self.units * 3])), b_c2))
     o = self.recurrent_activation(
         x_o + K.bias_add(K.dot(h_tm1_o, K.transpose(self.recurrent_kernel[:, self.units * 3:])), b_o2))
     return c, o
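These are the standard LSTM gate equations with split recurrent kernels; a minimal NumPy restatement under the usual conventions (biases omitted, hypothetical shapes):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

units = 4
h_tm1 = np.random.rand(1, units)              # previous hidden state
c_tm1 = np.random.rand(1, units)              # previous cell state
rk = np.random.rand(units, 4 * units)         # recurrent kernel, 4 gate slices
x_i = x_f = x_c = x_o = np.zeros((1, units))  # input contributions (zeroed here)

# Same four equations as above, one slice of the recurrent kernel per gate.
i = sigmoid(x_i + h_tm1.dot(rk[:, :units].T))
f = sigmoid(x_f + h_tm1.dot(rk[:, units:2 * units].T))
c = f * c_tm1 + i * np.tanh(x_c + h_tm1.dot(rk[:, 2 * units:3 * units].T))
o = sigmoid(x_o + h_tm1.dot(rk[:, 3 * units:].T))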
Example #6
 def call(self, inputs, **kwargs):
     main_input, embedding_matrix = inputs
     input_shape_tensor = K.shape(main_input)
     last_input_dim = K.int_shape(main_input)[-1]
     emb_input_dim, emb_output_dim = K.int_shape(embedding_matrix)
     projected = K.dot(K.reshape(main_input, (-1, last_input_dim)),
                       self.embedding_weights['projection'])
     if self.add_biases:
         projected = K.bias_add(projected,
                                self.embedding_weights['biases'],
                                data_format='channels_last')
     if 0 < self.projection_dropout < 1:
         projected = K.in_train_phase(
             lambda: K.dropout(projected, self.projection_dropout),
             projected,
             training=kwargs.get('training'))
     attention = K.dot(projected, K.transpose(embedding_matrix))
     if self.scaled_attention:
         # scaled dot-product attention, described in
         # "Attention is all you need" (https://arxiv.org/abs/1706.03762)
         sqrt_d = K.constant(math.sqrt(emb_output_dim), dtype=K.floatx())
         attention = attention / sqrt_d
     result = K.reshape(
         self.activation(attention),
         (input_shape_tensor[0], input_shape_tensor[1], emb_input_dim))
     return result
Example #7
    def call(self, inputs, training=None):
        def _l2normalize(v, eps=1e-12):
            return v / (K.sum(v**2)**0.5 + eps)

        def power_iteration(W, u):
            _u = u
            _v = _l2normalize(K.dot(_u, K.transpose(W)))
            _u = _l2normalize(K.dot(_v, W))
            return _u, _v

        if self.spectral_normalization:
            W_shape = self.kernel.shape.as_list()
            # Flatten the Tensor
            W_reshaped = K.reshape(self.kernel, [-1, W_shape[-1]])
            _u, _v = power_iteration(W_reshaped, self.u)
            # Calculate Sigma
            sigma = K.dot(_v, W_reshaped)
            sigma = K.dot(sigma, K.transpose(_u))
            # normalize it
            W_bar = W_reshaped / sigma
            # reshape weight tensor
            if training in {0, False}:
                W_bar = K.reshape(W_bar, W_shape)
            else:
                with tf.control_dependencies([self.u.assign(_u)]):
                    W_bar = K.reshape(W_bar, W_shape)

            # update weight
            self.kernel = W_bar

        if self.rank == 1:
            outputs = K.conv1d(inputs,
                               self.kernel,
                               strides=self.strides[0],
                               padding=self.padding,
                               data_format=self.data_format,
                               dilation_rate=self.dilation_rate[0])
        if self.rank == 2:
            outputs = K.conv2d(inputs,
                               self.kernel,
                               strides=self.strides,
                               padding=self.padding,
                               data_format=self.data_format,
                               dilation_rate=self.dilation_rate)
        if self.rank == 3:
            outputs = K.conv3d(inputs,
                               self.kernel,
                               strides=self.strides,
                               padding=self.padding,
                               data_format=self.data_format,
                               dilation_rate=self.dilation_rate)

        if self.use_bias:
            outputs = K.bias_add(outputs,
                                 self.bias,
                                 data_format=self.data_format)

        if self.activation is not None:
            return self.activation(outputs)
        return outputs
Example #8
def gram_matrix(x, norm_by_channels=False):
    '''
    Returns the Gram matrix of the tensor x.
    '''
    if K.ndim(x) == 3:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
        shape = K.shape(x)
        H, W, C = shape[0], shape[1], shape[2]
        gram = K.dot(features, K.transpose(features))
    elif K.ndim(x) == 4:
        # Swap from (H, W, C) to (B, C, H, W)
        x = K.permute_dimensions(x, (0, 3, 1, 2))
        shape = K.shape(x)
        B, C, H, W = shape[0], shape[1], shape[2], shape[3]
        # Reshape as a batch of 2D matrices with vectorized channels
        features = K.reshape(x, K.stack([B, C, H * W]))
        # This is a batch of Gram matrices (B, C, C).
        gram = K.batch_dot(features, features, axes=2)
    else:
        raise ValueError(
            'The input tensor should be either a 3d (H, W, C) or 4d (B, H, W, C) tensor.'
        )
    # Normalize the Gram matrix
    if norm_by_channels:
        denominator = C * H * W  # Normalization from Johnson
    else:
        denominator = H * W  # Normalization from Google
    gram = gram / K.cast(denominator, x.dtype)

    return gram
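A quick shape check for both branches of gram_matrix, assuming TF2 eager execution and K = keras.backend as in the snippets above, on hypothetical feature maps:

import numpy as np
import tensorflow as tf

single = tf.constant(np.random.rand(8, 8, 3), dtype=tf.float32)     # (H, W, C)
batch = tf.constant(np.random.rand(2, 8, 8, 3), dtype=tf.float32)   # (B, H, W, C)

print(gram_matrix(single).shape)                        # (3, 3)
print(gram_matrix(batch, norm_by_channels=True).shape)  # (2, 3, 3)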
Example #9
def shift(shape, stride, anchors):
    """Produce shifted anchors based on shape of the map and stride size.

    Args:
        shape: Shape to shift the anchors over.
        stride: Stride to shift the anchors with over the shape.
        anchors: The anchors to apply at each location.

    Returns:
        shifted anchors
    """
    shift_x = (K.arange(0, shape[1], dtype=K.floatx()) +
               K.constant(0.5, dtype=K.floatx())) * stride
    shift_y = (K.arange(0, shape[0], dtype=K.floatx()) +
               K.constant(0.5, dtype=K.floatx())) * stride

    shift_x, shift_y = tf.meshgrid(shift_x, shift_y)
    shift_x = K.reshape(shift_x, [-1])
    shift_y = K.reshape(shift_y, [-1])

    shifts = K.stack([shift_x, shift_y, shift_x, shift_y], axis=0)

    shifts = K.transpose(shifts)
    number_of_anchors = K.shape(anchors)[0]

    k = K.shape(shifts)[0]  # number of base points = feat_h * feat_w

    shifts = K.cast(K.reshape(shifts, [k, 1, 4]), K.floatx())
    shifted_anchors = K.reshape(anchors, [1, number_of_anchors, 4]) + shifts
    shifted_anchors = K.reshape(shifted_anchors, [k * number_of_anchors, 4])

    return shifted_anchors
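A small usage sketch, assuming the tf and Keras backend imports used by the function: with a 2x3 feature map, stride 8, and one hypothetical base anchor, shift() returns one shifted anchor per cell.

anchors = K.constant([[-8.0, -8.0, 8.0, 8.0]])   # hypothetical 16x16 base anchor
shifted = shift((2, 3), 8, anchors)
print(K.int_shape(shifted))   # (6, 4): 2*3 cells, one anchor each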
Example #10
 def call(self, inputs, training=None):
     def _l2normalize(v, eps=1e-12):
         return v / (K.sum(v ** 2) ** 0.5 + eps)
     def power_iteration(W, u):
         _u = u
         _v = _l2normalize(K.dot(_u, K.transpose(W)))
         _u = _l2normalize(K.dot(_v, W))
         return _u, _v
     W_shape = self.kernel.shape.as_list()
     #Flatten the Tensor
     W_reshaped = K.reshape(self.kernel, [-1, W_shape[-1]])
     _u, _v = power_iteration(W_reshaped, self.u)
     #Calculate Sigma
      sigma = K.dot(_v, W_reshaped)
      sigma = K.dot(sigma, K.transpose(_u))
     #normalize it
     W_bar = W_reshaped / sigma
     #reshape weight tensor
     if training in {0, False}:
         W_bar = K.reshape(W_bar, W_shape)
     else:
         with tf.control_dependencies([self.u.assign(_u)]):
              W_bar = K.reshape(W_bar, W_shape)
     output = K.dot(inputs, W_bar)
     if self.use_bias:
         output = K.bias_add(output, self.bias, data_format='channels_last')
     if self.activation is not None:
         output = self.activation(output)
     return output 
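The power iteration above estimates the largest singular value (the spectral norm) of the flattened kernel; one step per batch suffices because `u` persists across calls via self.u.assign(_u). A NumPy check on a hypothetical matrix that the iteration converges to the true value:

import numpy as np

W = np.random.rand(64, 32).astype('float32')   # hypothetical flattened kernel
u = np.random.rand(1, 32).astype('float32')

def l2normalize(v, eps=1e-12):
    return v / (np.sum(v ** 2) ** 0.5 + eps)

# Same two-step update as power_iteration, repeated to convergence.
for _ in range(50):
    v = l2normalize(u.dot(W.T))   # (1, 64)
    u = l2normalize(v.dot(W))     # (1, 32)

sigma = v.dot(W).dot(u.T)         # estimated spectral norm, shape (1, 1)
assert np.isclose(sigma[0, 0], np.linalg.svd(W, compute_uv=False)[0], rtol=1e-3)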
Example #11
    def call(self, inputs, **kwargs):

        if not inputs.shape[0]:
            return inputs

        recurrent_input = ops.convert_to_tensor(inputs)

        if not self._mixed_precision_policy.should_cast_variables:
            recurrent_input = math_ops.cast(recurrent_input, self.dtype)

        batch_size = recurrent_input.shape[0]

        # Flatten last two dimensions, but along dimension [2]
        flat_recurrent = K.reshape(
            K.permute_dimensions(recurrent_input, (0, 2, 1)), (batch_size, -1))
        outputs = gen_math_ops.mat_mul(
            flat_recurrent,
            tf.math.multiply(self.recurrent_kernel, self.recurrent_mask))

        if self.use_bias:
            outputs = nn.bias_add(outputs, self.bias)
        if self.activation is not None:
            outputs = self.activation(outputs)

        # Transform back outputs to original shape
        outputs = K.reshape(
            K.transpose(outputs),
            (self.target_shape[0], self.target_shape[1], batch_size))
        outputs = K.reshape(
            outputs, (self.target_shape[1], self.target_shape[0], batch_size))
        outputs = K.permute_dimensions(outputs, (2, 1, 0))

        return outputs
Example #12
 def select_best_leaf(self, y_pred):
      if self.N > self.num_leaves:
          # if there are more total nodes in the hierarchy than leaf nodes (should always
          # be the case, but it works either way), pad with a zero for each non-leaf node
          # in the taxonomy
         y_pred = self._pad(y_pred)
     # propagate the probabilities (algo 1)
     propagated_probabilities = K.transpose(
         K.dot(self.A, K.transpose(y_pred)))
     # grab the mask vector for root and repeat it <batch size> times
     root = K.repeat(self.root, K.shape(y_pred)[0])
     # reshape into (<batch size>, N)
     predictions = K.reshape(root, (K.shape(y_pred)[0], ))
      # each branch walks further out toward the leaf nodes (and loops on leaf nodes)
     for _ in range(self.depth):
         predictions = self._branch(propagated_probabilities, predictions)
     return predictions
Example #13
 def power_iteration(self, u, W):
     '''
     According to the paper, we only need to do one power-iteration step.
     '''
     v = self._l2normalize(K.dot(u, K.transpose(W)))
     u = self._l2normalize(K.dot(v, W))
     return u, v
Example #14
    def call(self, inputs, states, training=None):
        # get the standard hidden state from super
        output = super(STTAUCell, self).call(inputs, states)
        h_before = output[0]
        c = output[1][1]

        # the following part modifies the hidden state to create STTAU
        # sizes: B = batch size, H = hidden dimension size,
        # C = number of centroids
        # BxC = BxH & HxC
        unnormalized_probs = K.dot(h_before, self.centroid_kernel)

        # Gumbel-Softmax sample with (learnt) temperature & unnormalized_probs
        q_y = tfp.distributions.RelaxedOneHotCategorical(
            self.temperature_weight, unnormalized_probs)

        # BxC
        y = q_y.sample()
        if self.hard_sample is True:
            # y_hard is a one-hot vector with BxC
            y_hard = tf.cast(tf.one_hot(tf.argmax(y, -1), self.centroids),
                             y.dtype)
            y = tf.stop_gradient(y_hard - y) + y

        # BxH = BxC & CxH
        h_after = K.dot(y, K.transpose(self.centroid_kernel))
        # end of STTAU modification

        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h_after._uses_learning_phase = True
        return h_before, [h_after, c]
Example #15
 def build(self, input_shape):
   dtype = dtypes.as_dtype(self.dtype or K.floatx())
   if not (dtype.is_floating or dtype.is_complex):
     raise TypeError('Unable to build `Dense` layer with non-floating point '
                     'dtype %s' % (dtype,))
   input_shape = tensor_shape.TensorShape(input_shape)
   if tensor_shape.dimension_value(input_shape[-1]) is None:
     raise ValueError('The last dimension of the inputs to `Dense` '
                      'should be defined. Found `None`.')
   last_dim = tensor_shape.dimension_value(input_shape[-1])
   self.input_spec = InputSpec(min_ndim=2,
                               axes={-1: last_dim})
   if self.tied_to is not None:
     self.kernel = K.transpose(self.tied_to.weights[0])
   else:
     self.kernel = self.add_weight(
         'kernel',
         shape=[last_dim, self.units],
         initializer=self.kernel_initializer,
         regularizer=self.kernel_regularizer,
         constraint=self.kernel_constraint,
         dtype=self.dtype,
         trainable=True)
   if self.use_bias:
     self.bias = self.add_weight(
         'bias',
         shape=[self.units,],
         initializer=self.bias_initializer,
         regularizer=self.bias_regularizer,
         constraint=self.bias_constraint,
         dtype=self.dtype,
         trainable=True)
   else:
     self.bias = None
   self.built = True
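The `tied_to` branch reuses another layer's kernel transposed, the classic tied-weights setup for autoencoders; a minimal NumPy illustration with hypothetical shapes:

import numpy as np

encoder_kernel = np.random.rand(32, 8).astype('float32')   # 32 features -> 8 units

# The tied decoder uses the transpose: 8 units back to 32 features,
# adding no kernel parameters of its own.
x = np.random.rand(4, 8).astype('float32')   # hypothetical decoder input
y = x.dot(encoder_kernel.T)                  # (4, 32)
assert y.shape == (4, 32)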
Example #16
    def call(self, inputs, output_shape=None):
        updates, mask = inputs[0], inputs[1]

        mask = tf.cast(mask, 'int32')
        input_shape = tf.shape(updates, out_type='int32')
        # calculate the new shape
        if output_shape is None:
            output_shape = (input_shape[0], input_shape[1] * self.size[0],
                            input_shape[2] * self.size[1], input_shape[3])

        # calculate indices for batch, height, width and feature maps
        one_like_mask = K.ones_like(mask, dtype='int32')
        batch_shape = K.concatenate([[input_shape[0]], [1], [1], [1]], axis=0)
        batch_range = K.reshape(tf.range(output_shape[0], dtype='int32'),
                                shape=batch_shape)
        b = one_like_mask * batch_range
        y = mask // (output_shape[2] * output_shape[3])
        x = (mask // output_shape[3]) % output_shape[2]
        feature_range = tf.range(output_shape[3], dtype='int32')
        f = one_like_mask * feature_range

        # transpose indices & reshape update values to one dimension
        updates_size = tf.size(updates)
        indices = K.transpose(
            K.reshape(K.stack([b, y, x, f]), [4, updates_size]))
        values = K.reshape(updates, [updates_size])
        ret = tf.scatter_nd(indices, values, output_shape)
        return ret
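The index arithmetic assumes `mask` holds flat argmax indices in the layout produced by tf.nn.max_pool_with_argmax, i.e. idx = (y * W + x) * C + f within each batch element. A small check of the y/x/f decomposition used above, with hypothetical sizes:

# Hypothetical output shape (H, W, C) = (4, 6, 3) and position y=2, x=5, f=1.
H, W, C = 4, 6, 3
y, x, f = 2, 5, 1
idx = (y * W + x) * C + f   # 52

# The same decomposition as in call().
assert idx // (W * C) == y
assert (idx // C) % W == x
assert idx % C == f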
Example #17
 def call(self, inputs, training=None):
     if K.dtype(inputs) != 'int32':
         inputs = K.cast(inputs, 'int32')
         
     def _l2normalize(v, eps=1e-12):
         return v / (K.sum(v ** 2) ** 0.5 + eps)
     def power_iteration(W, u):
          #According to the paper, we only need to do one power-iteration step.
         _u = u
         _v = _l2normalize(K.dot(_u, K.transpose(W)))
         _u = _l2normalize(K.dot(_v, W))
         return _u, _v
     W_shape = self.embeddings.shape.as_list()
     #Flatten the Tensor
     W_reshaped = K.reshape(self.embeddings, [-1, W_shape[-1]])
     _u, _v = power_iteration(W_reshaped, self.u)
     #Calculate Sigma
      sigma = K.dot(_v, W_reshaped)
      sigma = K.dot(sigma, K.transpose(_u))
     #normalize it
     W_bar = W_reshaped / sigma
     #reshape weight tensor
     if training in {0, False}:
         W_bar = K.reshape(W_bar, W_shape)
     else:
         with tf.control_dependencies([self.u.assign(_u)]):
             W_bar = K.reshape(W_bar, W_shape)
     self.embeddings = W_bar
         
     out = K.gather(self.embeddings, inputs)
     return out 
Example #18
    def call(self, inputs):
        X = inputs[0]  # Node features (N x F)
        A = inputs[1]  # Adjacency matrix (N x N)

        outputs = []
        for head in range(self.attn_heads):
            kernel = self.kernels[head]  # W in the paper (F x F')
            attention_kernel = self.attn_kernels[
                head]  # Attention kernel a in the paper (2F' x 1)

            # Compute inputs to attention network
            features = K.dot(X, kernel)  # (N x F')

            # Compute feature combinations
            # Note: [[a_1], [a_2]]^T [[Wh_i], [Wh_j]] = [a_1]^T [Wh_i] + [a_2]^T [Wh_j]
            attn_for_self = K.dot(
                features, attention_kernel[0])  # (N x 1), [a_1]^T [Wh_i]
            attn_for_neighs = K.dot(
                features, attention_kernel[1])  # (N x 1), [a_2]^T [Wh_j]

            # Attention head a(Wh_i, Wh_j) = a^T [[Wh_i], [Wh_j]]
            dense = attn_for_self + K.transpose(
                attn_for_neighs)  # (N x N) via broadcasting

            # Add nonlinearity
            dense = LeakyReLU(alpha=0.2)(dense)

            # Mask values before activation (Vaswani et al., 2017)
            mask = -10e9 * (1.0 - A)
            dense += mask

            # Apply softmax to get attention coefficients
            dense = K.softmax(dense)  # (N x N)

            # Apply dropout to features and attention coefficients
            dropout_attn = Dropout(self.dropout_rate)(dense)  # (N x N)
            dropout_feat = Dropout(self.dropout_rate)(features)  # (N x F')

            # Linear combination with neighbors' features
            node_features = K.dot(dropout_attn, dropout_feat)  # (N x F')

            if self.use_bias:
                node_features = K.bias_add(node_features, self.biases[head])

            if self.attn_heads_reduction == "concat":
                # If "concat", compute the activation here (Eq. 5)
                node_features = self.activation(node_features)

            # Add output of attention head to final output
            outputs.append(node_features)

        # Aggregate the heads" output according to the reduction method
        if self.attn_heads_reduction == "concat":
            output = K.concatenate(outputs)  # (N x KF")
        else:
            output = K.mean(K.stack(outputs), axis=0)  # N x F")

        output = self.activation(output)
        return output
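The `attn_for_self + K.transpose(attn_for_neighs)` line builds the full N x N logit matrix e_ij = a_1^T W h_i + a_2^T W h_j by broadcasting a column vector against a row vector; a small NumPy illustration with hypothetical values:

import numpy as np

attn_for_self = np.array([[1.0], [2.0], [3.0]])       # (N, 1): a_1^T W h_i
attn_for_neighs = np.array([[10.0], [20.0], [30.0]])  # (N, 1): a_2^T W h_j

# (N, 1) + (1, N) broadcasts to (N, N): entry (i, j) = self_i + neigh_j.
dense = attn_for_self + attn_for_neighs.T
assert dense[1, 2] == 2.0 + 30.0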
Example #19
def gram_matrix(x):
    assert K.ndim(x) == 3
    if K.image_dim_ordering() == "th":
        features = K.batch_flatten(x)
    else:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram
Example #20
def gram_matrix(x):
    assert K.ndim(x) == 3
    if K.image_data_format() == "channels_first":
        features = K.batch_flatten(x)
    else:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram
Example #21
 def compute_win(self, y_true, y_pred, to_numpy=False):
      if self.N > self.num_leaves:
          # if there are more total nodes in the hierarchy than leaf nodes (should always
          # be the case, but it works either way), pad with a zero for each non-leaf node
          # in the taxonomy
         y_true = self._pad(y_true)
         y_pred = self._pad(y_pred)
     # propagate the probabilities (algo 1)
     propagated_probabilities = K.dot(self.A, K.transpose(y_pred))
     # find the index from the actual label
     win_idx = self.select_correct_idx(y_true)
     # find the mask associated with that label
     win_mask = tf.gather(self.W, win_idx)
     # win is q . w (algo 2)
     win = K.batch_dot(win_mask, K.transpose(propagated_probabilities))
     # win is in [0.5,1], remap to [0,1]:
     remapped = 2 * (win - 0.5)
     if to_numpy:
         remapped = K.reshape(remapped, []).numpy()
     return remapped
Example #22
    def call(self, x, mask=None):
        # print(x[0].shape)
        # print(x[1].shape)

        # x[0] is Nx2, x[1] is Nx8 onehot, self.centers is 8x2
        delta_centers = K.dot(K.transpose(x[1]),
                              (K.dot(x[1], self.centers) - x[0]))  # 8x2
        center_counts = K.sum(K.transpose(x[1]), axis=1,
                              keepdims=True) + 1  # 8x1
        delta_centers /= center_counts
        new_centers = self.centers - self.alpha * delta_centers
        self.add_update((self.centers, new_centers), x)

        # self.add_update((self.counter, self.counter + 1), x)

        self.result = x[0] - K.dot(x[1], self.centers)
        self.result = K.sum(self.result**2, axis=1,
                            keepdims=True)  # / K.dot(x[1], center_counts)
        return self.result  # Nx1
Example #23
    def custom_loss(y_true, y_pred):
        """Args: y_true -- label vector of shape (batch_size, num_classes).
        Note: `features` and `num_classes` are captured from the enclosing scope."""
        samples_per_cluster = K.transpose(
            K.sum(y_true, axis=0, keepdims=True) +
            1)  # Add 1 to avoid division by zero
        centers = K.dot(K.transpose(y_true), features) / samples_per_cluster
        center_loss = 0.5 * K.sum(K.square(features - K.dot(y_true, centers)))

        center_dot_combinations = K.dot(centers, K.transpose(centers))
        center_dot_combinations_normed = K.sqrt(
            K.square(center_dot_combinations))
        pair_dist = center_dot_combinations / center_dot_combinations_normed
        # subtract diagonal of pair_dist which only contains ones
        pair_dist = pair_dist - K.eye(num_classes)
        pair_dist = pair_dist + 1
        pair_dist = K.sum(pair_dist)

        island_loss = center_loss + pair_dist

        return categorical_crossentropy(y_true, y_pred) + island_loss
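K.dot(K.transpose(y_true), features) sums the feature rows belonging to each class, so dividing by the (shifted) per-class counts yields the class centers; a small NumPy check with hypothetical one-hot labels:

import numpy as np

y_true = np.array([[1, 0], [1, 0], [0, 1]], dtype='float32')  # 3 samples, 2 classes
features = np.array([[1.0, 1.0], [3.0, 3.0], [5.0, 5.0]])

counts = y_true.sum(axis=0, keepdims=True).T + 1   # +1 as in the loss above
centers = y_true.T.dot(features) / counts
# class 0 sums rows 0 and 1 to (4, 4); the +1 makes the divisor 3, not 2
assert np.allclose(centers[0], [4.0 / 3.0, 4.0 / 3.0])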
Example #24
    def call(self, inputs, **kwargs):
        pair1_embed, pair2_embed = inputs

        pair1_embed = K.l2_normalize(pair1_embed, axis=-1)
        pair2_embed = K.l2_normalize(pair2_embed, axis=-1)

        sim = K.dot(pair1_embed, K.transpose(pair2_embed))

        sim = tf.linalg.tensor_diag_part(sim)

        return sim
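Taking the diagonal of the full similarity matrix leaves exactly the cosine similarities of matched pairs; the same result can be had without the N x N intermediate (a hedged alternative sketch, not the source's code):

import numpy as np

p1 = np.random.rand(4, 16).astype('float32')   # hypothetical pair embeddings
p2 = np.random.rand(4, 16).astype('float32')

p1 /= np.linalg.norm(p1, axis=-1, keepdims=True)
p2 /= np.linalg.norm(p2, axis=-1, keepdims=True)

# diag(p1 @ p2.T) equals the row-wise dot products of matched pairs.
assert np.allclose(np.diag(p1.dot(p2.T)), np.sum(p1 * p2, axis=-1), atol=1e-6)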
Example #25
def power_iteration(W, u, rounds=1):
    '''
    According to the paper, we only need to do one power-iteration step.
    '''
    _u = u

    for i in range(rounds):
        _v = _l2normalizer(K.dot(_u, W))
        _u = _l2normalizer(K.dot(_v, K.transpose(W)))

    W_sn = K.sum(K.dot(_u, W) * _v)
    return W_sn, _u, _v
Example #26
def model(embedding_size, n_a):
    # word embedding matrix
    #word_vec = Input(shape=(embedding_size), name='Words') # batch, 300
    word_vec = tf.constant(answer_emb, name='Words', dtype='float32')    
    # preprocessing sentences into sentence vectors
    sentence = Input(shape=(T, embedding_size), name='Sentences') # batch, 50, 300
    sentence_vec = Bidirectional(CuDNNGRU(units=n_a, return_sequences=False), name='Sentence_Vectors')(sentence) # batch, 300
    # dot
    #product = Dot(axes=-1, normalize=False, name='Matrix')([word_vec, sentence_vec])
    product = tf.matmul(word_vec, sentence_vec, transpose_b=True, name='Matrix')
    key_matrix = K.transpose(product)
    model = Model(inputs=sentence, outputs=key_matrix)
    return model
Example #27
 def gram_matrix(x):
     assert K.ndim(x) == 4
     grams = list()
     for i in range(self.Batch_Size):
         img = x[i, :, :, :]
         if K.image_data_format() == 'channels_first':
             features = K.batch_flatten(img)
         else:
             features = K.batch_flatten(
                 K.permute_dimensions(img, (2, 0, 1)))
         grams.append(K.dot(features, K.transpose(features)))
     gram = tf.keras.backend.stack(grams)
     return gram
Example #28
    def call(self, code_block: Tensor, training=False, **kwargs):
        # Note: all layers are wrapped with TimeDistributed, thus the shapes have number of
        # [batch size, timesteps (token length), features (1 the subtoken value), Etc]
        # each subtoken is considered a timestep

        # create a mask of the padding sequence of the input
        mask_vector = K.cast(K.equal(code_block, 0), dtype='float32') * -1e7
        # mask_vector [batch size, max chunk length, 1]
        self.logger.info("mask_vector shape = {}".format(mask_vector.shape))

        # code_block = Masking(mask_value=0, )(code_block)
        tokens_embedding = self.embedding_layer(code_block)
        self.logger.info("Tokens shape = {}".format(tokens_embedding.shape))
        # tokens_embedding = [batch_size, max chunk length, embedding_dim]

        _, h_t = self.gru_layer(tokens_embedding, training=training)
        # h_t = [batch_size, k2)
        self.logger.info("h_t shape = {}".format(h_t.shape))
        l_feat = self.attention_feature_layer([tokens_embedding, h_t])
        self.logger.info("L_feat shape = {}".format(l_feat.shape))

        # L_feat = [batch size, token length, k2]
        alpha = self.attention_weights_layer([l_feat, mask_vector])
        self.logger.info("alpha shape = {}".format(alpha.shape))
        # alpha = [batch size, token length] weights over embeddings

        # apply the attention to the input embedding
        n_hat = K.sum((K.expand_dims(alpha, axis=-1) * tokens_embedding),
                      axis=1)
        self.logger.info("n_hat shape = {}".format(n_hat.shape))
        # n_hat = [batch size, embedding dim]

        # embedding over all vocabulary
        E = self.embedding_layer.layer.embeddings
        self.logger.info("E shape = {}".format(E.shape))
        # E = [vocabulary size, embedding dim]

        # Apply attention to the words over all embeddings
        # (the Keras backend exposes no `nn.math_ops`; tf.tensordot does this)
        n_hat_E = tf.tensordot(E, K.transpose(n_hat), axes=[[1], [0]])
        # n_hat_E = [vocabulary size, token length, batch size]
        n_hat_E = K.permute_dimensions(n_hat_E, [2, 1, 0])
        self.logger.info("n_hat_E shape = {}".format(n_hat_E.shape))
        # n_hat_E = [batch size, token length, vocabulary size]

        n = self.softmax_layer(K.bias_add(n_hat_E, self.bias))
        self.logger.info("n shape = {}".format(n.shape))
        # n = [batch size, vocabulary size] the probability of each token in the vocabulary

        return n
Example #29
def gram_matrix(x):
    assert K.ndim(x) == 3
    if K.image_dim_ordering() == 'th':
        features = K.batch_flatten(x)  # x is (C, H, W)
    else:
        # (H, W, C) -> (C, H, W) -> (C, H*W)
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))

    shape = K.shape(x)
    # normalize by the total element count C*H*W (same product in either ordering)
    denominator = K.cast(shape[0] * shape[1] * shape[2], dtype='float32')
    gram = K.dot(features, K.transpose(features)) / denominator

    return gram
Example #30
    def call(self, x, mask=None):
        # x[0] is N x feature_dim, x[1] is N x num_classes onehot, self.centers is num_classes x feature_dim
        delta_centers = K.dot(
            K.transpose(x[1]),
            (K.dot(x[1], self.centers) - x[0]))  # num_classes x feature_dim
        center_counts = K.sum(K.transpose(x[1]), axis=1,
                              keepdims=True) + 1  # num_classes x 1
        delta_centers /= center_counts
        new_centers = self.centers - self.alpha * delta_centers
        self.add_update((self.centers, new_centers), x)

        # self.add_update((self.counter, self.counter + 1), x)

        center_loss = x[0] - K.dot(x[1], self.centers)
        center_loss = K.sum(center_loss**2, axis=1,
                            keepdims=True)  # / K.dot(x[1], center_counts)

        # pairwise similarities between centers (num_classes x num_classes)
        pair_dist = K.dot(self.centers, K.transpose(self.centers))
        pair_dist = pair_dist / K.sqrt(K.square(pair_dist))
        pair_dist = K.sum(pair_dist, keepdims=True)

        self.result = center_loss - pair_dist
        return self.result  # Nx1