Example 1
def gram_matrix(x):
    if K.image_dim_ordering() == "th":
        features = K.batch_flatten(x)
    else:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram
Example 2
def gram_matrix(x):
    assert K.ndim(x) == 3
    if K.image_data_format() == 'channels_first':
        features = K.batch_flatten(x)
    else:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram
Example 3
 def func(y_true, y_pred):
     y_true = K.batch_flatten(y_true)
     y_pred = K.batch_flatten(y_pred)
     Y_true = K.reshape(y_true, (-1, ) + img_shape)
     Y_pred = K.reshape(y_pred, (-1, ) + img_shape)
     t1 = K.pow(K.abs(Y_true[:, :, 1:, :] - Y_true[:, :, :-1, :]) -
                K.abs(Y_pred[:, :, 1:, :] - Y_pred[:, :, :-1, :]), alpha)
     t2 = K.pow(K.abs(Y_true[:, :, :, :-1] - Y_true[:, :, :, 1:]) -
                K.abs(Y_pred[:, :, :, :-1] - Y_pred[:, :, :, 1:]), alpha)
     out = K.mean(K.batch_flatten(t1 + t2), -1)
     return out
Example 4
def dice_coef(y_true, y_pred, smooth=smooth_default, per_batch=True):
    if not per_batch:
        y_true_f = K.flatten(y_true)
        y_pred_f = K.flatten(y_pred)
        intersection = K.sum(y_true_f * y_pred_f)
        return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)
    else:
        y_true_f = K.batch_flatten(y_true)
        y_pred_f = K.batch_flatten(y_pred)
        intersec = 2. * K.sum(y_true_f * y_pred_f, axis=1, keepdims=True) + smooth
        union = K.sum(y_true_f, axis=1, keepdims=True) + K.sum(y_pred_f, axis=1, keepdims=True) + smooth
        return K.mean(intersec / union)
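The two branches differ in where the reduction happens: the first computes one Dice score over the whole batch, the second averages per-sample scores. A minimal usage sketch (not from the original source; smooth is passed explicitly because smooth_default is not shown here):

# Hedged usage sketch, not from the original source.
from keras import backend as K

y_true = K.constant([[1., 1., 0., 0.], [0., 0., 0., 1.]])
y_pred = K.constant([[1., 0., 0., 0.], [0., 0., 1., 1.]])

# one Dice score over the whole batch
print(K.eval(dice_coef(y_true, y_pred, smooth=1., per_batch=False)))
# mean of the per-sample Dice scores
print(K.eval(dice_coef(y_true, y_pred, smooth=1., per_batch=True)))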
Example 5
def _label_to_one_hot(tens, nb_labels):
    """
    Transform a label nD Tensor to a one-hot 3D Tensor. The input tensor is first
    batch-flattened, and then each batch and each voxel gets a one-hot representation
    """
    y = K.batch_flatten(tens)
    return K.one_hot(y, nb_labels)
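A minimal shape check (not from the original source): a (2, 3, 1) label volume is batch-flattened to (2, 3) and then one-hot encoded to (2, 3, nb_labels).

# Hedged usage sketch, not from the original source.
from keras import backend as K

labels = K.constant([[[0], [1], [3]], [[2], [2], [0]]], dtype='int32')
print(K.eval(_label_to_one_hot(labels, nb_labels=4)).shape)  # expected (2, 3, 4)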
Example 6
def gram_matrix(x):
    assert Kr.ndim(x) == 3

    features = Kr.batch_flatten(x)
    gram = Kr.dot(features, Kr.transpose(features))

    return gram
Example 7
def gram_matrix(x):
    #change height,width,depth to depth, height, width, it could be 2,1,0 too
    #maybe 2,0,1 is more efficient due to underlying memory layout
    features = K.permute_dimensions(x, (2,0,1))
    #batch flatten make features become 2D array
    features = K.batch_flatten(features)
    return K.dot(features, K.transpose(features)) / x.get_shape().num_elements()    
Example 8
def gram_matrix(img):
  # input is (H, W, C) (C = # feature maps)
  # we first need to convert it to (C, H*W)
  X = K.batch_flatten(K.permute_dimensions(img, (2, 0, 1)))
  
  # now, calculate the gram matrix
  # gram = XX^T / N
  # the constant is not important since we'll be weighting these
  G = K.dot(X, K.transpose(X)) / img.get_shape().num_elements()
  return G
Example 9
def gram_matrix(x):
	"""
	Computes the outer-product of the input tensor x.

	Input
	-----
	- x: input tensor of shape (C x H x W)

	Returns
	-------
	- x . x^T

	Note that this can be computed efficiently if x is reshaped
	as a tensor of shape (C x H*W).
	"""
	# assert K.ndim(x) == 3
	if K.image_dim_ordering() == 'th':
		features = K.batch_flatten(x)
	else:
		features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
	return K.dot(features, K.transpose(features))
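As a rough check of the shapes involved (not from the original source; assumes a TensorFlow backend, i.e. a channels-last feature map):

# Hedged usage sketch, not from the original source.
import numpy as np
from keras import backend as K

feat = K.constant(np.random.rand(64, 64, 128))  # (H, W, C) feature map
gram = gram_matrix(feat)
print(K.eval(gram).shape)  # expected (C, C) = (128, 128)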
Example 10
    def step(self, x, states):
        batch_size = self._get_batch_size(x)
        input_shape = (batch_size, ) + self.reshape_dim
        hidden_dim = (batch_size, ) + self.output_dim
        nb_filter, nb_rows, nb_cols = self.output_dim
        h_tm1 = K.reshape(states[0], hidden_dim)

        x_t = K.reshape(x, input_shape)
        Wx_t = self.conv_x(x_t, train=True)
        h_t = self.activation(Wx_t + self.conv_h(h_tm1, train=True))
        h_t = K.batch_flatten(h_t)
        return h_t, [h_t, ]
Example 11
def gram_matrix(x):
    """
    the gram matrix of an image tensor (feature-wise outer product)
    :param x: The tensor contains image features
    :return: A gram matrix
    """
    if K.ndim(x) == 4:
        x = x[0, :, :, :]
    assert K.ndim(x) == 3
    features = K.batch_flatten(x)
    gram = K.dot(features, K.transpose(features))
    return gram
Example 12
def competitionMetric2(true, pred):  # any shape can go
    tresholds = [0.5 + (i*.05)  for i in range(10)]

    #flattened images (batch, pixels)
    true = K.batch_flatten(true)
    pred = K.batch_flatten(pred)
    pred = castF(K.greater(pred, 0.5))

    #total white pixels - (batch,)
    trueSum = K.sum(true, axis=-1)
    predSum = K.sum(pred, axis=-1)

    #has mask or not per image - (batch,)
    true1 = castF(K.greater(trueSum, 1))
    pred1 = castF(K.greater(predSum, 1))

    #to get images that have mask in both true and pred
    truePositiveMask = castB(true1 * pred1)

    #separating only the possible true positives to check iou
    testTrue = tf.boolean_mask(true, truePositiveMask)
    testPred = tf.boolean_mask(pred, truePositiveMask)

    #getting iou and threshold comparisons
    iou = iou_loss_core(testTrue,testPred)
    truePositives = [castF(K.greater(iou, tres)) for tres in tresholds]

    # mean of thresholds for true positives and total sum
    truePositives = K.mean(K.stack(truePositives, axis=-1), axis=-1)
    truePositives = K.sum(truePositives)

    #to get images that don't have mask in both true and pred
    trueNegatives = (1-true1) * (1 - pred1) # = 1 -true1 - pred1 + true1*pred1
    trueNegatives = K.sum(trueNegatives)
    
    return (truePositives + trueNegatives) / castF(K.shape(true)[0])
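The helpers castF, castB and iou_loss_core used above are not shown in this snippet; a plausible sketch of what they might look like (an assumption, the originals may differ):

# Hedged sketch of the missing helpers, not from the original source.
from keras import backend as K

def castF(x):
    return K.cast(x, K.floatx())

def castB(x):
    return K.cast(x, 'bool')

def iou_loss_core(true, pred, smooth=1.):
    # per-image intersection over union on batch-flattened masks
    intersection = K.sum(true * pred, axis=-1)
    union = K.sum(true, axis=-1) + K.sum(pred, axis=-1) - intersection
    return (intersection + smooth) / (union + smooth)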
Example 13
    def call(self, inputs, **kwargs):
        if type(inputs) is list:  # true label is provided with shape = [None, n_classes], i.e. one-hot code.
            assert len(inputs) == 2
            inputs, mask = inputs
        else:  # if no true label, mask by the max length of capsules. Mainly used for prediction
            # compute lengths of capsules
            x = K.sqrt(K.sum(K.square(inputs), -1))
            # generate the mask which is a one-hot code.
            # mask.shape=[None, n_classes]=[None, num_capsule]
            mask = K.one_hot(indices=K.argmax(x, 1), num_classes=x.get_shape().as_list()[1])

        # inputs.shape=[None, num_capsule, dim_capsule]
        # mask.shape=[None, num_capsule]
        # masked.shape=[None, num_capsule * dim_capsule]
        masked = K.batch_flatten(inputs * K.expand_dims(mask, -1))
        return masked
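A standalone shape check of the masking step (not from the original source): with 10 capsules of dimension 16, only the capsule selected by the one-hot mask survives the flatten.

# Hedged usage sketch, not from the original source.
import numpy as np
from keras import backend as K

caps = K.constant(np.random.rand(2, 10, 16))             # (batch, num_capsule, dim_capsule)
mask = K.one_hot(K.constant([3, 7], dtype='int32'), 10)  # (batch, num_capsule)
masked = K.batch_flatten(caps * K.expand_dims(mask, -1))
print(K.eval(masked).shape)  # expected (2, 160)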
Example 14
    def get_output(self, train=True):
        X = self.get_input(train)
        out_dim = np.prod(self.model.output_shape[1:])
        # batch_size = self._get_batch_size(X)
        if K._BACKEND == 'theano':
            time_len = K.shape(X)[1]
            new_shape = (-1, time_len, out_dim)
        else:
            # time_len = self.input_shape[2:3]
            time_len = K.shape(X)[1]
            new_shape = K.concatenate([np.asarray([-1, ]), time_len,
                                       np.asarray([out_dim, ])])

        reshape_dim = (-1, ) + self.model.input_shape[1:]
        Inp = K.batch_flatten(X)  # (sample*time, dim)
        Inp = K.reshape(Inp, reshape_dim)  # (sample*time, dim1, dim2, ...)
        Y = self.model(Inp, train=train)
        Y = K.reshape(Y, new_shape)  # (sample, time, dim_out)
        return Y
Example 15
    def step(self, x, states):
        batch_size = self._get_batch_size(x)
        input_shape = (batch_size, ) + self.reshape_dim
        hidden_dim = (batch_size, ) + self.output_dim
        nb_filter, nb_rows, nb_cols = self.output_dim
        h_tm1 = K.reshape(states[0], hidden_dim)

        x_t = K.reshape(x, input_shape)
        xz_t = self.conv_x_z(x_t, train=True)
        xr_t = self.conv_x_r(x_t, train=True)
        xh_t = self.conv_x_h(x_t, train=True)

        xz_t = apply_layer(self.max_pool, xz_t)
        xr_t = apply_layer(self.max_pool, xr_t)
        xh_t = apply_layer(self.max_pool, xh_t)

        z = self.inner_activation(xz_t + self.conv_z(h_tm1))
        r = self.inner_activation(xr_t + self.conv_r(h_tm1))
        hh_t = self.activation(xh_t + self.conv_h(r * h_tm1))
        h_t = z * h_tm1 + (1 - z) * hh_t
        h_t = K.batch_flatten(h_t)
        return h_t, [h_t, ]
Example 16
def _global_max_nd(xtens):
    ytens = K.batch_flatten(xtens)
    return K.max(ytens, 1, keepdims=True)
Example 17
def dice_coef_spec(y_true, y_pred):
    y_true_f = K.batch_flatten(y_true)
    y_pred_f = K.batch_flatten(y_pred)
    intersection = 2. * K.sum(y_true_f * y_pred_f, axis=1, keepdims=True) + smooth
    union = K.sum(y_true_f, axis=1, keepdims=True) + K.sum(y_pred_f, axis=1, keepdims=True) + smooth
    return K.mean(intersection / union)
Example 18
def gram_matrix(x):
    features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram
Example 19
 def normalize_vector(x):
     z = K.sum(K.batch_flatten(K.square(x)), axis=1)
     while K.ndim(z) < K.ndim(x):
         z = K.expand_dims(z, dim=-1)
     return x / (K.sqrt(z) + K.epsilon())
Example 20
    def call(self, x, mask=None):
        # TODO: validate input shape

        assert (len(x) == 3)
        L_flat = x[0]
        mu = x[1]
        a = x[2]

        if self.mode == 'full':
            # Create L and L^T matrix, which we use to construct the
            # positive-definite matrix P.
            L = None
            LT = None
            if K.backend() == 'theano':
                import theano.tensor as T
                import theano

                def fn(x, L_acc, LT_acc):
                    x_ = K.zeros((self.nb_actions, self.nb_actions))
                    x_ = T.set_subtensor(x_[np.tril_indices(self.nb_actions)],
                                         x)
                    diag = K.exp(T.diag(x_)) + K.epsilon()
                    x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)],
                                         diag)
                    return x_, x_.T

                outputs_info = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]
                results, _ = theano.scan(fn=fn,
                                         sequences=L_flat,
                                         outputs_info=outputs_info)
                L, LT = results
            elif K.backend() == 'tensorflow':
                import tensorflow as tf

                # Number of elements in a triangular matrix.
                nb_elems = (self.nb_actions * self.nb_actions +
                            self.nb_actions) // 2

                # Create mask for the diagonal elements in L_flat. This is used to exponentiate
                # only the diagonal elements, which is done before gathering.
                diag_indeces = [0]
                for row in range(1, self.nb_actions):
                    diag_indeces.append(diag_indeces[-1] + (row + 1))
                diag_mask = np.zeros(1 + nb_elems)  # +1 for the leading zero
                diag_mask[np.array(diag_indeces) + 1] = 1
                diag_mask = K.variable(diag_mask)

                # Add leading zero element to each element in the L_flat. We use this zero
                # element when gathering L_flat into a lower triangular matrix
                # L.
                nb_rows = tf.shape(L_flat)[0]
                zeros = tf.expand_dims(tf.tile(K.zeros((1, )), [nb_rows]), 1)
                try:
                    # Old TF behavior.
                    L_flat = tf.concat(1, [zeros, L_flat])
                except (TypeError, ValueError):
                    # New TF behavior
                    L_flat = tf.concat([zeros, L_flat], 1)

                # Create mask that can be used to gather elements from L_flat and put them
                # into a lower triangular matrix.
                tril_mask = np.zeros((self.nb_actions, self.nb_actions),
                                     dtype='int32')
                tril_mask[np.tril_indices(self.nb_actions)] = range(
                    1, nb_elems + 1)

                # Finally, process each element of the batch.
                init = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]

                def fn(a, x):
                    # Exponentiate everything. This is much easier than only exponentiating
                    # the diagonal elements, and, usually, the action space is
                    # relatively low.
                    x_ = K.exp(x) + K.epsilon()
                    # Only keep the diagonal elements.
                    x_ *= diag_mask
                    # Add the original, non-diagonal elements.
                    x_ += x * (1. - diag_mask)
                    # Finally, gather everything into a lower triangular
                    # matrix.
                    L_ = tf.gather(x_, tril_mask)
                    return [L_, tf.transpose(L_)]

                tmp = tf.scan(fn, L_flat, initializer=init)
                if isinstance(tmp, (list, tuple)):
                    # TensorFlow 0.10 now returns a tuple of tensors.
                    L, LT = tmp
                else:
                    # Old TensorFlow < 0.10 returns a shared tensor.
                    L = tmp[:, 0, :, :]
                    LT = tmp[:, 1, :, :]
            else:
                raise RuntimeError('Unknown Keras backend "{}".'.format(
                    K.backend()))
            assert L is not None
            assert LT is not None
            P = K.batch_dot(L, LT)
        elif self.mode == 'diag':
            if K.backend() == 'theano':
                import theano.tensor as T
                import theano

                def fn(x, P_acc):
                    x_ = K.zeros((self.nb_actions, self.nb_actions))
                    x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)],
                                         x)
                    return x_

                outputs_info = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]
                P, _ = theano.scan(fn=fn,
                                   sequences=L_flat,
                                   outputs_info=outputs_info)
            elif K.backend() == 'tensorflow':
                import tensorflow as tf

                # Create mask that can be used to gather elements from L_flat and put them
                # into a diagonal matrix.
                diag_mask = np.zeros((self.nb_actions, self.nb_actions),
                                     dtype='int32')
                diag_mask[np.diag_indices(self.nb_actions)] = range(
                    1, self.nb_actions + 1)

                # Add leading zero element to each element in the L_flat. We use this zero
                # element when gathering L_flat into a lower triangular matrix
                # L.
                nb_rows = tf.shape(L_flat)[0]
                zeros = tf.expand_dims(tf.tile(K.zeros((1, )), [nb_rows]), 1)
                try:
                    # Old TF behavior.
                    L_flat = tf.concat(1, [zeros, L_flat])
                except (TypeError, ValueError):
                    # New TF behavior
                    L_flat = tf.concat([zeros, L_flat], 1)

                # Finally, process each element of the batch.
                def fn(a, x):
                    x_ = tf.gather(x, diag_mask)
                    return x_

                P = tf.scan(fn,
                            L_flat,
                            initializer=K.zeros(
                                (self.nb_actions, self.nb_actions)))
            else:
                raise RuntimeError('Unknown Keras backend "{}".'.format(
                    K.backend()))
        assert P is not None
        assert K.ndim(P) == 3

        # Combine a, mu and P into a scalar (over the batches). What we compute here is
        # -.5 * (a - mu)^T * P * (a - mu), where * denotes the dot-product. Unfortunately
        # TensorFlow handles vector * P slightly suboptimal, hence we convert the vectors to
        # 1xd/dx1 matrices and finally flatten the resulting 1x1 matrix into a scalar. All
        # operations happen over the batch size, which is dimension 0.
        prod = K.batch_dot(K.expand_dims(a - mu, 1), P)
        prod = K.batch_dot(prod, K.expand_dims(a - mu, -1))
        A = -.5 * K.batch_flatten(prod)
        assert K.ndim(A) == 2
        return A
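The final block computes A = -0.5 * (a - mu)^T P (a - mu) per batch element; a NumPy sketch of that formula (not from the original source):

# Hedged numerical sketch of the advantage term, not from the original source.
import numpy as np

batch, n = 4, 3
L = np.tril(np.random.rand(batch, n, n))    # lower-triangular L per sample
P = L @ np.transpose(L, (0, 2, 1))          # P = L L^T is positive semi-definite
a = np.random.rand(batch, n)
mu = np.random.rand(batch, n)

diff = (a - mu)[:, None, :]                 # (batch, 1, n)
A = -0.5 * (diff @ P @ np.transpose(diff, (0, 2, 1))).reshape(batch, 1)
print(A.shape)  # (batch, 1), matching K.batch_flatten(prod) above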
Example 21
    def call(self, x, mask=None):
        # The input of this layer is [L, mu, a] in concatenated form. We first split
        # those up.
        idx = 0
        L_flat = x[:, idx:idx + (self.nb_actions * self.nb_actions + self.nb_actions) // 2]
        idx += (self.nb_actions * self.nb_actions + self.nb_actions) // 2
        mu = x[:, idx:idx + self.nb_actions]
        idx += self.nb_actions
        a = x[:, idx:idx + self.nb_actions]
        idx += self.nb_actions

        # Create L and L^T matrix, which we use to construct the positive-definite matrix P.
        L = None
        LT = None
        if K._BACKEND == 'theano':
            import theano.tensor as T
            import theano

            def fn(x, L_acc, LT_acc):
                x_ = K.zeros((self.nb_actions, self.nb_actions))
                x_ = T.set_subtensor(x_[np.tril_indices(self.nb_actions)], x)
                diag = K.exp(T.diag(x_))
                x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)], diag)
                return x_, x_.T

            outputs_info = [
                K.zeros((self.nb_actions, self.nb_actions)),
                K.zeros((self.nb_actions, self.nb_actions)),
            ]
            results, _ = theano.scan(fn=fn, sequences=L_flat, outputs_info=outputs_info)
            L, LT = results
        elif K._BACKEND == 'tensorflow':
            import tensorflow as tf

            # Number of elements in a triangular matrix.
            nb_elems = (self.nb_actions * self.nb_actions + self.nb_actions) // 2

            # Create mask for the diagonal elements in L_flat. This is used to exponentiate
            # only the diagonal elements, which is done before gathering.
            diag_indeces = [0]
            for row in range(1, self.nb_actions):
                diag_indeces.append(diag_indeces[-1] + (row + 1))
            diag_mask = np.zeros(1 + nb_elems)  # +1 for the leading zero
            diag_mask[np.array(diag_indeces) + 1] = 1
            diag_mask = K.variable(diag_mask)

            # Add leading zero element to each element in the L_flat. We use this zero
            # element when gathering L_flat into a lower triangular matrix L.
            nb_rows = tf.shape(L_flat)[0]
            zeros = tf.expand_dims(tf.tile(K.zeros((1,)), [nb_rows]), 1)
            L_flat = tf.concat(1, [zeros, L_flat])
            
            # Create mask that can be used to gather elements from L_flat and put them
            # into a lower triangular matrix.
            tril_mask = np.zeros((self.nb_actions, self.nb_actions), dtype='int32')
            tril_mask[np.tril_indices(self.nb_actions)] = range(1, nb_elems + 1)
            
            # Finally, process each element of the batch.
            init = [
                K.zeros((self.nb_actions, self.nb_actions)),
                K.zeros((self.nb_actions, self.nb_actions)),
            ]
            
            def fn(a, x):
                # Exponentiate everything. This is much easier than only exponentiating
                # the diagonal elements, and, usually, the action space is relatively low.
                x_ = K.exp(x)
                # Only keep the diagonal elements.
                x_ *= diag_mask
                # Add the original, non-diagonal elements.
                x_ += x * (1. - diag_mask)
                # Finally, gather everything into a lower triangular matrix.
                L_ = tf.gather(x_, tril_mask)
                return [L_, tf.transpose(L_)]

            tmp = tf.scan(fn, L_flat, initializer=init)
            L = tmp[:, 0, :, :]
            LT = tmp[:, 1, :, :]
        else:
            raise RuntimeError('Unknown Keras backend "{}".'.format(K._BACKEND))
        assert L is not None
        assert LT is not None
        P = K.batch_dot(L, LT)
        assert K.ndim(P) == 3

        # Combine a, mu and P into a scalar (over the batches). What we compute here is
        # -.5 * (a - mu)^T * P * (a - mu), where * denotes the dot-product. Unfortunately
        # TensorFlow handles vector * P slightly suboptimal, hence we convert the vectors to
        # 1xd/dx1 matrices and finally flatten the resulting 1x1 matrix into a scalar. All
        # operations happen over the batch size, which is dimension 0.
        prod = K.batch_dot(K.expand_dims(a - mu, dim=1), P)
        prod = K.batch_dot(prod, K.expand_dims(a - mu, dim=-1))
        A = -.5 * K.batch_flatten(prod)
        assert K.ndim(A) == 2
        return A
Example 22
 def compute_mask(self, inputs, mask=None):
     if mask is None:
         return mask
     return K.batch_flatten(mask)
Example 23
def SE(x, y):
    "Square Error"
    return K.sum(K.square(K.batch_flatten(x) - K.batch_flatten(y)), axis=-1)
Example 24
    def gram_matrix(X):
        features = K.permute_dimensions(X, (2, 0, 1))
        features = K.batch_flatten(features)
        features = K.dot(features, K.transpose(features))

        return features
Example 25
def MAE(x, y):
    return mae(K.batch_flatten(x),
               K.batch_flatten(y))
Example 26
def BCE2(x, y):
    "Do not average over bits"
    return K.sum(K.binary_crossentropy(K.batch_flatten(x), K.batch_flatten(y)), axis=-1)
Example 27
def MSE(x, y):
    return mse(K.batch_flatten(x),
               K.batch_flatten(y))
Example 28
def BCE(x, y):
    return bce(K.batch_flatten(x),
               K.batch_flatten(y))
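The mae, mse and bce helpers wrapped by the last few snippets are not defined here; they are presumably the standard Keras objectives, e.g. (an assumption, the exact names depend on the Keras version):

# Hedged assumption, not from the original source.
from keras import losses  # `keras.objectives` in older releases

mae = losses.mean_absolute_error
mse = losses.mean_squared_error
bce = losses.binary_crossentropy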
Example 29
def l1_loss(y_true, y_pred):
    y_true_flat = K.batch_flatten(y_true)
    y_pred_flat = K.batch_flatten(y_pred)
    return K.sum(K.abs(y_pred_flat - y_true_flat), axis=-1)
Example 30
    def step(self, a, states):

        r_tm1 = states[:self.nb_layers]
        c_tm1 = states[self.nb_layers:2*self.nb_layers]
        e_tm1 = states[2*self.nb_layers:3*self.nb_layers]

        if self.extrap_start_time is not None:
            t = states[-1]
            a = K.switch(t >= self.t_extrap, states[-2], a)  # if past self.extrap_start_time, the previous prediction will be treated as the actual

        c = []
        r = []
        e = []

        for l in reversed(range(self.nb_layers)):
            inputs = [r_tm1[l], e_tm1[l]]
            if l < self.nb_layers - 1:
                inputs.append(r_up)

            inputs = K.concatenate(inputs, axis=self.channel_axis)
            i = self.conv_layers['i'][l].call(inputs)
            f = self.conv_layers['f'][l].call(inputs)
            o = self.conv_layers['o'][l].call(inputs)
            _c = f * c_tm1[l] + i * self.conv_layers['c'][l].call(inputs)
            _r = o * self.LSTM_activation(_c)
            c.insert(0, _c)
            r.insert(0, _r)

            if l > 0:
                r_up = self.upsample.call(_r)

        for l in range(self.nb_layers):
            ahat = self.conv_layers['ahat'][l].call(r[l])
            if l == 0:
                ahat = K.minimum(ahat, self.pixel_max)
                frame_prediction = ahat

            # compute errors
            e_up = self.error_activation(ahat - a)
            e_down = self.error_activation(a - ahat)

            e.append(K.concatenate((e_up, e_down), axis=self.channel_axis))

            if l < self.nb_layers - 1:
                a = self.conv_layers['a'][l].call(e[l])
                a = self.pool.call(a)  # target for next layer

        if self.output_mode == 'prediction':
            output = frame_prediction
        else:
            for l in range(self.nb_layers):
                layer_error = K.mean(K.batch_flatten(e[l]), axis=-1, keepdims=True)
                all_error = layer_error if l == 0 else K.concatenate((all_error, layer_error), axis=-1)
            if self.output_mode == 'error':
                output = all_error
            else:
                output = K.concatenate((K.batch_flatten(frame_prediction), all_error), axis=-1)

        states = r + c + e
        if self.extrap_start_time is not None:
            states += [frame_prediction, t + 1]
        return output, states
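The error units above stack the rectified positive and negative prediction errors along the channel axis; a minimal sketch of that step in isolation (not from the original source):

# Hedged sketch of the PredNet error computation, not from the original source.
import numpy as np
from keras import backend as K

a    = K.constant(np.random.rand(1, 8, 8, 3))   # target (channels_last assumed)
ahat = K.constant(np.random.rand(1, 8, 8, 3))   # prediction
e = K.concatenate([K.relu(ahat - a), K.relu(a - ahat)], axis=-1)
print(K.eval(e).shape)  # expected (1, 8, 8, 6)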
Example 31
 def call(self, x, mask=None):
     x = x[:, :self.keep_dim]
     return K.batch_flatten(x)
Example 32
 def build_gram_matrix(x):
     features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
     gram_matrix = K.dot(features, K.transpose(features))
     return gram_matrix
Example 33
    def call(self, a, states):
        c_tm1 = states[:self.nb_layers]
        e_tm1 = states[self.nb_layers:2 * self.nb_layers]
        r_tm1 = states[2 * self.nb_layers:3 * self.nb_layers]

        if self.extrap_start_time is not None:
            t = states[-1]
            # The previous prediction will be treated as the actual if t between t_extrap_start and t_extrap_end
            a = K.switch(
                tf.logical_and(t >= self.t_extrap_start,
                               t < self.t_extrap_end), states[-2], a)

        c = []
        r = []
        e = []

        # Update R units starting from the top
        for l in reversed(range(self.nb_layers)):
            inputs = [r_tm1[l], e_tm1[l]]
            if l < self.nb_layers - 1:
                inputs.append(_r)

            inputs = K.concatenate(inputs)
            i = self.layers['i'][l].call(inputs)
            f = self.layers['f'][l].call(inputs)
            o = self.layers['o'][l].call(inputs)
            _c = f * c_tm1[l] + i * self.layers['c'][l].call(inputs)
            if l == 0:
                _r = o * _c
            else:
                _r = o * activations.tanh(_c)
            c.insert(0, _c)
            r.insert(0, _r)

        # Update feed-forward path starting from the bottom
        for l in range(self.nb_layers):
            ahat = self.layers['ahat'][l].call(r[l])
            if l == 0:
                prediction = ahat

            # compute errors
            e_up = activations.relu(ahat - a)
            e_down = activations.relu(a - ahat)

            e.append(K.concatenate([e_up, e_down]))

            if l < self.nb_layers - 1:
                a = self.layers['a'][l].call(e[l])

        if self.output_mode == 'prediction':
            output = prediction
        else:
            for l in range(self.nb_layers):
                layer_error = K.mean(K.batch_flatten(e[l]),
                                     axis=-1,
                                     keepdims=True)
                all_error = layer_error if l == 0 else K.concatenate(
                    [all_error, layer_error])
            if self.output_mode == 'error':
                output = all_error
            else:
                output = K.concatenate([prediction, all_error])

        states = c + e + r
        if self.extrap_start_time is not None:
            states += [prediction, t + 1]
        return output, states
Example 34
 def compute_mask(self, x, mask=None):
     if mask is None or not self.return_mask:
         return None
     return K.batch_flatten(mask)
Example 35
 def d_loss(y_true, y_pred):
     #L = objectives.binary_crossentropy(K.batch_flatten(y_true), K.batch_flatten(y_pred))
     L = objectives.mean_squared_error(K.batch_flatten(y_true), K.batch_flatten(y_pred))
     return L
Example 36
 def call(self, inputs, mask=None):
     return K.batch_flatten(inputs)
Example 37
    def compile(self):
        inner_grad = self.post_grad(
            K.gradients(K.sum(self.Q), self.layer.output))

        def post_fn(r):
            return r.transpose((1, 0))

        # Get outputs for flat intermediate layers
        if K.ndim(self.layer.output) == 2:
            n_outs = K.int_shape(self.layer.output)[1]
            layer_grads = [inner_grad[:, i] for i in range(n_outs)]
            layer_outs = [self.layer.output[:, i] for i in range(n_outs)]
            self.attribution_units = layer_outs
            self.p_fn = lambda x: x

        # Get outputs for convolutional intermediate layers
        elif K.ndim(self.layer.output) == 4:
            if self.agg_fn is None:
                n_outs = int(np.prod(K.int_shape(self.layer.output)[1:]))
                layer_outs = K.batch_flatten(self.layer.output)
                layer_grads = [inner_grad]
                self.attribution_units = K.transpose(layer_outs)

                def post_fn(r):
                    return r[0]  # pylint: disable=function-redefined

                self.p_fn = lambda x: x
            else:
                # If the aggregation function is given, treat each filter as a
                # unit of attribution
                if K.image_data_format() == 'channels_first':
                    n_outs = K.int_shape(self.layer.output)[1]

                    def sel_fn(g, i):
                        return self.agg_fn(g[:, i, :, :], axis=(1, 2))

                    p_fn = K.function(
                        [self.layer.output],
                        [self.agg_fn(self.layer.output, axis=(2, 3))])
                    self.p_fn = lambda x: p_fn([x])[0]
                else:
                    n_outs = K.int_shape(self.layer.output)[3]

                    def sel_fn(g, i):
                        return self.agg_fn(g[:, :, :, i], axis=(1, 2))

                    p_fn = K.function(
                        [self.layer.output],
                        [self.agg_fn(self.layer.output, axis=(1, 2))])
                    self.p_fn = lambda x: p_fn([x])[0]
                layer_grads = [sel_fn(inner_grad, i) for i in range(n_outs)]
                layer_outs = [
                    sel_fn(self.layer.output, i) for i in range(n_outs)
                ]
                self.attribution_units = layer_outs

        else:
            assert False, ('Unsupported tensor shape: ndim=%d' %
                           K.ndim(self.layer.output))

        if self.layer != self.model.layers[0]:
            feats_f = K.function([self.model.input], [self.layer.output])
            self.get_features = lambda x: np.array(feats_f([x]))[0]

        else:
            self.get_features = lambda x: x

        if (hasattr(self.model, 'uses_learning_phase')
                and self.model.uses_learning_phase
                and K.backend() == 'theano'):

            grad_f = K.function(
                [self.layer.output, K.learning_phase()], layer_grads)
            self.dF = lambda inp: post_fn(np.array(grad_f([inp, 0])))
        else:
            grad_f = K.function([self.layer.output], layer_grads)
            self.dF = lambda inp: post_fn(np.array(grad_f([inp])))

        self.is_compiled = True
        self.n_outs = n_outs

        return self
Example 38
 def kld(p, q):
     v = p * (K.log(p + K.epsilon()) - K.log(q + K.epsilon()))
     return K.sum(K.batch_flatten(v), axis=1, keepdims=True)
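A quick shape check (not from the original source; random tensors stand in for proper probability maps, so only the output shape is meaningful):

# Hedged usage sketch, not from the original source.
import numpy as np
from keras import backend as K

p = K.constant(np.random.rand(2, 4, 4))
q = K.constant(np.random.rand(2, 4, 4))
print(K.eval(kld(p, q)).shape)  # expected (2, 1): one value per batch element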
Example 39
    def step(self, a, states):
        r_tm1 = states[:self.nb_layers]
        c_tm1 = states[self.nb_layers:2*self.nb_layers]
        e_tm1 = states[2*self.nb_layers:3*self.nb_layers]

        if self.extrap_start_time is not None:
            t = states[-1]
            a = K.switch(t >= self.t_extrap, states[-2], a)  # if past self.extrap_start_time, the previous prediction will be treated as the actual

        c = []
        r = []
        e = []

        for l in reversed(range(self.nb_layers)):
            inputs = [r_tm1[l], e_tm1[l]]
            if l < self.nb_layers - 1:
                inputs.append(r_up)

            inputs = K.concatenate(inputs, axis=self.channel_axis)
            i = self.conv_layers['i'][l].call(inputs)
            f = self.conv_layers['f'][l].call(inputs)
            o = self.conv_layers['o'][l].call(inputs)
            _c = f * c_tm1[l] + i * self.conv_layers['c'][l].call(inputs)
            _r = o * self.LSTM_activation(_c)
            c.insert(0, _c)
            r.insert(0, _r)

            if l > 0:
                r_up = self.upsample.call(_r)

        for l in range(self.nb_layers):
            ahat = self.conv_layers['ahat'][l].call(r[l])
            if l == 0:
                ahat = K.minimum(ahat, self.pixel_max)
                frame_prediction = ahat

            # compute errors
            e_up = self.error_activation(ahat - a)
            e_down = self.error_activation(a - ahat)

            e.append(K.concatenate((e_up, e_down), axis=self.channel_axis))

            if self.output_layer_num == l:
                if self.output_layer_type == 'A':
                    output = a
                elif self.output_layer_type == 'Ahat':
                    output = ahat
                elif self.output_layer_type == 'R':
                    output = r[l]
                elif self.output_layer_type == 'E':
                    output = e[l]

            if l < self.nb_layers - 1:
                a = self.conv_layers['a'][l].call(e[l])
                a = self.pool.call(a)  # target for next layer

        if self.output_layer_type is None:
            if self.output_mode == 'prediction':
                output = frame_prediction
            else:
                for l in range(self.nb_layers):
                    layer_error = K.mean(K.batch_flatten(e[l]), axis=-1, keepdims=True)
                    all_error = layer_error if l == 0 else K.concatenate((all_error, layer_error), axis=-1)
                if self.output_mode == 'error':
                    output = all_error
                else:
                    output = K.concatenate((K.batch_flatten(frame_prediction), all_error), axis=-1)

        states = r + c + e
        if self.extrap_start_time is not None:
            states += [frame_prediction, t + 1]
        return output, states
Example 40
def gram_matrix(x):
    assert K.ndim(x) == 3
    features = K.batch_flatten(x)
    gram = K.dot(features, K.transpose(features))
    return gram
Example 41
    def compile(self):

        # Get outputs for flat intermediate layers.
        if K.ndim(self.layer.output) == 2:
            n_outs = K.int_shape(self.layer.output)[1]
            layer_outs = [self.layer.output[:, i] for i in range(n_outs)]

            def post_fn(r):
                return np.swapaxes(r, 0, 1)

            self.attribution_units = layer_outs

        # Get outputs for convolutional intermediate layers.
        elif K.ndim(self.layer.output) == 4:
            if self.agg_fn is None:
                n_outs = int(np.prod(K.int_shape(self.layer.output)[1:]))
                # K.batch_flatten seems really slow at times, so we'll save the
                # reshape for numpy.
                layer_outs = [self.layer.output]

                def post_fn(r):
                    return r[0].reshape((len(r[0]), -1))

                self.attribution_units = K.transpose(
                    K.batch_flatten(self.layer.output))
            else:
                # If the aggregation function is given, treat each filter as a
                # unit of attribution.
                if K.image_data_format() == 'channels_first':
                    n_outs = K.int_shape(self.layer.output)[1]

                    def sel_fn(g, i):
                        return self.agg_fn(g[:, i, :, :], axis=(1, 2))
                else:
                    n_outs = K.int_shape(self.layer.output)[3]

                    def sel_fn(g, i):
                        return self.agg_fn(g[:, :, :, i], axis=(1, 2))

                layer_outs = [
                    sel_fn(self.layer.output, i) for i in range(n_outs)
                ]

                def post_fn(r):
                    return np.swapaxes(r, 0, 1)

                self.attribution_units = layer_outs
        else:
            raise ValueError('Unsupported tensor shape: ndim={}'.format(
                K.ndim(self.layer.output)))

        if (hasattr(self.model, 'uses_learning_phase')
                and self.model.uses_learning_phase
                and K.backend() == 'theano'):

            grad_f = K.function(
                [self.model.input, K.learning_phase()], layer_outs)
            self.dF = lambda inp: post_fn(np.array(grad_f([inp, 0])))

        else:
            grad_f = K.function([self.model.input], layer_outs)
            self.dF = lambda inp: post_fn(np.array(grad_f([inp])))

        self.is_compiled = True
        self.n_outs = n_outs

        return self
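When an aggregation function is supplied, each convolutional filter becomes one attribution unit by pooling its spatial activations; a minimal illustration of that reduction (not from the original source):

# Hedged illustration, not from the original source.
import numpy as np
from keras import backend as K

conv_out = K.constant(np.random.rand(2, 7, 7, 32))  # channels_last: (batch, H, W, filters)
per_filter = K.max(conv_out, axis=(1, 2))            # e.g. agg_fn = K.max
print(K.eval(per_filter).shape)  # expected (2, 32): one unit per filter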
Example 42
def gram_matrix(img):
    X = K.batch_flatten(K.permute_dimensions(img, (2, 0, 1)))

    G = K.dot(X, K.transpose(X)) / img.get_shape().num_elements()
    return G
Example 43
def bce(y_true, y_pred):
    y_true_f = K.clip(K.batch_flatten(y_true), K.epsilon(), 1.)
    y_pred_f = K.clip(K.batch_flatten(y_pred), K.epsilon(), 1.)
    bce = binary_crossentropy(y_true_f, y_pred_f)
    return bce
Example 44
 def d_loss(y_true, y_pred):
     L = binary_crossentropy(K.batch_flatten(y_true),
                             K.batch_flatten(y_pred))
     return L
Example 45
def repeat(x):
    return K.reshape(K.repeat(K.batch_flatten(x), config["nb_timestep"]),
                     (config["b_s"], config["nb_timestep"], 512,
                      config["shape_r_gt"], config["shape_c_gt"]))
Example 46
def gram_matrix(x):
	features = backend.batch_flatten(backend.permute_dimensions(x, (2, 0, 1)))
	gram = backend.dot(features, backend.transpose(features))
	return gram
Example 47
	def custom_flatten(self,x):
		return K.batch_flatten(x)
Example 48
 def gramian(filters):
     c_filters = K.batch_flatten(
         K.permute_dimensions(K.squeeze(filters, axis=0),
                              pattern=(2, 0, 1)))
     return K.dot(c_filters, K.transpose(c_filters))
Example 49
def gram_matrix(x):
    features = backend.batch_flatten(backend.permute_dimensions(x, (2, 0, 1)))
    gram = backend.dot(features, backend.transpose(features))
    return gram
Example 50
 def _step(self, x_t, *args):
     x_t = K.reshape(x_t, self.reshape_dim)
     x_t = self.model(x_t)
     return K.batch_flatten(x_t)
Example 51
def predict_volume_stack(models,
                         data_generator,
                         batch_size,
                         grid_size,
                         verbose=False):
    """
    predict all the patches in a volume

    requires batch_size to be a divisor of the number of patches (prod(grid_size))

    Note: we allow models to be a list or a single model.
    Normally, if you'd like to run a function over a list for some param,
    you can simply loop outside of the function. Here, however, we are dealing with a generator,
    and want the output of that generator to be consistent for each model.

    Returns:
    if models is a list of more than one model:
        a tuple of model entries, each entry is a tuple of:
        all_true, all_pred, all_vol, <all_prior>
    if models is just one model:
        a tuple of
        all_true, all_pred, all_vol, <all_prior>
    """

    if not isinstance(models, (list, tuple)):
        models = (models,)

    # compute the number of batches we need for one volume
    # we need the batch_size to be a divisor of nb_patches,
    # in order to loop through batches and form full volumes
    nb_patches = np.prod(grid_size)
    # assert np.mod(nb_patches, batch_size) == 0, \
        # "batch_size %d should be a divisor of nb_patches %d" %(batch_size, nb_patches)
    nb_batches = ((nb_patches - 1) // batch_size) + 1

    # go through the patches
    batch_gen = tqdm(range(nb_batches)) if verbose else range(nb_batches)
    for batch_idx in batch_gen:
        sample = next(data_generator)
        nb_vox = np.prod(sample[1].shape[1:-1])
        do_prior = isinstance(sample[0], (list, tuple))

        # pre-allocate all the data
        if batch_idx == 0:
            nb_labels = sample[1].shape[-1]
            all_vol = [np.zeros((nb_patches, nb_vox)) for f in models]
            all_true = [np.zeros((nb_patches, nb_vox * nb_labels)) for f in models]
            all_pred = [np.zeros((nb_patches, nb_vox * nb_labels)) for f in models]
            all_prior = [np.zeros((nb_patches, nb_vox * nb_labels)) for f in models]

        # get in_vol, y_true, y_pred
        for idx, model in enumerate(models):
            # with timer.Timer('prediction', verbose):
            pred = model.predict(sample[0])
            assert pred.shape[0] == batch_size, \
                "batch size mismatch. sample has batch size %d, given batch size is %d" %(pred.shape[0], batch_size)
            input_batch = sample[0] if not do_prior else sample[0][0]

            # compute batch range
            batch_start = batch_idx * batch_size
            batch_end = np.minimum(batch_start + batch_size, nb_patches)
            batch_range = np.arange(batch_start, batch_end)
            batch_vox_idx = batch_end-batch_start

            # update stacks
            all_vol[idx][batch_range, :] = K.batch_flatten(input_batch)[0:batch_vox_idx, :]
            all_true[idx][batch_range, :] = K.batch_flatten(sample[1])[0:batch_vox_idx, :]
            all_pred[idx][batch_range, :] = K.batch_flatten(pred)[0:batch_vox_idx, :]
            if do_prior:
                all_prior[idx][batch_range, :] = K.batch_flatten(sample[0][1])[0:batch_vox_idx, :]

    # reshape probabilistic answers
    for idx, _ in enumerate(models):
        all_true[idx] = np.reshape(all_true[idx], [nb_patches, nb_vox, nb_labels])
        all_pred[idx] = np.reshape(all_pred[idx], [nb_patches, nb_vox, nb_labels])
        if do_prior:
            all_prior[idx] = np.reshape(all_prior[idx], [nb_patches, nb_vox, nb_labels])

    # prepare output tuple
    ret = ()
    for midx, _ in enumerate(models):
        if do_prior:
            ret += ((all_true[midx], all_pred[midx], all_vol[midx], all_prior[midx]), )
        else:
            ret += ((all_true[midx], all_pred[midx], all_vol[midx]), )

    if len(models) == 1:
        ret = ret[0]
    return ret
Example 52
def cnn_bilstm_model(pooling_size=3,
                     nb_filters=32,
                     filters_length=10,
                     lstm_units=32,
                     attention_size=50):
    '''build model'''
    input = Input(shape=(None, ), dtype='int8')
    embedding_layer = Embedding(len(encoding_vectors),
                                len(encoding_vectors[0]),
                                weights=[encoding_vectors],
                                input_length=None,
                                trainable=False)
    embedding_output = embedding_layer(input)
    with tf.name_scope('first_cnn'):
        # first cnn layer
        cnn_output = Dropout(0.2)(
            MaxPooling1D(pool_length=pooling_size, stride=pooling_size)(
                Convolution1D(nb_filters,
                              filters_length,
                              border_mode='same',
                              activation='relu',
                              input_shape=(None, 24))(embedding_output))
            # output shape is in (batch_size, steps, filters), normalizing over the feature axis which is -1
        )
    with tf.name_scope('Second_cnn'):
        # stack another cnn layer on top
        cnn_output = Dropout(0.2)(MaxPooling1D(
            pool_length=pooling_size,
            stride=pooling_size)(Convolution1D(nb_filters,
                                               filters_length,
                                               border_mode='same',
                                               activation='relu')(cnn_output)))

    with tf.name_scope('Third_cnn'):
        # stack another cnn layer on top
        cnn_output = Dropout(0.2)(MaxPooling1D(
            pool_length=pooling_size,
            stride=pooling_size)(Convolution1D(nb_filters,
                                               filters_length,
                                               border_mode='same',
                                               activation='relu')(cnn_output)))

    with tf.name_scope('Fourth_cnn'):
        # stack another cnn layer on top
        cnn_output = Dropout(0.2)(MaxPooling1D(
            pool_length=pooling_size,
            stride=pooling_size)(Convolution1D(nb_filters,
                                               filters_length,
                                               border_mode='same',
                                               activation='relu')(cnn_output)))

    with tf.name_scope('bilstm_layer'):
        lstm_output = Bidirectional(
            LSTM(lstm_units,
                 dropout=0.1,
                 return_sequences=True,
                 input_shape=(None, nb_filters)))(cnn_output)
        # output shape: (batch_size, time steps, hidden size=2*nb_filters)

    hidden_size = lstm_output.get_shape()[2].value
    print('hidden size:', hidden_size)

    with tf.name_scope('attention_module'):
        # [batch_size, time_steps, attention_size]
        context_weights = Dense(attention_size,
                                activation='tanh',
                                kernel_initializer=random_normal(),
                                bias_initializer=random_normal())(lstm_output)
        # [batch_size, time_steps]
        scores = Lambda(lambda x: K.batch_flatten(x))(
            Dense(1, kernel_initializer=random_normal(),
                  use_bias=False)(context_weights))

        # softmax probability distribution, [batch_size, sequence_length]
        attention_weights = Lambda(lambda x: K.expand_dims(x, axis=-1))(
            Activation("softmax")(scores))

        # Multiply() behaves exactly as tf.multiply() which supports shape broadcasting, so its output_shape is [batch_size, time_steps, hidden_size]
        # Lambda(lambda x: K.sum(x, axis=1, keepdims=False)) is equivalent to tf.reduce_sum(axis=1)
        # [batch_size, hidden]
        output = Lambda(lambda x: K.sum(x, axis=1, keepdims=False))(
            Multiply()([lstm_output, attention_weights]))

    preds = Dense(nb_classes, activation='softmax')(output)
    model = Model(inputs=[input], outputs=preds)
    from keras import optimizers
    optim = optimizers.adam(lr=0.0001)
    # optim = optimizers.sgd(lr=0.001)
    model.compile(loss='kld', optimizer=optim, metrics=['acc'])
    return model
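The attention block above reduces the (batch, time, hidden) LSTM output to a (batch, hidden) context vector; a backend-only sketch of the same arithmetic (not from the original source):

# Hedged sketch of the attention arithmetic, not from the original source.
import numpy as np
from keras import backend as K

lstm_out = K.constant(np.random.rand(2, 5, 64))      # (batch, time_steps, hidden)
scores = K.constant(np.random.rand(2, 5))            # unnormalised attention scores
weights = K.expand_dims(K.softmax(scores), axis=-1)  # (batch, time_steps, 1)
context = K.sum(lstm_out * weights, axis=1)          # (batch, hidden)
print(K.eval(context).shape)  # expected (2, 64)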
Example 53
def grammian_matrix(matrix):
    flattened_matrix = K.batch_flatten(K.permute_dimensions(matrix, (2, 0, 1)))
    matrix_transpose_dot = K.dot(flattened_matrix, K.transpose(flattened_matrix))
    element_count = matrix.get_shape().num_elements()
    return matrix_transpose_dot / element_count
Example 54
def gram_matrix(x):
    assert K.ndim(x) == 3
    features = K.batch_flatten(x)
    gram = K.dot(features - 1, K.transpose(features - 1))
    return gram
Example 55
 def d_loss(y_true, y_pred):
     return objectives.binary_crossentropy(K.batch_flatten(y_true),
                                           K.batch_flatten(y_pred))
Example 56
    def compile(self):
        inner_grad = self.post_grad(
            K.gradients(K.sum(self.Q), self.layer.output))

        # Get outputs for flat intermediate layers
        if K.ndim(self.layer.output) == 2:
            n_outs = K.int_shape(self.layer.output)[1]
            layer_grads = inner_grad
            layer_outs = self.layer.output
            self.attribution_units = [
                self.layer.output[:, i] for i in range(n_outs)
            ]

        # Get outputs for convolutional intermediate layers
        # We treat each filter as an output, as in the original paper
        elif K.ndim(self.layer.output) == 4:
            if self.agg_fn is None:
                n_outs = int(np.prod(K.int_shape(self.layer.output)[1:]))
                layer_grads = K.batch_flatten(inner_grad)
                layer_outs = K.batch_flatten(self.layer.output)
                self.attribution_units = [
                    layer_outs[:, i] for i in range(n_outs)
                ]
            else:
                if K.image_data_format() == 'channels_first':
                    n_outs = K.int_shape(self.layer.output)[1]

                    def sel_fn(g):
                        return self.agg_fn(g, axis=(2, 3))
                else:
                    n_outs = K.int_shape(self.layer.output)[3]

                    def sel_fn(g):
                        return self.agg_fn(g, axis=(1, 2))

                layer_grads = sel_fn(inner_grad)  # (batch, feats)
                layer_outs = sel_fn(self.layer.output)  # (batch, feats)
                self.attribution_units = [
                    layer_outs[:, i] for i in range(n_outs)
                ]

        else:
            assert False, "Unsupported tensor shape: ndim=%d" % K.ndim(
                self.layer.output)

        if K.backend() == "theano":
            jac = K.theano.gradient.jacobian(K.sum(layer_outs, axis=0),
                                             self.model.input)
            outer_grads = [
                layer_grads * K.transpose(K.sum(jac, axis=(2, 3, 4)))
            ]

            def post_fn(r):
                return r[0]
        elif K.backend() == "tensorflow":
            outer_grads = [
                layer_grads[:, i] * K.sum(self.post_grad(
                    K.gradients(K.sum(layer_outs[:, i]), self.model.input)),
                                          axis=(1, 2, 3))
                for i in range(n_outs)
            ]

            # np.swapaxes(np.array(r),0,1)
            def post_fn(r):
                return np.array(np.transpose(r))

        if hasattr(
                self.model, 'uses_learning_phase'
        ) and self.model.uses_learning_phase and K.backend() == 'theano':
            grad_f = K.function(
                [self.model.input, K.learning_phase()], outer_grads)
            self.dF = lambda inp: post_fn(grad_f([inp, 0]))
        else:
            grad_f = K.function([self.model.input], outer_grads)
            self.dF = lambda inp: post_fn(grad_f([inp]))

        self.is_compiled = True
        self.n_outs = n_outs

        return self
Example 57
    def call(self, x, mask=None):
        # TODO: validate input shape

        assert (len(x) == 3)
        L_flat = x[0]
        mu = x[1]
        a = x[2]

        if self.mode == 'full':
            # Create L and L^T matrix, which we use to construct the positive-definite matrix P.
            L = None
            LT = None
            if K.backend() == 'theano':
                import theano.tensor as T
                import theano

                def fn(x, L_acc, LT_acc):
                    x_ = K.zeros((self.nb_actions, self.nb_actions))
                    x_ = T.set_subtensor(x_[np.tril_indices(self.nb_actions)], x)
                    diag = K.exp(T.diag(x_)) + K.epsilon()
                    x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)], diag)
                    return x_, x_.T

                outputs_info = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]
                results, _ = theano.scan(fn=fn, sequences=L_flat, outputs_info=outputs_info)
                L, LT = results
            elif K.backend() == 'tensorflow':
                import tensorflow as tf

                # Number of elements in a triangular matrix.
                nb_elems = (self.nb_actions * self.nb_actions + self.nb_actions) // 2

                # Create mask for the diagonal elements in L_flat. This is used to exponentiate
                # only the diagonal elements, which is done before gathering.
                diag_indeces = [0]
                for row in range(1, self.nb_actions):
                    diag_indeces.append(diag_indeces[-1] + (row + 1))
                diag_mask = np.zeros(1 + nb_elems)  # +1 for the leading zero
                diag_mask[np.array(diag_indeces) + 1] = 1
                diag_mask = K.variable(diag_mask)

                # Add leading zero element to each element in the L_flat. We use this zero
                # element when gathering L_flat into a lower triangular matrix L.
                nb_rows = tf.shape(L_flat)[0]
                zeros = tf.expand_dims(tf.tile(K.zeros((1,)), [nb_rows]), 1)
                try:
                    # Old TF behavior.
                    L_flat = tf.concat(1, [zeros, L_flat])
                except (TypeError, ValueError):
                    # New TF behavior
                    L_flat = tf.concat([zeros, L_flat], 1)

                # Create mask that can be used to gather elements from L_flat and put them
                # into a lower triangular matrix.
                tril_mask = np.zeros((self.nb_actions, self.nb_actions), dtype='int32')
                tril_mask[np.tril_indices(self.nb_actions)] = range(1, nb_elems + 1)

                # Finally, process each element of the batch.
                init = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]

                def fn(a, x):
                    # Exponentiate everything. This is much easier than only exponentiating
                    # the diagonal elements, and, usually, the action space is relatively low.
                    x_ = K.exp(x) + K.epsilon()
                    # Only keep the diagonal elements.
                    x_ *= diag_mask
                    # Add the original, non-diagonal elements.
                    x_ += x * (1. - diag_mask)
                    # Finally, gather everything into a lower triangular matrix.
                    L_ = tf.gather(x_, tril_mask)
                    return [L_, tf.transpose(L_)]

                tmp = tf.scan(fn, L_flat, initializer=init)
                if isinstance(tmp, (list, tuple)):
                    # TensorFlow 0.10 now returns a tuple of tensors.
                    L, LT = tmp
                else:
                    # Old TensorFlow < 0.10 returns a shared tensor.
                    L = tmp[:, 0, :, :]
                    LT = tmp[:, 1, :, :]
            else:
                raise RuntimeError('Unknown Keras backend "{}".'.format(K.backend()))
            assert L is not None
            assert LT is not None
            P = K.batch_dot(L, LT)
        elif self.mode == 'diag':
            if K.backend() == 'theano':
                import theano.tensor as T
                import theano

                def fn(x, P_acc):
                    x_ = K.zeros((self.nb_actions, self.nb_actions))
                    x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)], x)
                    return x_

                outputs_info = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]
                P, _ = theano.scan(fn=fn, sequences=L_flat, outputs_info=outputs_info)
            elif K.backend() == 'tensorflow':
                import tensorflow as tf

                # Create mask that can be used to gather elements from L_flat and put them
                # into a diagonal matrix.
                diag_mask = np.zeros((self.nb_actions, self.nb_actions), dtype='int32')
                diag_mask[np.diag_indices(self.nb_actions)] = range(1, self.nb_actions + 1)

                # Add leading zero element to each element in the L_flat. We use this zero
                # element when gathering L_flat into a lower triangular matrix L.
                nb_rows = tf.shape(L_flat)[0]
                zeros = tf.expand_dims(tf.tile(K.zeros((1,)), [nb_rows]), 1)
                try:
                    # Old TF behavior.
                    L_flat = tf.concat(1, [zeros, L_flat])
                except (TypeError, ValueError):
                    # New TF behavior
                    L_flat = tf.concat([zeros, L_flat], 1)

                # Finally, process each element of the batch.
                def fn(a, x):
                    x_ = tf.gather(x, diag_mask)
                    return x_

                P = tf.scan(fn, L_flat, initializer=K.zeros((self.nb_actions, self.nb_actions)))
            else:
                raise RuntimeError('Unknown Keras backend "{}".'.format(K.backend()))
        assert P is not None
        assert K.ndim(P) == 3

        # Combine a, mu and P into a scalar (over the batches). What we compute here is
        # -.5 * (a - mu)^T * P * (a - mu), where * denotes the dot-product. Unfortunately
        # TensorFlow handles vector * P slightly suboptimal, hence we convert the vectors to
        # 1xd/dx1 matrices and finally flatten the resulting 1x1 matrix into a scalar. All
        # operations happen over the batch size, which is dimension 0.
        prod = K.batch_dot(K.expand_dims(a - mu, 1), P)
        prod = K.batch_dot(prod, K.expand_dims(a - mu, -1))
        A = -.5 * K.batch_flatten(prod)
        assert K.ndim(A) == 2
        return A