def test_softmax(self):
    x = backend.placeholder(ndim=2)
    f = backend.function([x], [activations.softmax(x)])
    test_values = np.random.random((2, 5))

    result = f([test_values])[0]
    expected = _ref_softmax(test_values[0])
    self.assertAllClose(result[0], expected, rtol=1e-05)

    with self.assertRaises(ValueError):
        x = backend.placeholder(ndim=1)
        activations.softmax(x)
def test_temporal_softmax(self):
    x = backend.placeholder(shape=(2, 2, 3))
    f = backend.function([x], [activations.softmax(x)])
    test_values = np.random.random((2, 2, 3)) * 10
    result = f([test_values])[0]
    expected = _ref_softmax(test_values[0, 0])
    self.assertAllClose(result[0, 0], expected, rtol=1e-05)
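These tests compare against a `_ref_softmax` helper that is not included in this excerpt. A minimal NumPy sketch consistent with the 1-D slices it is applied to here (the helper in the original test module may differ):

import numpy as np

def _ref_softmax(values):
    # Numerically stabilized softmax over all entries of `values`.
    m = np.max(values)
    e = np.exp(values - m)
    return e / np.sum(e)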
Example #3
def test_softmax(self, shape):
    x = backend.placeholder(ndim=len(shape))
    f = backend.function([x], [activations.softmax(x, axis=-1)])
    test_values = np.random.random(shape)
    result = f([test_values])[0]
    expected = _ref_softmax(test_values)
    self.assertAllClose(result, expected, rtol=1e-05)
Example #4
    def call(self, inputs, **kwargs):

        if K.ndim(inputs[0]) != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs[0])))

        embeds_vec_list = inputs
        row = []
        col = []
        num_inputs = len(embeds_vec_list)
        # Enumerate every unordered pair (i, j) of input embedding vectors.
        for i in range(num_inputs - 1):
            for j in range(i + 1, num_inputs):
                row.append(i)
                col.append(j)
        p = concatenate([embeds_vec_list[idx] for idx in row], axis=1)  # (batch, num_pairs, k)
        q = concatenate([embeds_vec_list[idx] for idx in col], axis=1)  # Reshape([num_pairs, self.embedding_size])
        inner_product = p * q

        bi_interaction = inner_product

        attention_temp = Dense(self.attention_factor, activation='relu',
                               kernel_regularizer=l2(self.l2_reg_w))(bi_interaction)
        attention_weight = softmax(K.dot(attention_temp, self.projection_h), axis=1)

        attention_output = K.sum(attention_weight * bi_interaction, axis=1)
        attention_output = tf.nn.dropout(attention_output, self.keep_prob, seed=1024)
        # Dropout(1 - self.keep_prob)(attention_output)
        afm_out = K.dot(attention_output, self.projection_p)

        return afm_out
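The nested loop above enumerates every unordered pair of input embeddings; an equivalent construction of the same `row`/`col` index lists using itertools, shown only as an illustration with a hypothetical num_inputs:

from itertools import combinations

num_inputs = 4  # e.g. four embedding vectors
row, col = map(list, zip(*combinations(range(num_inputs), 2)))
# row == [0, 0, 0, 1, 1, 2], col == [1, 2, 3, 2, 3, 3]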
Example #5
def decode(X, shape, arch='ff'):
    if arch == 'ff':
        X = tf.reduce_mean(X, axis=1)
        X = Dense(128, activation='relu')(X)
        X = Dense(128, activation='relu')(X)
        X = Dense(128, activation='relu')(X)
        X = Dense(np.prod(shape))(X)
        X = tf.reshape(X, [-1] + shape)
    elif arch == 'sab':
        K = shape[0]
        X = IMAB(X, K, 128, 4, var_name='seed')
        X = SAB(X, 128, 4)
        X = Dense(np.prod(shape[1:]))(X)
        X = tf.reshape(X, [-1] + shape)
    elif arch == 'sabsab':
        K = shape[0]
        X = IMAB(X, K, 128, 4, var_name='seed')
        X = SAB(X, 128, 4)
        X = SAB(X, 128, 4)
        X = Dense(np.prod(shape[1:]))(X)
        X = tf.reshape(X, [-1] + shape)
    elif arch == 'dotprod':
        C = Dense(128, activation='tanh')(X)
        S = softmax(C, axis=1)

        X = tf.reduce_sum(X * S, axis=1)
        X = Dense(128, activation='relu')(X)
        X = Dense(128, activation='relu')(X)
        X = Dense(128, activation='relu')(X)
        X = Dense(np.prod(shape))(X)
        X = tf.reshape(X, [-1] + shape)
    else:
        raise ValueError('Invalid decoder architecture')
    return X
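A minimal usage sketch for the 'ff' branch, assuming TF1-style graph code where Keras layers are applied to plain tensors and hypothetical input shapes; the IMAB and SAB helpers used by the other branches are not shown in this excerpt. Note that `shape` must be a Python list because of the `[-1] + shape` reshape.

import tensorflow as tf

# X holds a set of 10 feature vectors of size 64 per example.
X = tf.placeholder(tf.float32, shape=(None, 10, 64))
decoded = decode(X, shape=[5, 5], arch='ff')  # -> tensor of shape (batch, 5, 5)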
Example #6
def test_softmax_3d_axis_tuple(self):
    x = backend.placeholder(ndim=3)
    f = backend.function([x], [activations.softmax(x, axis=(1, 2))])
    test_values = np.random.random((2, 3, 5))
    result = f([test_values])[0]
    expected = np.zeros((2, 3, 5))
    for i in range(2):
        expected[i, :, :] = _ref_softmax(test_values[i, :, :])
    self.assertAllClose(result, expected, rtol=1e-05)
Example #7
def test_softmax_2d_axis_0(self):
    x = backend.placeholder(ndim=2)
    f = backend.function([x], [activations.softmax(x, axis=0)])
    test_values = np.random.random((2, 5))
    result = f([test_values])[0]
    expected = np.zeros((2, 5))
    for i in range(5):
        expected[:, i] = _ref_softmax(test_values[:, i])
    self.assertAllClose(result, expected, rtol=1e-05)
Example #8
    def __init__(self,
                 filters,
                 kernel_size,
                 data_format='channels_first',
                 padding='same',
                 strides=1,
                 alpha=0.1,
                 is_final=False,
                 use_edges=False):
        """
        A layer that performs the following sequence of operations:
            1. 2D Convolution with no activation function
            2. Batchnorm
            3. Leaky ReLu activation function

        If is_final is set to True, then layer will perform the following sequence of operations:
            1. Batchnorm
            2. 2D Convolution with no activation function
            3. Sigmoid activation function if use_edges is False, otherwise softmax channel-wise

        :param filters: The number of filters for the 2D convolutional layer
        :param kernel_size: The kernel size for the 2D convolutional layer
        :param data_format: The data format of the input to the 2D convolutional layer
        :param padding: The padding to use for the convolutional layer
        :param strides: The strides to use for the convolutional layer
        :param alpha: The parameter of the leaky ReLu activation
        :param is_final: Boolean flag to signal if this block is an intermediary or final block
        :param use_edges: Boolean flag to signal the type of activation function to use if this block is a
               final block
        """
        channel_axis = 1 if data_format == 'channels_first' else -1
        self.is_final = is_final

        self.conv = Conv2D(filters,
                           kernel_size,
                           strides,
                           padding,
                           data_format,
                           activation='linear',
                           kernel_initializer='glorot_uniform',
                           bias_initializer='glorot_uniform')

        self.bn = BatchNormalization(axis=channel_axis)

        if not is_final:
            self.activation = LeakyReLU(alpha=alpha)
        else:
            if use_edges:
                # if edges are used and it is a final layer, then number of channels must be 3
                # and we want to normalize channel-wise
                self.activation = lambda x: softmax(x, axis=channel_axis)
            else:
                self.activation = sigmoid
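The class name and its forward method are not part of this excerpt; a minimal sketch of how the documented sequence could be applied, using a hypothetical __call__ (an assumption, not the original code):

    def __call__(self, x):
        # Intermediate block (is_final=False): conv -> batch norm -> LeakyReLU.
        # Final block (is_final=True): batch norm -> conv -> sigmoid / channel-wise
        # softmax, matching the order described in the docstring above.
        if not self.is_final:
            return self.activation(self.bn(self.conv(x)))
        return self.activation(self.conv(self.bn(x)))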
Example #9
def iterative_train(theories: List[Theory],
                    X: np.ndarray, Y: np.ndarray,
                    loss_func: Callable[..., tf.Tensor] = generalized_mean_loss,
                    optimizer_pred: OptimizerV2 = Adam(),
                    optimizer_domain: OptimizerV2 = Adam(),
                    K: int = 10000,
                    eps: float = 10.,
                    loss_kwargs: Optional[Dict] = None):

    if loss_kwargs is None:
        loss_kwargs = {"gamma": -1, "eps": eps}

    trainable_pred_variables = sum(map(lambda x: x.trainable_pred_variables(), theories), [])

    trainable_domain_variables = sum(map(lambda x: x.trainable_domain_variables(), theories), [])
    # flag = False

    for k in range(K):  # Main training loop
        """ Can be optimized by removing the double evaluation """

        # Predictor optimization
        with tf.GradientTape() as tape:

            loss = loss_func(theories, X, Y, **loss_kwargs)
            if not k % 100:
                print("Step %d loss %.5f" % (k, loss.numpy()))

        gradients = tape.gradient(loss, trainable_pred_variables)
        optimizer_pred.apply_gradients(zip(gradients, trainable_pred_variables))

        best_idx = assign_theories(theories, X, Y)  # (batch,) labels for the domain classification

        with tf.GradientTape() as tape:

            domain_probs = []
            for theory in theories:
                preds = theory.domain(X)  # (batch, 1)
                domain_probs.append(preds)

            domain_probs = tf.concat(domain_probs, axis=1)  # (batch, theories)
            domain_probs = softmax(domain_probs, axis=1)  # (batch, theories)

            cce = SparseCategoricalCrossentropy()
            loss = cce(y_true=best_idx, y_pred=domain_probs)

        gradients = tape.gradient(loss, trainable_domain_variables)
        optimizer_domain.apply_gradients(zip(gradients, trainable_domain_variables))
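assign_theories is not shown in this excerpt; the comment suggests a hard assignment of each sample to its best-fitting theory. A hedged sketch of that idea, assuming a hypothetical predict(X) method on each theory (the original helper may differ):

def assign_theories(theories, X, Y):
    # Squared prediction error per theory, stacked to (batch, num_theories),
    # then the index of the lowest-error theory for each sample.
    errors = tf.stack(
        [tf.reduce_sum((theory.predict(X) - Y) ** 2, axis=-1) for theory in theories],
        axis=1)
    return tf.argmin(errors, axis=1)  # (batch,) integer labels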
Example #10
def Fashion_CNN(input_shape, num_classes, learning_rate, graph):

    with graph.as_default():

        #is_train = tf.placeholder(tf.bool)
        img = tf.placeholder(tf.float32, input_shape)

        labels = tf.placeholder(tf.float32, shape=(None, num_classes))
        lr = tf.placeholder(tf.float32)

        # first 3 convolutions approximate Conv(7,7):
        layer = conv_layer(img, 64)
        layer = conv_layer(layer, 64)
        layer = conv_layer(layer, 64)
        layer = MaxPooling2D()(layer)
        layer = dropout(layer, keep_prob=0.7)
        layer = conv_layer(layer, 128, shape=(-1, 14, 14, -1))
        layer = conv_layer(layer, 128, shape=(-1, 14, 14, -1))
        layer = conv_layer(layer, 64, (1, 1), shape=(-1, 14, 14, -1))
        layer = MaxPooling2D()(layer)
        layer = Flatten()(layer)
        layer = dropout(layer, keep_prob=0.7)
        layer = fc_layer(layer, 2048)
        layer = dropout(layer)
        layer = fc_layer(layer, 512)
        layer = dropout(layer)
        layer = fc_layer(layer, 256)
        layer = dropout(layer)
        layer = Dense(10, kernel_initializer='glorot_normal')(layer)
        layer = batch_norm(layer,
                           updates_collections=None,
                           center=True,
                           scale=True)
        preds = activations.softmax(layer)

        lossL2 = tf.add_n([
            tf.nn.l2_loss(v) for v in tf.trainable_variables()
            if 'kernel' in v.name
        ])

        beta = 1e-7
        loss = tf.reduce_mean(losses.categorical_crossentropy(labels, preds))
        train_step = NadamOptimizer(learning_rate=lr).minimize(loss)

        acc_value = tf.reduce_mean(metrics.categorical_accuracy(labels, preds))

        return img, labels, lr, train_step, loss, acc_value
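A minimal TF1-style usage sketch for the graph returned above (assumption: training with feed_dict; x_batch and y_batch stand in for a real mini-batch of images and one-hot labels and are not defined here):

graph = tf.Graph()
img, labels, lr, train_step, loss, acc_value = Fashion_CNN(
    input_shape=(None, 28, 28, 1), num_classes=10, learning_rate=1e-3, graph=graph)

with graph.as_default():
    init_op = tf.global_variables_initializer()

with tf.Session(graph=graph) as sess:
    sess.run(init_op)
    _, batch_loss = sess.run([train_step, loss],
                             feed_dict={img: x_batch, labels: y_batch, lr: 1e-3})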
Example #11
def call(self, inputs):
    return activations.softmax(inputs, axis=self.axis)
Example #12
def call(self, inputs):
    return activations.softmax(inputs, axis=self.axis)
Example #13
def axis_softmax(x, axis=1):
    return activations.softmax(x, axis=axis)
Example #14
layer = MaxPooling2D()(layer)
layer = Flatten()(layer)
layer = dropout(layer, keep_prob=0.7, is_training=is_train)
layer = fc_layer(layer, 2048)
layer = dropout(layer, is_training=is_train)
layer = fc_layer(layer, 512)
layer = dropout(layer, is_training=is_train)
layer = fc_layer(layer, 256)
layer = dropout(layer, is_training=is_train)
layer = Dense(10, kernel_initializer='glorot_normal')(layer)
layer = batch_norm(layer,
                   updates_collections=None,
                   center=True,
                   scale=True,
                   is_training=is_train)
preds = activations.softmax(layer)

lossL2 = tf.add_n(
    [tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'kernel' in v.name])

beta = 1e-7
loss = tf.reduce_mean(losses.categorical_crossentropy(labels, preds))
train_step = NadamOptimizer(learning_rate=lr).minimize(loss)

# Initialize all variables
init_op = tf.global_variables_initializer()
sess.run(init_op)

acc_value = tf.reduce_mean(metrics.categorical_accuracy(labels, preds))

Example #15
def cumsoftmax(x, axis=-1):
    return tf.math.cumsum(activations.softmax(x, axis=axis), axis=axis)
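A quick usage sketch (assuming TensorFlow 2 eager execution and the usual from tensorflow.keras import activations in the original module): along the chosen axis the result is non-decreasing and ends at 1, since it is the running sum of a probability distribution.

import tensorflow as tf

x = tf.constant([[1.0, 2.0, 3.0]])
print(cumsoftmax(x).numpy())  # approximately [[0.09, 0.33, 1.00]]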