def test_softmax(self):
    x = backend.placeholder(ndim=2)
    f = backend.function([x], [activations.softmax(x)])
    test_values = np.random.random((2, 5))
    result = f([test_values])[0]
    expected = _ref_softmax(test_values[0])
    self.assertAllClose(result[0], expected, rtol=1e-05)

    with self.assertRaises(ValueError):
        x = backend.placeholder(ndim=1)
        activations.softmax(x)

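# The tests here rely on a `_ref_softmax` helper that is not shown in this
# section. A minimal sketch of such a reference implementation (assumed, for
# illustration; the real helper may differ):
def _ref_softmax(values):
    # Numerically stable softmax normalized over the whole input array.
    m = np.max(values)
    e = np.exp(values - m)
    return e / np.sum(e)
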
def test_temporal_softmax(self):
    x = backend.placeholder(shape=(2, 2, 3))
    f = backend.function([x], [activations.softmax(x)])
    test_values = np.random.random((2, 2, 3)) * 10
    result = f([test_values])[0]
    expected = _ref_softmax(test_values[0, 0])
    self.assertAllClose(result[0, 0], expected, rtol=1e-05)

def test_softmax(self, shape):
    x = backend.placeholder(ndim=len(shape))
    f = backend.function([x], [activations.softmax(x, axis=-1)])
    test_values = np.random.random(shape)
    result = f([test_values])[0]
    expected = _ref_softmax(test_values)
    self.assertAllClose(result, expected, rtol=1e-05)

def call(self, inputs, **kwargs):
    if K.ndim(inputs[0]) != 3:
        raise ValueError(
            "Unexpected inputs dimensions %d, expected 3 dimensions"
            % K.ndim(inputs[0]))

    embeds_vec_list = inputs
    row = []
    col = []
    num_inputs = len(embeds_vec_list)
    # Enumerate all feature pairs (i, j) with i < j.
    for i in range(num_inputs - 1):
        for j in range(i + 1, num_inputs):
            row.append(i)
            col.append(j)

    p = concatenate([embeds_vec_list[idx] for idx in row], axis=1)  # (batch, num_pairs, k)
    q = concatenate([embeds_vec_list[idx] for idx in col], axis=1)  # Reshape([num_pairs, self.embedding_size])
    inner_product = p * q

    bi_interaction = inner_product
    attention_temp = Dense(self.attention_factor, activation='relu',
                           kernel_regularizer=l2(self.l2_reg_w))(bi_interaction)
    attention_weight = softmax(K.dot(attention_temp, self.projection_h), axis=1)
    attention_output = K.sum(attention_weight * bi_interaction, axis=1)
    attention_output = tf.nn.dropout(attention_output, self.keep_prob,
                                     seed=1024)  # Dropout(1 - self.keep_prob)(attention_output)
    afm_out = K.dot(attention_output, self.projection_p)
    return afm_out

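# Shape note for the AFM-style attention above (interpretation of the snippet,
# not from the source verbatim): with n embedding vectors of size k, `p * q`
# enumerates all n * (n - 1) / 2 pairs, so `bi_interaction` is
# (batch, num_pairs, k); the softmax over axis=1 produces one attention weight
# per pair, and the weighted sum pools the pairs back to a single (batch, k)
# vector before the final projection.
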
def decode(X, shape, arch='ff'):
    if arch == 'ff':
        X = tf.reduce_mean(X, axis=1)
        X = Dense(128, activation='relu')(X)
        X = Dense(128, activation='relu')(X)
        X = Dense(128, activation='relu')(X)
        X = Dense(np.prod(shape))(X)
        X = tf.reshape(X, [-1] + shape)
    elif arch == 'sab':
        K = shape[0]
        X = IMAB(X, K, 128, 4, var_name='seed')
        X = SAB(X, 128, 4)
        X = Dense(np.prod(shape[1:]))(X)
        X = tf.reshape(X, [-1] + shape)
    elif arch == 'sabsab':
        K = shape[0]
        X = IMAB(X, K, 128, 4, var_name='seed')
        X = SAB(X, 128, 4)
        X = SAB(X, 128, 4)
        X = Dense(np.prod(shape[1:]))(X)
        X = tf.reshape(X, [-1] + shape)
    elif arch == 'dotprod':
        C = Dense(128, activation='tanh')(X)
        S = softmax(C, axis=1)
        X = tf.reduce_sum(X * S, axis=1)
        X = Dense(128, activation='relu')(X)
        X = Dense(128, activation='relu')(X)
        X = Dense(128, activation='relu')(X)
        X = Dense(np.prod(shape))(X)
        X = tf.reshape(X, [-1] + shape)
    else:
        raise ValueError('Invalid decoder architecture')
    return X

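# Hedged usage sketch for `decode` (shapes assumed): the input X is a set
# representation of shape (batch, n_elements, d). Note that `shape` must be a
# Python list, since it is concatenated as `[-1] + shape`. Illustrative only:
#   X = tf.random.normal([8, 10, 64])            # 8 sets of 10 elements each
#   out = decode(X, shape=[28, 28], arch='ff')   # -> (8, 28, 28)
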
def test_softmax_3d_axis_tuple(self):
    x = backend.placeholder(ndim=3)
    f = backend.function([x], [activations.softmax(x, axis=(1, 2))])
    test_values = np.random.random((2, 3, 5))
    result = f([test_values])[0]
    expected = np.zeros((2, 3, 5))
    for i in range(2):
        expected[i, :, :] = _ref_softmax(test_values[i, :, :])
    self.assertAllClose(result, expected, rtol=1e-05)

def test_softmax_2d_axis_0(self):
    x = backend.placeholder(ndim=2)
    f = backend.function([x], [activations.softmax(x, axis=0)])
    test_values = np.random.random((2, 5))
    result = f([test_values])[0]
    expected = np.zeros((2, 5))
    for i in range(5):
        expected[:, i] = _ref_softmax(test_values[:, i])
    self.assertAllClose(result, expected, rtol=1e-05)

def __init__(self, filters, kernel_size, data_format='channels_first', padding='same',
             strides=1, alpha=0.1, is_final=False, use_edges=False):
    """
    A layer that performs the following sequence of operations:
    1. 2D convolution with no activation function
    2. Batch normalization
    3. Leaky ReLU activation function

    If is_final is set to True, the layer will instead perform:
    1. Batch normalization
    2. 2D convolution with no activation function
    3. Sigmoid activation function if use_edges is False, otherwise channel-wise softmax

    :param filters: The number of filters for the 2D convolutional layer
    :param kernel_size: The kernel size for the 2D convolutional layer
    :param data_format: The data format of the input to the 2D convolutional layer
    :param padding: The padding to use for the convolutional layer
    :param strides: The strides to use for the convolutional layer
    :param alpha: The parameter of the leaky ReLU activation
    :param is_final: Boolean flag signaling whether this block is an intermediary or final block
    :param use_edges: Boolean flag selecting the activation function when this block is a final block
    """
    channel_axis = 1 if data_format == 'channels_first' else -1
    self.is_final = is_final
    self.conv = Conv2D(filters, kernel_size, strides, padding, data_format,
                       activation='linear', kernel_initializer='glorot_uniform',
                       bias_initializer='glorot_uniform')
    self.bn = BatchNormalization(axis=channel_axis)
    if not is_final:
        self.activation = LeakyReLU(alpha=alpha)
    else:
        if use_edges:
            # If edges are used and this is a final layer, the number of channels
            # must be 3 and we want to normalize channel-wise.
            self.activation = lambda x: softmax(x, axis=channel_axis)
        else:
            self.activation = sigmoid

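# Hedged usage sketch: assuming this __init__ belongs to a block class (called
# `ConvBlock` here; the real class name is not shown) whose forward pass
# composes the three pieces in the documented order for intermediate blocks:
#   block = ConvBlock(filters=32, kernel_size=3, data_format='channels_last')
#   y = block.activation(block.bn(block.conv(x)))   # conv -> BN -> LeakyReLU
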
def iterative_train(theories: List[Theory], X: np.ndarray, Y: np.ndarray,
                    loss_func: Callable[..., tf.Tensor] = generalized_mean_loss,
                    optimizer_pred: OptimizerV2 = Adam(),
                    optimizer_domain: OptimizerV2 = Adam(),
                    K: int = 10000, eps: float = 10.,
                    loss_kwargs: Optional[Dict] = None):
    if loss_kwargs is None:
        loss_kwargs = {"gamma": -1, "eps": eps}
    trainable_pred_variables = sum(
        map(lambda x: x.trainable_pred_variables(), theories), [])
    trainable_domain_variables = sum(
        map(lambda x: x.trainable_domain_variables(), theories), [])

    for k in range(K):  # Main training loop
        # NOTE: could be optimized by removing the double evaluation.
        # Phase 1: optimize the predictors under the shared loss.
        with tf.GradientTape() as tape:
            loss = loss_func(theories, X, Y, **loss_kwargs)
            if not k % 100:
                print("Step %d loss %.5f" % (k, loss.numpy()))
        gradients = tape.gradient(loss, trainable_pred_variables)
        optimizer_pred.apply_gradients(zip(gradients, trainable_pred_variables))

        # Phase 2: fit the domain classifiers to the best-theory assignment.
        best_idx = assign_theories(theories, X, Y)  # (batch,) labels for the domain classification
        with tf.GradientTape() as tape:
            domain_probs = []
            for theory in theories:
                preds = theory.domain(X)  # (batch, 1)
                domain_probs.append(preds)
            domain_probs = tf.concat(domain_probs, axis=1)  # (batch, theories)
            domain_probs = softmax(domain_probs, axis=1)  # (batch, theories)
            cce = SparseCategoricalCrossentropy()
            loss = cce(y_true=best_idx, y_pred=domain_probs)
        gradients = tape.gradient(loss, trainable_domain_variables)
        optimizer_domain.apply_gradients(zip(gradients, trainable_domain_variables))

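# Note on the scheme above (interpretation, not from the source verbatim): the
# loop alternates between (1) gradient steps on all theories' predictors under
# the generalized-mean loss and (2) supervised training of each theory's domain
# classifier, using the per-sample best-predicting theory as the label, i.e. an
# EM-like hard assignment followed by a classification step.
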
def Fashion_CNN(input_shape, num_classes, learning_rate, graph):
    with graph.as_default():
        # is_train = tf.placeholder(tf.bool)
        img = tf.placeholder(tf.float32, input_shape)
        labels = tf.placeholder(tf.float32, shape=(None, num_classes))
        lr = tf.placeholder(tf.float32)

        # The first 3 convolutions approximate Conv(7, 7):
        layer = conv_layer(img, 64)
        layer = conv_layer(layer, 64)
        layer = conv_layer(layer, 64)
        layer = MaxPooling2D()(layer)
        layer = dropout(layer, keep_prob=0.7)

        layer = conv_layer(layer, 128, shape=(-1, 14, 14, -1))
        layer = conv_layer(layer, 128, shape=(-1, 14, 14, -1))
        layer = conv_layer(layer, 64, (1, 1), shape=(-1, 14, 14, -1))
        layer = MaxPooling2D()(layer)

        layer = Flatten()(layer)
        layer = dropout(layer, keep_prob=0.7)
        layer = fc_layer(layer, 2048)
        layer = dropout(layer)
        layer = fc_layer(layer, 512)
        layer = dropout(layer)
        layer = fc_layer(layer, 256)
        layer = dropout(layer)
        layer = Dense(10, kernel_initializer='glorot_normal')(layer)
        layer = batch_norm(layer, updates_collections=None, center=True, scale=True)
        preds = activations.softmax(layer)

        lossL2 = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()
                           if 'kernel' in v.name])
        beta = 1e-7
        loss = tf.reduce_mean(losses.categorical_crossentropy(labels, preds))
        train_step = NadamOptimizer(learning_rate=lr).minimize(loss)
        acc_value = tf.reduce_mean(metrics.categorical_accuracy(labels, preds))

    return img, labels, lr, train_step, loss, acc_value

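# Hedged usage sketch (TF1-style; shapes and batch variables assumed):
#   graph = tf.Graph()
#   img, labels, lr, train_step, loss, acc = Fashion_CNN(
#       (None, 28, 28, 1), 10, 1e-3, graph)
#   with tf.Session(graph=graph) as sess:
#       sess.run(tf.global_variables_initializer())
#       sess.run(train_step, feed_dict={img: x_batch, labels: y_batch, lr: 1e-3})
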
def call(self, inputs):
    return activations.softmax(inputs, axis=self.axis)

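# Context sketch (assumed): a `call` like the one above is typically the
# forward pass of a thin Softmax layer wrapper, e.g.:
# class Softmax(Layer):
#     def __init__(self, axis=-1, **kwargs):
#         super().__init__(**kwargs)
#         self.axis = axis
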
def axis_softmax(x, axis=1):
    return activations.softmax(x, axis=axis)

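# Hedged usage note: `axis_softmax` matches the signature Keras expects of a
# custom activation callable, so it can be passed directly (illustrative):
#   Dense(10, activation=axis_softmax)         # softmax over axis=1 by default
#   Lambda(lambda t: axis_softmax(t, axis=2))  # softmax over another axis
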
# Variant of the model tail above that threads an `is_train` placeholder
# through the dropout and batch-norm layers.
layer = MaxPooling2D()(layer)
layer = Flatten()(layer)
layer = dropout(layer, keep_prob=0.7, is_training=is_train)
layer = fc_layer(layer, 2048)
layer = dropout(layer, is_training=is_train)
layer = fc_layer(layer, 512)
layer = dropout(layer, is_training=is_train)
layer = fc_layer(layer, 256)
layer = dropout(layer, is_training=is_train)
layer = Dense(10, kernel_initializer='glorot_normal')(layer)
layer = batch_norm(layer, updates_collections=None, center=True, scale=True,
                   is_training=is_train)
preds = activations.softmax(layer)

lossL2 = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()
                   if 'kernel' in v.name])
beta = 1e-7
loss = tf.reduce_mean(losses.categorical_crossentropy(labels, preds))
train_step = NadamOptimizer(learning_rate=lr).minimize(loss)

# Initialize all variables
init_op = tf.global_variables_initializer()
sess.run(init_op)
acc_value = tf.reduce_mean(metrics.categorical_accuracy(labels, preds))

def cumsoftmax(x, axis=-1):
    return tf.math.cumsum(activations.softmax(x, axis=axis), axis=axis)
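
# Hedged usage note: `cumsoftmax` yields monotonically non-decreasing values in
# (0, 1] along `axis` (the CDF of the softmax distribution), as used in
# ordered-neuron-style gating. Illustrative check:
#   x = tf.constant([[1.0, 2.0, 3.0]])
#   cumsoftmax(x)   # approx. [[0.09, 0.33, 1.00]]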