def __init__(self, n_heads, tm_state_units, is_cam, num_shift, M, name='Read'):
    self.n_heads = n_heads
    self.is_cam = is_cam

    # build a controller for all the heads given size and activation for each parameter
    params = [('s', (num_shift, lambda z: softmax(softplus(z)))),
              ('gamma', (1, lambda z: 1 + softplus(z)))]
    if self.is_cam:
        params = [('k', (M, tanh)),
                  ('beta', (1, softplus)),
                  ('g', (1, sigmoid))] + params

    self.head_ctrl = self._model2d((tm_state_units,), params, name)
    self.trainable_weights = self.head_ctrl.trainable_weights
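# Illustrative NumPy sketch (not part of the class above): what the `s` and
# `gamma` activations produce. softmax(softplus(z)) yields a positive,
# normalized shift distribution; 1 + softplus keeps the sharpening exponent
# gamma bounded below by 1.
import numpy as np

def softplus(z):
    return np.logaddexp(0.0, z)  # numerically stable log(1 + exp(z))

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

raw = np.random.randn(3)
s = softmax(softplus(raw))                  # num_shift weights, sum to 1
gamma = 1.0 + softplus(np.random.randn())   # always >= 1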
def test_softplus(self):
    def softplus(x):
        return np.log(np.ones_like(x) + np.exp(x))

    x = backend.placeholder(ndim=2)
    f = backend.function([x], [activations.softplus(x)])
    test_values = np.random.random((2, 5))
    result = f([test_values])[0]
    expected = softplus(test_values)
    self.assertAllClose(result, expected, rtol=1e-05)
def test_softplus():
    """Test using a reference softplus implementation."""
    def softplus(x):
        return np.log(np.ones_like(x) + np.exp(x))

    x = K.placeholder(ndim=2)
    f = K.function([x], [activations.softplus(x)])
    test_values = get_standard_values()
    result = f([test_values])[0]
    expected = softplus(test_values)
    assert_allclose(result, expected, rtol=1e-05)
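# Aside (illustrative, not part of the tests): the reference implementation
# log(1 + exp(x)) overflows for large positive x because np.exp(x) does. A
# numerically stable equivalent uses the identity
# softplus(x) = max(x, 0) + log1p(exp(-|x|)):
import numpy as np

def softplus_stable(x):
    return np.maximum(x, 0) + np.log1p(np.exp(-np.abs(x)))

assert np.allclose(softplus_stable(np.array([-1e3, 0.0, 1e3])),
                   [0.0, np.log(2.0), 1e3])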
def decode_nflow_func(z_input, n_blk, dim_x, mdl, gen_nodes=None):
    # Fixed random permutations, one per flow block, seeded for reproducibility.
    permute_ind = []
    for ii in range(n_blk):
        np.random.seed(ii)
        permute_ind.append(tf.convert_to_tensor(np.random.permutation(dim_x)))

    # Used zero padding before, but now take a learned transformation instead:
    # dim_z = z_input.shape.as_list()[-1]
    # concat_func = Lambda(lambda x: layers.concatenate(
    #     [x, tf.zeros(shape=(tf.shape(x)[0], dim_x - dim_z))], axis=-1))
    # z_pad_input = concat_func(z_input)
    # output = affine_coupling_block(z_pad_input)
    output = first_nflow_layer(z_input, dim_x)

    for ii in range(n_blk):
        output = Lambda(perm_func, arguments={'ind': permute_ind[ii]})(output)
        output = affine_coupling_block(output, gen_nodes)

    if mdl == 'poisson':
        # Poisson rates must be positive, so pass the output through softplus.
        softplus_func = Lambda(lambda x: softplus(x))
        output = softplus_func(output)
    return output
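# Hypothetical sketch of what `affine_coupling_block` could look like
# (RealNVP-style; the repo's real block is defined elsewhere, and GIN
# additionally constrains the log-scales to sum to zero so the flow is
# volume-preserving):
import tensorflow as tf
from tensorflow.keras import layers

def affine_coupling_block_sketch(x, hidden_units=64):
    dim = x.shape[-1]
    d = dim // 2
    x1 = layers.Lambda(lambda t: t[:, :d])(x)   # conditioning half, passed through
    x2 = layers.Lambda(lambda t: t[:, d:])(x)   # half to be transformed
    h = layers.Dense(hidden_units, activation='relu')(x1)
    log_s = layers.Dense(dim - d, activation='tanh')(h)   # log-scales
    t_shift = layers.Dense(dim - d)(h)                    # translations
    y2 = layers.Lambda(lambda a: a[0] * tf.exp(a[1]) + a[2])([x2, log_s, t_shift])
    return layers.Concatenate(axis=-1)([x1, y2])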
def mish(x):
    return x * np.tanh(softplus(x))
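# Quick sanity checks for mish (illustrative), with a self-contained
# NumPy softplus:
import numpy as np

def softplus(x):
    return np.logaddexp(0.0, x)   # stable log(1 + exp(x))

def mish(x):
    return x * np.tanh(softplus(x))

assert mish(0.0) == 0.0                 # passes through the origin
assert np.isclose(mish(20.0), 20.0)     # ~identity for large positive x
assert -0.31 < mish(-1.2) < -0.30       # global minimum near x ≈ -1.19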
def call(self, x):
    output_mu = K.dot(x, self.kernel_1) + self.bias_1
    output_sig = K.dot(x, self.kernel_2) + self.bias_2
    # softplus keeps the scale output strictly positive; the small constant
    # guards against a degenerate zero variance
    output_sig_pos = softplus(output_sig) + 1e-3
    return [output_mu, output_sig_pos]
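# One way such a (mu, sigma) head is typically consumed (an assumption, not
# shown in the source): a Gaussian negative log-likelihood, where the
# softplus'd second output serves as the standard deviation.
import numpy as np

def gaussian_nll(y, mu, sigma):
    return 0.5 * np.mean(np.log(2 * np.pi * sigma ** 2) + ((y - mu) / sigma) ** 2)

y = np.array([1.0, -0.5])
mu = np.array([0.9, -0.4])
sigma = np.array([0.5, 0.8])   # strictly positive thanks to softplus(.) + 1e-3
print(gaussian_nll(y, mu, sigma))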
eps = 1e-7

############# Section I: define utility functions #############

def slice_func(x, start, size):
    """Utility function. Take a slice of a tensor, starting at 'start'
    with length 'size'. See tf.slice for details.
    """
    return tf.slice(x, [0, start], [-1, size])


def perm_func(x, ind):
    """Utility function. Permute x along its last axis with the given
    indices. See tf.gather for details.
    """
    return tf.gather(x, indices=ind, axis=-1)


squeeze_func = Lambda(lambda x: K.squeeze(x, 1))
softplus_func = Lambda(lambda x: softplus(x))
sigmoid_func = Lambda(lambda x: sigmoid(x))
clip_func = Lambda(lambda x: K.clip(x, min_value=1e-7, max_value=1e7))
clip_func2 = Lambda(lambda x: K.clip(x, min_value=1e-7, max_value=1 - 1e-7))

############# Section II: define encoders and decoders #############

## The following three functions define the GIN volume-preserving flow.

def first_nflow_layer(z_input, dim_x, min_gen_nodes=30):
    """Define the first layer in the GIN flow, which maps z to the
    concatenation of z and t(z), where t is parameterized by a neural
    network. This is equivalent to a GIN model whose input is z_{1:dim_z}
    padded with dim_x - dim_z zeros.

    # Arguments
        z_input: latents z
        dim_x: dimension of observations x
        min_gen_nodes: use max(min_gen_nodes, dim_x // 4) units in the
            hidden layer of first_nflow_layer.
    """
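    # --- Hypothetical completion: a sketch of the body the docstring above
    # describes, not the repo's actual implementation (layer depth and
    # activations are assumptions). Dense comes from keras.layers, like the
    # Lambda wrappers used above.
    dim_z = z_input.shape.as_list()[-1]
    n_nodes = max(min_gen_nodes, dim_x // 4)
    h = Dense(n_nodes, activation='relu')(z_input)
    t_z = Dense(dim_x - dim_z)(h)           # t(z): the remaining dim_x - dim_z outputs
    return layers.concatenate([z_input, t_z], axis=-1)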
def _split_and_apply_activations(self, controller_output):
    """Split the controller output into ntm_output, read addressing data and
    write addressing data.

    Returns a triple (ntm_output, controller_instructions_read,
    controller_instructions_write). ntm_output is a tensor;
    controller_instructions_read and controller_instructions_write are lists
    containing the addressing instructions (k, beta, g, shift, gamma) and,
    for write heads, additionally the erase and add vectors.

    As is necessary for stable results, k and the add vector are activated
    via tanh and the erase vector via sigmoid (this is critical!); shift goes
    through a softmax; g is sigmoided; beta and gamma are passed through
    softplus, with gamma offset by 1 so it stays >= 1 for focus sharpening.
    """
    # splitting
    ntm_output, controller_instructions_read, controller_instructions_write = tf.split(
        controller_output,
        np.asarray([self.output_dim,
                    self.read_heads * self.controller_read_head_emitting_dim,
                    self.write_heads * self.controller_write_head_emitting_dim]),
        axis=1)

    controller_instructions_read = tf.split(
        controller_instructions_read, self.read_heads, axis=1)
    controller_instructions_write = tf.split(
        controller_instructions_write, self.write_heads, axis=1)

    controller_instructions_read = [
        tf.split(single_head_data, np.asarray([self.m_depth, 1, 1, 3, 1]), axis=1)
        for single_head_data in controller_instructions_read]
    controller_instructions_write = [
        tf.split(single_head_data,
                 np.asarray([self.m_depth, 1, 1, 3, 1, self.m_depth, self.m_depth]),
                 axis=1)
        for single_head_data in controller_instructions_write]

    # activation
    ntm_output = self.activation(ntm_output)

    # original activations, IVM:
    # controller_instructions_read = [
    #     (tanh(k), hard_sigmoid(beta) + 0.5, sigmoid(g), softmax(shift),
    #      1 + 9 * sigmoid(gamma))
    #     for (k, beta, g, shift, gamma) in controller_instructions_read]
    # controller_instructions_write = [
    #     (tanh(k), hard_sigmoid(beta) + 0.5, sigmoid(g), softmax(shift),
    #      1 + 9 * sigmoid(gamma), hard_sigmoid(erase_vector), tanh(add_vector))
    #     for (k, beta, g, shift, gamma, erase_vector, add_vector)
    #     in controller_instructions_write]

    # IVM activations
    controller_instructions_read = [
        (
            tanh(k),              # key
            softplus(beta),       # beta, content-based similarity strength
            sigmoid(g),           # interpolation gate
            softmax(shift),       # shift filter
            1 + softplus(gamma),  # gamma, focus sharpening (>= 1)
        ) for (k, beta, g, shift, gamma) in controller_instructions_read]

    controller_instructions_write = [
        (
            tanh(k),                # key
            softplus(beta),         # beta
            sigmoid(g),             # interpolation gate
            softmax(shift),         # shift filter
            1 + softplus(gamma),    # gamma, focus sharpening (>= 1)
            sigmoid(erase_vector),  # erase vector
            tanh(add_vector),       # add vector
        ) for (k, beta, g, shift, gamma, erase_vector, add_vector)
        in controller_instructions_write]

    return (ntm_output, controller_instructions_read, controller_instructions_write)
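# Why softplus(beta) and 1 + softplus(gamma) (illustrative NumPy sketch, not
# part of the layer): beta scales a cosine-similarity softmax, so it must be
# positive; gamma is a sharpening exponent, so it must stay >= 1 or it would
# blur the weighting instead of focusing it.
import numpy as np

def content_weights(k, M, beta):
    sim = M @ k / (np.linalg.norm(M, axis=1) * np.linalg.norm(k) + 1e-8)
    e = np.exp(beta * (sim - sim.max()))
    return e / e.sum()

M = np.random.randn(8, 4)               # memory: 8 slots of depth 4
k = np.random.randn(4)                  # emitted key (tanh-activated above)
w = content_weights(k, M, beta=5.0)     # larger beta -> peakier weights
w_sharp = w ** 2.0 / np.sum(w ** 2.0)   # gamma = 2 sharpens the focus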
# -------------------------
# compute gradient penalty
# -------------------------
dis_real = Input(shape=(image_size, image_size, channels))
noisev = Input(shape=(z_dim, ))
dis_fake = gen(noisev)
delta_input = K.placeholder(shape=(None, image_size, image_size, channels))
alpha = K.random_uniform(shape=[batch_size, 1, 1, 1], minval=0., maxval=1.)
dis_mixed = Input(shape=(image_size, image_size, channels), tensor=dis_real + delta_input)

# non-saturating GAN losses: softplus(-D(real)) for real, softplus(D(fake)) for fake
loss_real = K.sum(softplus(-dis(dis_real))) / batch_size
loss_fake = K.sum(softplus(dis(dis_fake))) / batch_size

# penalize the discriminator's gradient norm at points interpolated
# between real and perturbed samples
dis_mixed_real = alpha * dis_real + ((1 - alpha) * dis_mixed)
grad_mixed = K.gradients(dis(dis_mixed_real), [dis_mixed_real])[0]
norm = K.sqrt(K.sum(K.square(grad_mixed), axis=[1, 2, 3]))
grad_penalty = K.mean(K.square(norm - 1))

loss_dis = loss_fake + loss_real + _lambda * grad_penalty

# ----------------------
# loss for discriminator
# ----------------------
training_updates = Adam(lr=lr_D, beta_1=0.5).get_updates(dis.trainable_weights, [], loss_dis)
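# Aside (illustrative): softplus is exactly the non-saturating GAN loss,
# since softplus(-x) == -log(sigmoid(x)). Quick NumPy check:
import numpy as np

def softplus(x):
    return np.logaddexp(0.0, x)   # stable log(1 + exp(x))

logits = np.array([-3.0, 0.0, 3.0])
sigmoid = 1.0 / (1.0 + np.exp(-logits))
assert np.allclose(softplus(-logits), -np.log(sigmoid))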
# scaled activations: each output is multiplied by a constant chosen so the
# activation approximately preserves unit variance for N(0, 1) inputs
'celu': Lambda(lambda x: tf.nn.crelu(x) * 1.270926833152771),  # NB: crelu doubles the channel dim
'elu': Lambda(lambda x: elu(x) * 1.2716004848480225),
'gelu': Lambda(lambda x: gelu(x) * 1.7015043497085571),
# 'glu': lambda x: jax.nn.glu(x) * 1.8484294414520264,
'leaky_relu': Lambda(lambda x: tf.nn.leaky_relu(x) * 1.70590341091156),
'log_sigmoid': Lambda(lambda x: tf.math.log(tf.nn.sigmoid(x)) * 1.9193484783172607),
'log_softmax': Lambda(lambda x: tf.math.log(tf.nn.softmax(x)) * 1.0002083778381348),
'relu': Lambda(lambda x: relu(x) * 1.7139588594436646),
'relu6': Lambda(lambda x: tf.nn.relu6(x) * 1.7131484746932983),
'selu': Lambda(lambda x: selu(x) * 1.0008515119552612),
'sigmoid': Lambda(lambda x: sigmoid(x) * 4.803835391998291),
'silu': Lambda(lambda x: tf.nn.silu(x) * 1.7881293296813965),
'soft_sign': Lambda(lambda x: tf.nn.softsign(x) * 2.338853120803833),
'softplus': Lambda(lambda x: softplus(x) * 1.9203323125839233),
'tanh': Lambda(lambda x: tanh(x) * 1.5939117670059204),
}
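# Illustrative check of where such constants come from: each factor is
# roughly 1/std of the raw activation under a standard-normal input, so the
# scaled output has approximately unit variance.
import numpy as np

x = np.random.randn(1_000_000)
relu_scaled = np.maximum(x, 0.0) * 1.7139588594436646
tanh_scaled = np.tanh(x) * 1.5939117670059204
print(relu_scaled.std(), tanh_scaled.std())   # both close to 1.0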
# + content loss
# -----------------
hr_image = Input(shape=hr_shape)
lr_image = Input(shape=lr_shape)
fake_hr_image = gen(lr_image)
fake_features1 = VGG1(fake_hr_image)
real_features1 = VGG1(hr_image)
fake_features2 = VGG2(fake_hr_image)
real_features2 = VGG2(hr_image)
validity = dis(fake_hr_image)

# -----------------
# adversarial loss
# -----------------
loss_gen = 1e-3 * K.sum(softplus(-validity)) / batch_size

# -----------------
# content loss
#
# error between the outputs of VGG19 layers
# -----------------
loss_gen += content_loss(real_features1, fake_features1)
loss_gen += content_loss(real_features2, fake_features2)

training_updates = Adam(lr=lr_G, beta_1=beta_1).get_updates(gen.trainable_weights, [], loss_gen)
gen_train = K.function([lr_image, hr_image], [loss_gen], training_updates)

# -------------------------
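# `content_loss` is not defined in this snippet; a common choice (an
# assumption here, not confirmed by the source) is the mean squared error
# between the VGG feature maps:
from tensorflow.keras import backend as K

def content_loss(real_features, fake_features):
    return K.mean(K.square(real_features - fake_features))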
def softplus10(x, scaler=1.0):
    """Softplus with variable softness."""
    return softplus(x * scaler) / scaler
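# How `scaler` controls softness (illustrative): as scaler grows, softplus10
# approaches relu; as it shrinks, the knee around zero gets softer.
import numpy as np

def softplus(x):
    return np.logaddexp(0.0, x)

def softplus10(x, scaler=1.0):
    return softplus(x * scaler) / scaler

x = 0.5
print(softplus10(x, 1.0))    # ≈ 0.974 (soft)
print(softplus10(x, 10.0))   # ≈ 0.501 (close to relu(0.5) = 0.5)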