def neural_style_loss(style, combination, num_channels, img_width, img_height):
    assert K.ndim(style) == 3
    assert K.ndim(combination) == 3
    S = gram_matrix(style)
    C = gram_matrix(combination)
    size = img_width * img_height
    return K.sum(K.square(S - C)) / (4. * (num_channels ** 2) * (size ** 2))
def test_binary_accuracy_with_threshold_(self):
    y_true = Input((2,))
    y_pred = Input((2,))
    threshold = K.placeholder((2,))
    acc = binary_accuracy_with_threshold(y_true, y_pred, threshold)
    self.assertEqual(K.ndim(acc), 0)
    binary_accuracy_with_threshold_func = K.function(inputs=[y_true, y_pred, threshold], outputs=[acc])
    acc_val = binary_accuracy_with_threshold_func(
        [np.array([[0, 1], [1, 0]]), np.array([[0.2, 0.6], [0.3, 0.1]]), np.array([0.25, 0.4])])[0]
    self.assertEqual(round(acc_val, 2), 1.00, "acc_val")

    # works on a single threshold
    threshold = K.placeholder(ndim=0)
    acc = binary_accuracy_with_threshold(y_true, y_pred, threshold)
    binary_accuracy_with_threshold_func = K.function(inputs=[y_true, y_pred, threshold], outputs=[acc])
    acc_val = binary_accuracy_with_threshold_func(
        [np.array([[0, 1], [1, 0]]), np.array([[0.2, 0.6], [0.3, 0.1]]), 0.5])[0]
    self.assertEqual(round(acc_val, 2), 0.75, "acc_val")

    # works on 3-dimensional inputs
    y_true = Input((None, 2))
    y_pred = Input((None, 2))
    threshold = K.placeholder((2,))
    acc = binary_accuracy_with_threshold(y_true, y_pred, threshold)
    self.assertEqual(K.ndim(acc), 0)
    binary_accuracy_with_threshold_func = K.function(inputs=[y_true, y_pred, threshold], outputs=[acc])
    acc_val = binary_accuracy_with_threshold_func(
        [np.array([[[0, 1]], [[1, 0]]]), np.array([[[0.2, 0.6]], [[0.3, 0.1]]]), np.array([0.25, 0.4])])[0]
    self.assertEqual(round(acc_val, 2), 1.00, "acc_val")
def buildDecomposition(self):
    q_embedding = self.tensors['q-embedding']
    a_embedding = self.tensors['a-embedding']
    q_match = self.tensors['q-match']
    a_match = self.tensors['a-match']
    # compute q+, q-, a+, a-
    # Note: why do the other layers not need BATCH_SIZE while this one suddenly does?
    # It is a quirk of Lambda: Lambda's output_shape does not take BATCH_SIZE and is
    # inferred automatically. When the upstream output carries BATCH_SIZE, that value
    # is reused as this layer's BATCH_SIZE; when it does not, the inference is anyone's
    # guess. Passing BATCH_SIZE to this Merge therefore works around the Lambda quirk below.
    q_channels = Merge(
        mode=lambda x: decomposite(*x),
        output_shape=(self.params['batch_size'], 2, self.q_length, self.wdim),
        name='q-channels'
    )([q_embedding, q_match])
    a_channels = Merge(
        mode=lambda x: decomposite(*x),
        output_shape=(self.params['batch_size'], 2, self.a_length, self.wdim),
        name='a-channels',
    )([a_embedding, a_match])
    print('q_channels', q_channels._keras_shape, K.ndim(q_channels))
    print('a_channels', a_channels._keras_shape, K.ndim(a_channels))
    self.tensors['q-channels'] = q_channels
    self.tensors['a-channels'] = a_channels
def A_network_output(x):
    # The input of this layer is [L, mu, a] in concatenated form. We first split
    # those up.
    idx = 0
    L_flat = x[:, idx:idx + (self.nb_actions * self.nb_actions + self.nb_actions) // 2]
    idx += (self.nb_actions * self.nb_actions + self.nb_actions) // 2
    mu = x[:, idx:idx + self.nb_actions]
    idx += self.nb_actions
    a = x[:, idx:idx + self.nb_actions]
    idx += self.nb_actions

    # Create L and L^T matrix, which we use to construct the positive-definite matrix P.
    Ls = []
    LTs = []
    for idx in range(self.batch_size):
        L = K.zeros((self.nb_actions, self.nb_actions))
        L = T.set_subtensor(L[np.tril_indices(self.nb_actions)], L_flat[idx, :])
        diag = K.exp(T.diag(L))
        L = T.set_subtensor(L[np.diag_indices(self.nb_actions)], diag)
        Ls.append(L)
        LTs.append(K.transpose(L))
        # TODO: diagonal elements exp
    L = K.pack(Ls)
    LT = K.pack(LTs)
    P = K.batch_dot(L, LT, axes=(1, 2))
    assert K.ndim(P) == 3

    # Combine a, mu and P into a scalar (over the batches).
    A = -.5 * K.batch_dot(K.batch_dot(a - mu, P, axes=(1, 2)), a - mu, axes=1)
    assert K.ndim(A) == 2
    return A
def style_loss(style1, style2):
    assert K.ndim(style1) == 3
    assert K.ndim(style2) == 3
    S = gram_matrix(style1)
    C = gram_matrix(style2)
    channels = 3
    size = img_width * img_height
    return K.sum(K.square(S - C)) / (4. * (channels ** 2) * (size ** 2))
def style_loss(style, combination):
    assert K.ndim(style) == 3
    assert K.ndim(combination) == 3
    S = gram_matrix(style)
    C = gram_matrix(combination)
    channels = 3
    size = img_width * img_height
    return K.sum(K.square(S - C)) / (4. * (channels ** 2) * (size ** 2))
def style_loss(style, combination):
    assert K.ndim(style) == 3
    assert K.ndim(combination) == 3
    S = gram_matrix(style)
    C = gram_matrix(combination)
    channels = 3
    size = img_nrows * img_ncols
    return K.sum(K.square(S - C)) / (4.0 * (channels ** 2) * (size ** 2))
def gram_matrix(x):
    """
    The gram matrix of an image tensor (feature-wise outer product).

    :param x: The tensor containing image features
    :return: A gram matrix
    """
    if K.ndim(x) == 4:
        x = x[0, :, :, :]
    assert K.ndim(x) == 3
    features = K.batch_flatten(x)
    gram = K.dot(features, K.transpose(features))
    return gram
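# A minimal NumPy sketch (illustration only, not part of the snippets above) of
# what the gram_matrix variants compute for a channels-first feature block of
# shape (C, H, W): the (C, C) matrix of inner products between flattened
# feature maps. The helper name gram_matrix_np is hypothetical.
import numpy as np

def gram_matrix_np(feats):
    # feats: (C, H, W) -> flatten each channel, then take the outer product
    c = feats.reshape(feats.shape[0], -1)
    return c @ c.T

x = np.random.rand(3, 4, 5)
g = gram_matrix_np(x)
assert g.shape == (3, 3)
assert np.allclose(g, g.T)  # a Gram matrix is symmetric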
def compute_mask(self, x, mask=None):
    if mask is None:
        return None
    # import pdb
    # pdb.set_trace()
    target_dim = K.ndim(x) - 2
    num_reducing = K.ndim(mask) - target_dim
    if num_reducing:
        axes = tuple([-i for i in range(1, num_reducing + 1)])
        mask = K.any(mask, axes)
    return mask
def softmax(x, axis, mask=None):
    if mask is None:
        mask = K.constant(True)
    mask = K.cast(mask, K.floatx())
    if K.ndim(x) == K.ndim(mask) + 1:
        mask = K.expand_dims(mask)
    m = K.max(x, axis=axis, keepdims=True)
    e = K.exp(x - m) * mask
    s = K.sum(e, axis=axis, keepdims=True)
    s += K.cast(K.cast(s < K.epsilon(), K.floatx()) * K.epsilon(), K.floatx())
    return e / s
def weighted(y_true, y_pred, weights, mask=None):
    assert mask is None
    assert weights is not None
    score_array = fn(y_true, y_pred)

    # reduce score_array to same ndim as weight array
    ndim = K.ndim(score_array)
    weight_ndim = K.ndim(weights)
    score_array = K.mean(score_array, axis=list(range(weight_ndim, ndim)))

    # apply sample weighting
    score_array *= weights
    word_scores = K.sum(score_array, axis=-1)
    return K.mean(word_scores)
def buildFeatures(self, type='shared'):
    assert self.checkTensor('q-channels')
    assert self.checkTensor('a-channels')
    srelu = lambda name: SReLU(name=name)
    features = []
    if type == 'shared':
        q_features = self.linkFeature('q-channels', 'shared-convolution', activation='tanh')
        a_features = self.linkFeature('a-channels', 'shared-convolution', activation='tanh')
    else:
        raise Error('Not Supported')
    print('q-features', q_features._keras_shape, K.ndim(q_features))
    print('a-features', a_features._keras_shape, K.ndim(a_features))
    self.tensors['q-features'] = q_features
    self.tensors['a-features'] = a_features
def build_loss(self):
    r"""Implements the N-dim version of function
    $$TV^{\beta}(x) = \sum_{whc} \left ( \left ( x(h, w+1, c) - x(h, w, c) \right )^{2} +
    \left ( x(h+1, w, c) - x(h, w, c) \right )^{2} \right )^{\frac{\beta}{2}}$$
    to return total variation for all images in the batch.
    """
    image_dims = K.ndim(self.img) - 2

    # Constructing slice [1:] + [:-1] * (image_dims - 1) and [:-1] * (image_dims)
    start_slice = [slice(1, None, None)] + [slice(None, -1, None) for _ in range(image_dims - 1)]
    end_slice = [slice(None, -1, None) for _ in range(image_dims)]
    samples_channels_slice = [slice(None, None, None), slice(None, None, None)]

    # Compute pixel diffs by rolling slices to the right per image dim.
    tv = None
    for i in range(image_dims):
        ss = tuple(samples_channels_slice + start_slice)
        es = tuple(samples_channels_slice + end_slice)
        diff_square = K.square(self.img[utils.slicer[ss]] - self.img[utils.slicer[es]])
        tv = diff_square if tv is None else tv + diff_square

        # Roll over to next image dim
        start_slice = np.roll(start_slice, 1).tolist()
        end_slice = np.roll(end_slice, 1).tolist()

    tv = K.sum(K.pow(tv, self.beta / 2.))
    return normalize(self.img, tv)
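# A hedged NumPy illustration of the total-variation quantity the loss above
# sums over a batch, assuming channels-first images of shape (C, H, W) and
# beta = 2. The helper name total_variation_np is hypothetical.
import numpy as np

def total_variation_np(img, beta=2.0):
    # img: (C, H, W); squared forward differences along H and W, cropped to a
    # common (H-1, W-1) window, raised to beta/2 and summed.
    dh = img[:, 1:, :-1] - img[:, :-1, :-1]
    dw = img[:, :-1, 1:] - img[:, :-1, :-1]
    return np.sum((dh ** 2 + dw ** 2) ** (beta / 2.0))

img = np.random.rand(3, 8, 8)
print(total_variation_np(img))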
def get_output(self, train=False):
    def format_shape(shape):
        if K._BACKEND == 'tensorflow':
            def trf(x):
                try:
                    return int(x)
                except TypeError:
                    return x
            return list(map(trf, shape))
        return shape

    X = self.get_input(train)
    in_shape = format_shape(K.shape(X))
    batch_flatten_len = K.prod(in_shape[:2])
    cast_in_shape = (batch_flatten_len,) + tuple(in_shape[i] for i in range(2, K.ndim(X)))

    pre_outs = self.layer(K.reshape(X, cast_in_shape))

    out_shape = format_shape(K.shape(pre_outs))
    cast_out_shape = (in_shape[0], in_shape[1]) + tuple(out_shape[i] for i in range(1, K.ndim(pre_outs)))

    outputs = K.reshape(pre_outs, cast_out_shape)
    return outputs
def get_output(self, train=False, get_tuple=False):
    # input shape: (nb_samples, time (padded with zeros), input_dim)
    X = self.get_input(train)
    assert K.ndim(X) == 3

    mask = self.get_output_mask(train)
    if mask:
        # apply mask
        X *= K.cast(K.expand_dims(mask), X.dtype)
        masking = True
    else:
        masking = False

    if self.stateful:
        initial_states = self.states
    else:
        initial_states = self.get_initial_states(X)

    last_output, outputs, other_outputs, states = LX.rnn(
        self.attention_step, X, initial_states,
        self.contexts,
        truncate_gradient=self.truncate_gradient,
        go_backwards=self.go_backwards,
        masking=masking)
    self.other_outputs = other_outputs

    if self.stateful:
        self.updates = []
        for i in range(len(states)):
            self.updates.append((self.states[i], states[i]))

    if self.return_sequences:
        return outputs
    else:
        return last_output
def _step(time, output_ta_t, *states):
    """RNN step function.

    # Arguments
        time: Current timestep value.
        output_ta_t: TensorArray.
        *states: List of states.

    # Returns
        Tuple: `(time + 1, output_ta_t) + tuple(new_states)`
    """
    current_input = input_ta.read(time)
    random_cutoff_prob = tf.random_uniform((num_samples,), minval=0, maxval=1)
    output, new_states = step_function(
        current_input,
        {'initial_states': states,
         'random_cutoff_prob': random_cutoff_prob,
         'rec_dp_mask': rec_dp_constants})

    # returned output is (raw/sampled, batch, output_dim)
    axes = [1, 0] + list(range(2, K.ndim(output)))
    output = tf.transpose(output, (axes))

    for state, new_state in zip(states, new_states):
        new_state.set_shape(state.get_shape())

    output_ta_t = output_ta_t.write(time, output)
    return (time + 1, output_ta_t) + tuple(new_states)
def get_output(self, train=False):
    # input shape: (nb_samples, time (padded with zeros), input_dim)
    X = self.get_input(train)
    # mask = self.get_input_mask(train)
    assert K.ndim(X) == 3
    assert K._BACKEND == 'theano'

    # if self.stateful:  # TODO: this seems important
    #     initial_states = self.states
    # else:
    #     initial_states = self.get_initial_states(X)
    initial_states = self.states  # ??

    # todo: ?!?!
    last_output, outputs, states = K.renn(self.step, X,
                                          initial_states,
                                          go_backwards=self.go_backwards)
    if self.stateful:
        self.updates = []
        for i in range(len(states)):
            self.updates.append((self.states[i], states[i]))

    return outputs
def call(self, position):
    inputDim = K.ndim(position)
    positionShape = K.shape(position)
    targetDim = positionShape[-1]
    position = K.reshape(position, (-1, targetDim))
    samples = K.shape(position)[0]
    theta = THT.zeros((samples, 3, 3))

    chw = self.toChw(position)
    chw = K.reshape(chw, (samples, targetDim))
    dx = -self.distortion + 2.0 * self.distortion * self.srng.uniform((samples,))
    dy = -self.distortion + 2.0 * self.distortion * self.srng.uniform((samples,))
    cX = chw[:, 0] + dx
    cY = chw[:, 1] + dy
    h = K.maximum(chw[:, 2] * (1.0 + self.context), self.minSide)
    w = K.maximum(chw[:, 3] * (1.0 + self.context), self.minSide)

    # Calculating the parameters of the transformation
    tx = cX
    ty = cY
    sx = w / 2.0  # Scale x
    sy = h / 2.0  # Scale y

    # Setting transformation
    theta = THT.set_subtensor(theta[:, 0, 0], sx)
    theta = THT.set_subtensor(theta[:, 1, 1], sy)
    theta = THT.set_subtensor(theta[:, 0, 2], tx)
    theta = THT.set_subtensor(theta[:, 1, 2], ty)
    theta = THT.set_subtensor(theta[:, 2, 2], 1.0)

    thetaShape = K.concatenate([positionShape[:-1], K.shape(theta)[-2:]])
    theta = THT.reshape(theta, thetaShape, ndim=inputDim + 1)
    return theta
def call(self, x, mask=None):
    constants = self.get_constants(x)
    assert K.ndim(x) == 5
    if K._BACKEND == 'tensorflow':
        if not self.input_shape[1]:
            raise Exception('When using TensorFlow, you should define '
                            'explicitly the number of timesteps of '
                            'your sequences. Make sure the first layer '
                            'has a "batch_input_shape" argument '
                            'including the samples axis.')

    if self.stateful:
        initial_states = self.states
    else:
        initial_states = self.get_initial_states(x)

    last_output, outputs, states = K.rnn(self.step, x, initial_states,
                                         go_backwards=self.go_backwards,
                                         mask=mask, constants=constants)
    if self.stateful:
        self.updates = []
        for i in range(len(states)):
            self.updates.append((self.states[i], states[i]))

    if self.return_sequences:
        return outputs
    else:
        return last_output
def gram_matrix(x):
    assert Kr.ndim(x) == 3
    features = Kr.batch_flatten(x)
    gram = Kr.dot(features, Kr.transpose(features))
    return gram
def __call__(self, loss):
    x = self.layer.get_output(True)
    assert K.ndim(x) == 4
    a = K.square(x[:, :, 1:, :-1] - x[:, :, :-1, :-1])
    b = K.square(x[:, :, :-1, 1:] - x[:, :, :-1, :-1])
    loss += self.weight * K.mean(K.sum(K.pow(a + b, 1.25), axis=(1, 2, 3)))
    return loss
def __call__(self, loss):
    from . import patches

    output = self.layer.get_output(True)
    assert K.ndim(output) == 4
    batch_size = K.shape(output)[0] // 2
    patch_size = self.patch_size
    patch_stride = 1
    generated = output[:batch_size, :, :, :]
    content = output[batch_size:, :, :, :]

    # extract patches from feature maps
    generated_patches, generated_patches_norm = \
        patches.make_patches(generated, patch_size, patch_stride)
    content_patches, content_patches_norm = \
        patches.make_patches(content, patch_size, patch_stride)
    a_patches, a_patches_norm = \
        patches.make_patches(K.variable(self.features_a), patch_size, patch_stride)
    ap_patches, ap_patches_norm = \
        patches.make_patches(K.variable(self.features_ap), patch_size, patch_stride)

    # find best patches and calculate loss
    patch_ids = patches.find_patch_matches(
        content_patches, content_patches_norm,
        a_patches / a_patches_norm)
    best_analogy_patches = K.reshape(
        ap_patches[patch_ids], K.shape(generated_patches))
    loss += self.weight * K.sum(
        K.square(best_analogy_patches - generated_patches)) / patch_size ** 2
    return loss
def call(self, inputs, **kwargs):
    assert isinstance(inputs, list) and len(inputs) == 3
    first, second, features = inputs[0], inputs[1], inputs[2]
    if not self.from_logits:
        first = kb.clip(first, 1e-10, 1.0)
        second = kb.clip(second, 1e-10, 1.0)
        first_, second_ = kb.log(first), kb.log(second)
    else:
        first_, second_ = first, second

    # embedded_features.shape = (M, T, 1)
    if self.use_intermediate_layer:
        features = kb.dot(features, self.first_kernel)
        features = kb.bias_add(features, self.first_bias, data_format="channels_last")
        features = self.intermediate_activation(features)
    embedded_features = kb.dot(features, self.features_kernel)
    embedded_features = kb.bias_add(
        embedded_features, self.features_bias, data_format="channels_last")
    if self.use_dimension_bias:
        tiling_shape = [1] * (kb.ndim(first) - 1) + [kb.shape(first)[-1]]
        embedded_features = kb.tile(embedded_features, tiling_shape)
        embedded_features = kb.bias_add(
            embedded_features, self.dimensions_bias, data_format="channels_last")
    sigma = kb.sigmoid(embedded_features)

    result = weighted_sum(first_, second_, sigma,
                          self.first_threshold, self.second_threshold)
    probs = kb.softmax(result)
    if self.return_logits:
        return [probs, result]
    return probs
def criterion_GAN(output, target, use_lsgan=True):
    if use_lsgan:
        diff = output - target
        dims = list(range(1, K.ndim(diff)))
        return K.expand_dims((K.mean(diff ** 2, dims)), 0)
    else:
        return K.mean(K.log(output + 1e-12) * target +
                      K.log(1 - output + 1e-12) * (1 - target))
def call(self, x, mask=None):
    # x: (batch_size, input_length, input_dim)
    if mask is None:
        return K.mean(x, axis=1)  # (batch_size, input_dim)
    else:
        # This is to remove padding from the computational graph.
        if K.ndim(mask) > K.ndim(x):
            # This is due to the bug in Bidirectional that is passing the input mask
            # instead of computing output mask.
            # TODO: Fix the implementation of Bidirectional.
            mask = K.any(mask, axis=(-2, -1))
        if K.ndim(mask) < K.ndim(x):
            mask = K.expand_dims(mask)
        masked_input = switch(mask, x, K.zeros_like(x))
        weights = K.cast(mask / (K.sum(mask) + K.epsilon()), 'float32')
        return K.sum(masked_input * weights, axis=1)  # (batch_size, input_dim)
def style_loss(style_image, target_image, style_masks, target_masks):
    '''Calculate style loss between style_image and target_image,
    in all regions.
    '''
    assert 3 == K.ndim(style_image) == K.ndim(target_image)
    assert 3 == K.ndim(style_masks) == K.ndim(target_masks)
    loss = K.variable(0)
    for i in range(nb_labels):
        if K.image_dim_ordering() == 'th':
            style_mask = style_masks[i, :, :]
            target_mask = target_masks[i, :, :]
        else:
            style_mask = style_masks[:, :, i]
            target_mask = target_masks[:, :, i]
        loss += region_style_weight * region_style_loss(style_image,
                                                        target_image,
                                                        style_mask,
                                                        target_mask)
    return loss
def total_variation_loss(x):
    assert K.ndim(x) == 4
    a = K.square(x[:, :, 1:, :img_width - 1] - x[:, :, :img_height - 1, :img_width - 1])
    b = K.square(x[:, :, :img_height - 1, 1:] - x[:, :, :img_height - 1, :img_width - 1])
    # a = K.square(x[:, :, :img_width-1, :img_height-1] - x[:, :, 1:, :img_height-1])
    # b = K.square(x[:, :, :img_width-1, :img_height-1] - x[:, :, :img_width-1, 1:])
    return K.sum(K.pow(a + b, 1.25))
def teacher_forced(h, states):
    # switching from (batch_size, previous_layer_input|true_input, output_dim)
    # to (previous_layer_input|true_input, batch_size, output_dim)
    axes = [1, 0] + list(range(2, K.ndim(h)))
    h = K.permute_dimensions(h, axes)

    prev_layer_input = h[0:1, :, :]
    true_input = h[1:, :, :self.units]  # this should correspond to true input

    prev_sampled_output = true_input

    if self.implementation == 0:
        x_z = prev_layer_input[0, :, :self.units]
        x_r = prev_layer_input[0, :, self.units: 2 * self.units]
        x_h = prev_layer_input[0, :, 2 * self.units:]
    else:
        raise ValueError('Implementation type ' + str(self.implementation) + ' is invalid')

    z = self.recurrent_activation(x_z + K.dot(h_tm1 * rec_dp_mask[0],
                                              self.recurrent_kernel_z))
    r = self.recurrent_activation(x_r + K.dot(h_tm1 * rec_dp_mask[1],
                                              self.recurrent_kernel_r))
    hh = self.activation(x_h +
                         K.dot(r * h_tm1 * rec_dp_mask[2], self.recurrent_kernel_h) +
                         K.dot(r * prev_sampled_output, self.recurrent_kernel_y))

    output = z * h_tm1 + (1. - z) * hh
    return K.stack([output, output])
def free_running(h, states):
    prev_generated_output = initial_states[0][1:, :, :]
    prev_sampled_output = prev_generated_output

    # switching from (batch_size, previous_layer_input|true_input, output_dim)
    # to (previous_layer_input|true_input, batch_size, output_dim)
    axes = [1, 0] + list(range(2, K.ndim(h)))
    h = K.permute_dimensions(h, axes)

    prev_layer_input = h[0:1, :, :]

    if self.implementation == 0:
        x_z = prev_layer_input[0, :, :self.units]
        x_r = prev_layer_input[0, :, self.units: 2 * self.units]
        x_h = prev_layer_input[0, :, 2 * self.units:]

    z = self.recurrent_activation(x_z + K.dot(h_tm1 * rec_dp_mask[0],
                                              self.recurrent_kernel_z))
    r = self.recurrent_activation(x_r + K.dot(h_tm1 * rec_dp_mask[1],
                                              self.recurrent_kernel_r))
    hh = self.activation(x_h +
                         K.dot(r * h_tm1 * rec_dp_mask[2], self.recurrent_kernel_h) +
                         K.dot(r * prev_sampled_output, self.recurrent_kernel_y))

    output = z * h_tm1 + (1. - z) * hh
    final_output = self.output_sampling(output, random_cutoff_vec)

    return K.stack([output, final_output])
def call(self, x, mask=None):
    assert self.built, 'Layer must be built before being called'
    input_shape = K.int_shape(x)

    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[self.axis]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[self.axis] = input_shape[self.axis]

    if sorted(reduction_axes) == list(range(K.ndim(x)))[:-1]:
        x_normed = K.batch_normalization(
            x, self.running_mean, self.running_std,
            self.beta, self.gamma,
            epsilon=self.epsilon)
    else:
        # need broadcasting
        broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)
        broadcast_running_std = K.reshape(self.running_std, broadcast_shape)
        broadcast_beta = K.reshape(self.beta, broadcast_shape)
        broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
        x_normed = K.batch_normalization(
            x, broadcast_running_mean, broadcast_running_std,
            broadcast_beta, broadcast_gamma,
            epsilon=self.epsilon)

    return x_normed
def normalize_vector(x):
    z = K.sum(K.batch_flatten(K.square(x)), axis=1)
    while K.ndim(z) < K.ndim(x):
        z = K.expand_dims(z, axis=-1)
    return x / (K.sqrt(z))
def gram_matrix(x):
    assert K.ndim(x) == 3
    features = K.batch_flatten(x)
    gram = K.dot(features, K.transpose(features))
    return gram
def call(self, inputs):
    input_shape = K.shape(inputs)
    ndim = K.ndim(inputs)
    reduction_axes = list(range(ndim))
    del reduction_axes[self.axis]
    del reduction_axes[0]
    input_dim = input_shape[self.axis] // 2

    mu = K.mean(inputs, axis=reduction_axes)
    broadcast_mu_shape = [1] * ndim
    broadcast_mu_shape[self.axis] = input_shape[self.axis]
    broadcast_mu_shape[0] = K.shape(inputs)[0]
    broadcast_mu = K.reshape(mu, broadcast_mu_shape)
    if self.center:
        input_centred = inputs - broadcast_mu
    else:
        input_centred = inputs

    centred_squared = input_centred ** 2
    if (self.axis == 1 and ndim != 3) or ndim == 2:
        centred_squared_real = centred_squared[:, :input_dim]
        centred_squared_imag = centred_squared[:, input_dim:]
        centred_real = input_centred[:, :input_dim]
        centred_imag = input_centred[:, input_dim:]
    elif ndim == 3:
        centred_squared_real = centred_squared[:, :, :input_dim]
        centred_squared_imag = centred_squared[:, :, input_dim:]
        centred_real = input_centred[:, :, :input_dim]
        centred_imag = input_centred[:, :, input_dim:]
    elif self.axis == -1 and ndim == 4:
        centred_squared_real = centred_squared[:, :, :, :input_dim]
        centred_squared_imag = centred_squared[:, :, :, input_dim:]
        centred_real = input_centred[:, :, :, :input_dim]
        centred_imag = input_centred[:, :, :, input_dim:]
    elif self.axis == -1 and ndim == 5:
        centred_squared_real = centred_squared[:, :, :, :, :input_dim]
        centred_squared_imag = centred_squared[:, :, :, :, input_dim:]
        centred_real = input_centred[:, :, :, :, :input_dim]
        centred_imag = input_centred[:, :, :, :, input_dim:]
    else:
        raise ValueError(
            'Incorrect Layernorm combination of axis and dimensions. axis should be either 1 or -1. '
            'axis: ' + str(self.axis) + '; ndim: ' + str(ndim) + '.')

    if self.scale:
        Vrr = K.mean(centred_squared_real, axis=reduction_axes) + self.epsilon
        Vii = K.mean(centred_squared_imag, axis=reduction_axes) + self.epsilon
        # Vri contains the real and imaginary covariance for each feature map.
        Vri = K.mean(centred_real * centred_imag, axis=reduction_axes)
    elif self.center:
        Vrr = None
        Vii = None
        Vri = None
    else:
        raise ValueError('Error. Both scale and center in batchnorm are set to False.')

    return complex_normalization(input_centred, Vrr, Vii, Vri,
                                 self.beta, self.gamma_rr, self.gamma_ri,
                                 self.gamma_ii, self.scale, self.center,
                                 layernorm=True, axis=self.axis)
def total_variation(y):
    assert K.ndim(y) == 4
    a = K.square(y[:, :res - 1, :res - 1, :] - y[:, 1:, :res - 1, :])
    b = K.square(y[:, :res - 1, :res - 1, :] - y[:, :res - 1, 1:, :])
    return K.mean(K.pow(a + b, 2))
def ComplexBN(input_centred, Vrr, Vii, Vri, beta,
              gamma_rr, gamma_ri, gamma_ii, scale=True,
              center=True, layernorm=False, axis=-1):

    ndim = K.ndim(input_centred)
    input_dim = K.shape(input_centred)[axis] // 2
    if scale:
        gamma_broadcast_shape = [1] * ndim
        gamma_broadcast_shape[axis] = input_dim
    if center:
        broadcast_beta_shape = [1] * ndim
        broadcast_beta_shape[axis] = input_dim * 2

    if scale:
        standardized_output = complex_standardization(
            input_centred, Vrr, Vii, Vri,
            layernorm,
            axis=axis)

        # Now we perform the scaling and shifting of the normalized x using
        # the scaling parameter
        #           [  gamma_rr gamma_ri  ]
        #   Gamma = [  gamma_ri gamma_ii  ]
        # and the shifting parameter
        #    Beta = [beta_real beta_imag].T
        # where:
        # x_real_BN = gamma_rr * x_real_normed + gamma_ri * x_imag_normed + beta_real
        # x_imag_BN = gamma_ri * x_real_normed + gamma_ii * x_imag_normed + beta_imag

        broadcast_gamma_rr = K.reshape(gamma_rr, gamma_broadcast_shape)
        broadcast_gamma_ri = K.reshape(gamma_ri, gamma_broadcast_shape)
        broadcast_gamma_ii = K.reshape(gamma_ii, gamma_broadcast_shape)

        cat_gamma_4_real = K.concatenate([broadcast_gamma_rr, broadcast_gamma_ii], axis=axis)
        cat_gamma_4_imag = K.concatenate([broadcast_gamma_ri, broadcast_gamma_ri], axis=axis)
        if (axis == 1 and ndim != 3) or ndim == 2:
            centred_real = standardized_output[:, :input_dim]
            centred_imag = standardized_output[:, input_dim:]
        elif ndim == 3:
            centred_real = standardized_output[:, :, :input_dim]
            centred_imag = standardized_output[:, :, input_dim:]
        elif axis == -1 and ndim == 4:
            centred_real = standardized_output[:, :, :, :input_dim]
            centred_imag = standardized_output[:, :, :, input_dim:]
        elif axis == -1 and ndim == 5:
            centred_real = standardized_output[:, :, :, :, :input_dim]
            centred_imag = standardized_output[:, :, :, :, input_dim:]
        else:
            raise ValueError(
                'Incorrect Batchnorm combination of axis and dimensions. axis should be either 1 or -1. '
                'axis: ' + str(axis) + '; ndim: ' + str(ndim) + '.')
        rolled_standardized_output = K.concatenate([centred_imag, centred_real], axis=axis)
        if center:
            broadcast_beta = K.reshape(beta, broadcast_beta_shape)
            return cat_gamma_4_real * standardized_output + \
                cat_gamma_4_imag * rolled_standardized_output + broadcast_beta
        else:
            return cat_gamma_4_real * standardized_output + \
                cat_gamma_4_imag * rolled_standardized_output
    else:
        if center:
            broadcast_beta = K.reshape(beta, broadcast_beta_shape)
            return input_centred + broadcast_beta
        else:
            return input_centred
def call(self, x, mask=None):
    # TODO: validate input shape

    assert (len(x) == 3)
    L_flat = x[0]
    mu = x[1]
    a = x[2]

    if self.mode == 'full':
        # Create L and L^T matrix, which we use to construct the positive-definite matrix P.
        L = None
        LT = None
        if K.backend() == 'theano':
            import theano.tensor as T
            import theano

            def fn(x, L_acc, LT_acc):
                x_ = K.zeros((self.nb_actions, self.nb_actions))
                x_ = T.set_subtensor(x_[np.tril_indices(self.nb_actions)], x)
                diag = K.exp(T.diag(x_)) + K.epsilon()
                x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)], diag)
                return x_, x_.T

            outputs_info = [
                K.zeros((self.nb_actions, self.nb_actions)),
                K.zeros((self.nb_actions, self.nb_actions)),
            ]
            results, _ = theano.scan(fn=fn, sequences=L_flat, outputs_info=outputs_info)
            L, LT = results
        elif K.backend() == 'tensorflow':
            import tensorflow as tf

            # Number of elements in a triangular matrix.
            nb_elems = (self.nb_actions * self.nb_actions + self.nb_actions) // 2

            # Create mask for the diagonal elements in L_flat. This is used to exponentiate
            # only the diagonal elements, which is done before gathering.
            diag_indeces = [0]
            for row in range(1, self.nb_actions):
                diag_indeces.append(diag_indeces[-1] + (row + 1))
            diag_mask = np.zeros(1 + nb_elems)  # +1 for the leading zero
            diag_mask[np.array(diag_indeces) + 1] = 1
            diag_mask = K.variable(diag_mask)

            # Add leading zero element to each element in the L_flat. We use this zero
            # element when gathering L_flat into a lower triangular matrix L.
            nb_rows = tf.shape(L_flat)[0]
            zeros = tf.expand_dims(tf.tile(K.zeros((1,)), [nb_rows]), 1)
            try:
                # Old TF behavior.
                L_flat = tf.concat(1, [zeros, L_flat])
            except TypeError:
                # New TF behavior.
                L_flat = tf.concat([zeros, L_flat], 1)

            # Create mask that can be used to gather elements from L_flat and put them
            # into a lower triangular matrix.
            tril_mask = np.zeros((self.nb_actions, self.nb_actions), dtype='int32')
            tril_mask[np.tril_indices(self.nb_actions)] = range(1, nb_elems + 1)

            # Finally, process each element of the batch.
            init = [
                K.zeros((self.nb_actions, self.nb_actions)),
                K.zeros((self.nb_actions, self.nb_actions)),
            ]

            def fn(a, x):
                # Exponentiate everything. This is much easier than only exponentiating
                # the diagonal elements, and, usually, the action space is relatively low.
                x_ = K.exp(x) + K.epsilon()
                # Only keep the diagonal elements.
                x_ *= diag_mask
                # Add the original, non-diagonal elements.
                x_ += x * (1. - diag_mask)
                # Finally, gather everything into a lower triangular matrix.
                L_ = tf.gather(x_, tril_mask)
                return [L_, tf.transpose(L_)]

            tmp = tf.scan(fn, L_flat, initializer=init)
            if isinstance(tmp, (list, tuple)):
                # TensorFlow 0.10 now returns a tuple of tensors.
                L, LT = tmp
            else:
                # Old TensorFlow < 0.10 returns a shared tensor.
                L = tmp[:, 0, :, :]
                LT = tmp[:, 1, :, :]
        else:
            raise RuntimeError('Unknown Keras backend "{}".'.format(K.backend()))
        assert L is not None
        assert LT is not None
        P = K.batch_dot(L, LT)
    elif self.mode == 'diag':
        if K.backend() == 'theano':
            import theano.tensor as T
            import theano

            def fn(x, P_acc):
                x_ = K.zeros((self.nb_actions, self.nb_actions))
                x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)], x)
                return x_

            outputs_info = [
                K.zeros((self.nb_actions, self.nb_actions)),
            ]
            P, _ = theano.scan(fn=fn, sequences=L_flat, outputs_info=outputs_info)
        elif K.backend() == 'tensorflow':
            import tensorflow as tf

            # Create mask that can be used to gather elements from L_flat and put them
            # into a diagonal matrix.
            diag_mask = np.zeros((self.nb_actions, self.nb_actions), dtype='int32')
            diag_mask[np.diag_indices(self.nb_actions)] = range(1, self.nb_actions + 1)

            # Add leading zero element to each element in the L_flat. We use this zero
            # element when gathering L_flat into a lower triangular matrix L.
            nb_rows = tf.shape(L_flat)[0]
            zeros = tf.expand_dims(tf.tile(K.zeros((1,)), [nb_rows]), 1)
            try:
                # Old TF behavior.
                L_flat = tf.concat(1, [zeros, L_flat])
            except TypeError:
                # New TF behavior.
                L_flat = tf.concat([zeros, L_flat], 1)

            # Finally, process each element of the batch.
            def fn(a, x):
                x_ = tf.gather(x, diag_mask)
                return x_

            P = tf.scan(fn, L_flat, initializer=K.zeros((self.nb_actions, self.nb_actions)))
        else:
            raise RuntimeError('Unknown Keras backend "{}".'.format(K.backend()))
    assert P is not None
    assert K.ndim(P) == 3

    # Combine a, mu and P into a scalar (over the batches). What we compute here is
    # -.5 * (a - mu)^T * P * (a - mu), where * denotes the dot-product. Unfortunately
    # TensorFlow handles vector * P slightly suboptimal, hence we convert the vectors to
    # 1xd/dx1 matrices and finally flatten the resulting 1x1 matrix into a scalar. All
    # operations happen over the batch size, which is dimension 0.
    prod = K.batch_dot(K.expand_dims(a - mu, 1), P)
    prod = K.batch_dot(prod, K.expand_dims(a - mu, -1))
    A = -.5 * K.batch_flatten(prod)
    assert K.ndim(A) == 2
    return A
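# A hedged NumPy sketch (illustration only, separate from the layer above) of
# the quantity the NAF-style layers compute per batch element: P = L L^T is
# positive semi-definite, and A = -1/2 (a - mu)^T P (a - mu) is therefore
# never positive. All names here are hypothetical.
import numpy as np

nb_actions, batch = 3, 4
rng = np.random.default_rng(0)
L = np.tril(rng.normal(size=(batch, nb_actions, nb_actions)))  # lower-triangular per sample
mu = rng.normal(size=(batch, nb_actions))
a = rng.normal(size=(batch, nb_actions))

P = L @ np.transpose(L, (0, 2, 1))                   # (batch, n, n), PSD by construction
diff = a - mu                                        # (batch, n)
A = -0.5 * np.einsum('bi,bij,bj->b', diff, P, diff)  # batched quadratic form
assert np.all(A <= 1e-12)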
def complex_standardization(input_centred, Vrr, Vii, Vri, layernorm=False, axis=-1):
    """Complex Standardization of input

    Arguments:
        input_centred -- Input Tensor
        Vrr -- Real component of covariance matrix V
        Vii -- Imaginary component of covariance matrix V
        Vri -- Non-diagonal component of covariance matrix V

    Keyword Arguments:
        layernorm {bool} -- Normalization (default: {False})
        axis {int} -- Axis for Standardization (default: {-1})

    Raises:
        ValueError: Mismatched dimensions

    Returns:
        Complex standardized input
    """
    ndim = K.ndim(input_centred)
    input_dim = K.shape(input_centred)[axis] // 2
    variances_broadcast = [1] * ndim
    variances_broadcast[axis] = input_dim
    if layernorm:
        variances_broadcast[0] = K.shape(input_centred)[0]

    # We require the covariance matrix's inverse square root. That first requires
    # square rooting, followed by inversion (I do this in that order because during
    # the computation of square root we compute the determinant we'll need for
    # inversion as well).

    # tau = Vrr + Vii = Trace. Guaranteed >= 0 because SPD
    tau = Vrr + Vii
    # delta = (Vrr * Vii) - (Vri ** 2) = Determinant. Guaranteed >= 0 because SPD
    delta = (Vrr * Vii) - (Vri ** 2)

    s = K.sqrt(delta)  # Determinant of square root matrix
    t = K.sqrt(tau + 2 * s)

    # The square root matrix could now be explicitly formed as
    #       [ Vrr+s Vri   ]
    # (1/t) [ Vir   Vii+s ]
    # https://en.wikipedia.org/wiki/Square_root_of_a_2_by_2_matrix
    # but we don't need to do this immediately since we can also simultaneously
    # invert. We can do this because we've already computed the determinant of
    # the square root matrix, and can thus invert it using the analytical
    # solution for 2x2 matrices
    #      [ A B ]             [  D -B ]
    # inv( [ C D ] ) = (1/det) [ -C  A ]
    # http://mathworld.wolfram.com/MatrixInverse.html
    # Thus giving us
    #           [  Vii+s  -Vri   ]
    # (1/s)(1/t)[ -Vir     Vrr+s ]
    # So we proceed as follows:

    inverse_st = 1.0 / (s * t)
    Wrr = (Vii + s) * inverse_st
    Wii = (Vrr + s) * inverse_st
    Wri = -Vri * inverse_st

    # And we have computed the inverse square root matrix W = sqrt(V)!
    # Normalization. We multiply, x_normalized = W.x.
    # The returned result will be a complex standardized input
    # where the real and imaginary parts are obtained as follows:
    # x_real_normed = Wrr * x_real_centred + Wri * x_imag_centred
    # x_imag_normed = Wri * x_real_centred + Wii * x_imag_centred

    broadcast_Wrr = K.reshape(Wrr, variances_broadcast)
    broadcast_Wri = K.reshape(Wri, variances_broadcast)
    broadcast_Wii = K.reshape(Wii, variances_broadcast)

    cat_W_4_real = K.concatenate([broadcast_Wrr, broadcast_Wii], axis=axis)
    cat_W_4_imag = K.concatenate([broadcast_Wri, broadcast_Wri], axis=axis)

    if (axis == 1 and ndim != 3) or ndim == 2:
        centred_real = input_centred[:, :input_dim]
        centred_imag = input_centred[:, input_dim:]
    elif ndim == 3:
        centred_real = input_centred[:, :, :input_dim]
        centred_imag = input_centred[:, :, input_dim:]
    elif axis == -1 and ndim == 4:
        centred_real = input_centred[:, :, :, :input_dim]
        centred_imag = input_centred[:, :, :, input_dim:]
    elif axis == -1 and ndim == 5:
        centred_real = input_centred[:, :, :, :, :input_dim]
        centred_imag = input_centred[:, :, :, :, input_dim:]
    else:
        raise ValueError(
            'Incorrect Batchnorm combination of axis and dimensions. axis '
            'should be either 1 or -1. '
            'axis: ' + str(axis) + '; ndim: ' + str(ndim) + '.')

    rolled_input = K.concatenate([centred_imag, centred_real], axis=axis)

    output = cat_W_4_real * input_centred + cat_W_4_imag * rolled_input

    #   Wrr * x_real_centered | Wii * x_imag_centered
    # + Wri * x_imag_centered | Wri * x_real_centered
    # -----------------------------------------------
    # = output
    return output
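# A hedged NumPy check (illustration only) of the closed form used above:
# for an SPD V = [[Vrr, Vri], [Vri, Vii]], with s = sqrt(det V) and
# t = sqrt(trace V + 2 s), W = (1/(s t)) [[Vii+s, -Vri], [-Vri, Vrr+s]]
# satisfies W @ W @ V == I, i.e. W is the inverse square root of V.
import numpy as np

Vrr, Vii, Vri = 2.0, 1.5, 0.3
V = np.array([[Vrr, Vri], [Vri, Vii]])
s = np.sqrt(np.linalg.det(V))
t = np.sqrt(np.trace(V) + 2 * s)
W = np.array([[Vii + s, -Vri], [-Vri, Vrr + s]]) / (s * t)
assert np.allclose(W @ W @ V, np.eye(2))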
def get_probs_from_logits(logits):
    logits_shape = K.shape(logits)
    logits_flat = K.reshape(logits, shape=(-1, logits_shape[K.ndim(logits) - 1]))
    probs_flat = K.softmax(logits_flat)
    return K.reshape(probs_flat, shape=logits_shape)
def call(self, inputs):
    # Note that I is useless, because the layer cannot be used in graph
    # batch mode.
    if len(inputs) == 3:
        X, A, I = inputs
    else:
        X, A = inputs
        I = None

    # Check if the layer is operating in batch mode (X and A have rank 3)
    batch_mode = K.ndim(A) == 3

    # Optionally compute hidden layer
    if self.h is None:
        Hid = X
    else:
        Hid = K.dot(X, self.kernel_in)
        if self.use_bias:
            Hid = K.bias_add(Hid, self.bias_in)
        if self.activation is not None:
            Hid = self.activation(Hid)

    # Compute cluster assignment matrix
    S = K.dot(Hid, self.kernel_out)
    if self.use_bias:
        S = K.bias_add(S, self.bias_out)
    S = activations.softmax(S, axis=-1)  # Apply softmax to get cluster assignments

    # MinCut regularization
    A_pooled = ops.matmul_AT_B_A(S, A)
    num = tf.trace(A_pooled)
    D = ops.degree_matrix(A)
    den = tf.trace(ops.matmul_AT_B_A(S, D))
    cut_loss = -(num / den)
    if batch_mode:
        cut_loss = K.mean(cut_loss)
    self.add_loss(cut_loss)

    # Orthogonality regularization
    SS = ops.matmul_AT_B(S, S)
    I_S = tf.eye(self.k)
    ortho_loss = tf.norm(SS / tf.norm(SS, axis=(-1, -2)) - I_S / tf.norm(I_S),
                         axis=(-1, -2))
    if batch_mode:
        ortho_loss = K.mean(ortho_loss)
    self.add_loss(ortho_loss)

    # Pooling
    X_pooled = ops.matmul_AT_B(S, X)
    A_pooled = tf.linalg.set_diag(A_pooled, tf.zeros(K.shape(A_pooled)[:-1]))  # Remove diagonal
    A_pooled = ops.normalize_A(A_pooled)

    output = [X_pooled, A_pooled]

    if I is not None:
        I_mean = tf.segment_mean(I, I)
        I_pooled = ops.tf_repeat_1d(I_mean, tf.ones_like(I_mean) * self.k)
        output.append(I_pooled)

    if self.return_mask:
        output.append(S)

    return output
def squash_mask(self, mask):
    if K.ndim(mask) == 2:
        return mask
    elif K.ndim(mask) == 3:
        return K.any(mask, axis=-1)
def call(self, inputs):
    num_axis = K.ndim(inputs)
    inputs = K.permute_dimensions(inputs, range(num_axis)[::-1])
    x_outs = K.gather(inputs, self.idxs)
    x_outs = K.permute_dimensions(x_outs, range(num_axis)[::-1])
    return x_outs
def sampled_softmax_loss(weights, biases, num_sampled, num_classes, labels, inputs,
                         mask=None, num_true=1, sampled_values=None,
                         remove_accidental_hits=True):
    """Computes and returns the sampled softmax training loss.

    This is a faster way to train a softmax classifier over a huge number of classes.
    This operation is for training only. It is generally an underestimate of the full
    softmax loss. At inference time, you can compute full softmax probabilities with
    the expression `tf.nn.softmax(tf.matmul(inputs, tf.transpose(weights)) + biases)`.
    See our [Candidate Sampling Algorithms Reference](../../extras/candidate_sampling.pdf).
    Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007)
    ([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math.

    Args:
      weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor` objects
        whose concatenation along dimension 0 has shape `[num_classes, dim]`.
        The (possibly-sharded) class embeddings.
      biases: A `Tensor` of shape `[num_classes]`. The class biases.
      inputs: A `Tensor` of shape `[time_steps, batch_size, dim]`. The forward
        activations of the input network.
      mask: A tensor of shape `[time_steps, batch_size, 1]`.
      labels: A `Tensor` of type `int64` and shape `[time_steps, batch_size, num_true]`.
        The target classes. Note that this format differs from the `labels` argument
        of `nn.softmax_cross_entropy_with_logits`.
      num_sampled: An `int`. The number of classes to randomly sample per batch.
      num_classes: An `int`. The number of possible classes.
      num_true: An `int`. The number of target classes per training example.
      sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`,
        `sampled_expected_count`) returned by a `*_candidate_sampler` function.
        (if None, we default to `log_uniform_candidate_sampler`)
      remove_accidental_hits: A `bool`. Whether to remove "accidental hits" where a
        sampled class equals one of the target classes. Default is True.

    Returns:
      A `batch_size` 1-D tensor of per-example sampled softmax losses.
    """
    assert K.ndim(inputs) == 3  # time_steps, number_samples, input_dim
    nb_samples = K.cast(K.shape(inputs)[1], K.dtype(weights))
    inputs = K.reshape(inputs, (-1, K.shape(inputs)[2]))
    labels = K.reshape(labels, (-1, 1))
    labels = K.cast(labels, 'int64')
    ce = tf.nn.sampled_softmax_loss(weights=weights,
                                    biases=biases,
                                    inputs=inputs,
                                    labels=labels,
                                    num_sampled=num_sampled,
                                    num_classes=num_classes,
                                    num_true=num_true,
                                    sampled_values=sampled_values,
                                    remove_accidental_hits=remove_accidental_hits)
    if mask is not None:
        mask_flat = K.flatten(mask)  # time_steps * nb_samples
        ce *= mask_flat

    print("--sum--sampled_softmax_loss")
    tmp = sum_op.Sum_op(keepdim=True, dimension=0)(ce)
    tmp = K.squeeze(tmp, 0)
    # return K.sum(ce) / nb_samples
    return tmp / nb_samples
def ComplexBN(input_centred, Vrr, Vii, Vri, beta,
              gamma_rr, gamma_ri, gamma_ii, scale=True,
              center=True, layernorm=False, axis=-1):
    """Complex Batch Normalization

    Arguments:
        input_centred -- input data
        Vrr -- Real component of covariance matrix V
        Vii -- Imaginary component of covariance matrix V
        Vri -- Non-diagonal component of covariance matrix V
        beta -- Learnable shift parameter beta
        gamma_rr -- Scaling parameter gamma - rr component of 2x2 matrix
        gamma_ri -- Scaling parameter gamma - ri component of 2x2 matrix
        gamma_ii -- Scaling parameter gamma - ii component of 2x2 matrix

    Keyword Arguments:
        scale {bool} -- Standardization of input (default: {True})
        center {bool} -- Mean-shift correction (default: {True})
        layernorm {bool} -- Normalization (default: {False})
        axis {int} -- Axis for Standardization (default: {-1})

    Raises:
        ValueError: Dimensional mismatch

    Returns:
        Batch-Normalized Input
    """
    ndim = K.ndim(input_centred)
    input_dim = K.shape(input_centred)[axis] // 2
    if scale:
        gamma_broadcast_shape = [1] * ndim
        gamma_broadcast_shape[axis] = input_dim
    if center:
        broadcast_beta_shape = [1] * ndim
        broadcast_beta_shape[axis] = input_dim * 2

    if scale:
        standardized_output = complex_standardization(
            input_centred, Vrr, Vii, Vri,
            layernorm,
            axis=axis)

        # Now we perform the scaling and shifting of the normalized x using
        # the scaling parameter
        #           [  gamma_rr gamma_ri  ]
        #   Gamma = [  gamma_ri gamma_ii  ]
        # and the shifting parameter
        #    Beta = [beta_real beta_imag].T
        # where:
        # x_real_BN = gamma_rr * x_real_normed + gamma_ri * x_imag_normed + beta_real
        # x_imag_BN = gamma_ri * x_real_normed + gamma_ii * x_imag_normed + beta_imag

        broadcast_gamma_rr = K.reshape(gamma_rr, gamma_broadcast_shape)
        broadcast_gamma_ri = K.reshape(gamma_ri, gamma_broadcast_shape)
        broadcast_gamma_ii = K.reshape(gamma_ii, gamma_broadcast_shape)

        cat_gamma_4_real = K.concatenate([broadcast_gamma_rr, broadcast_gamma_ii], axis=axis)
        cat_gamma_4_imag = K.concatenate([broadcast_gamma_ri, broadcast_gamma_ri], axis=axis)
        if (axis == 1 and ndim != 3) or ndim == 2:
            centred_real = standardized_output[:, :input_dim]
            centred_imag = standardized_output[:, input_dim:]
        elif ndim == 3:
            centred_real = standardized_output[:, :, :input_dim]
            centred_imag = standardized_output[:, :, input_dim:]
        elif axis == -1 and ndim == 4:
            centred_real = standardized_output[:, :, :, :input_dim]
            centred_imag = standardized_output[:, :, :, input_dim:]
        elif axis == -1 and ndim == 5:
            centred_real = standardized_output[:, :, :, :, :input_dim]
            centred_imag = standardized_output[:, :, :, :, input_dim:]
        else:
            raise ValueError(
                'Incorrect Batchnorm combination of axis and dimensions. axis'
                ' should be either 1 or -1. '
                'axis: ' + str(axis) + '; ndim: ' + str(ndim) + '.')
        rolled_standardized_output = K.concatenate([centred_imag, centred_real], axis=axis)
        if center:
            broadcast_beta = K.reshape(beta, broadcast_beta_shape)
            return cat_gamma_4_real * standardized_output + \
                cat_gamma_4_imag * rolled_standardized_output + broadcast_beta
        else:
            return cat_gamma_4_real * standardized_output + \
                cat_gamma_4_imag * rolled_standardized_output
    else:
        if center:
            broadcast_beta = K.reshape(beta, broadcast_beta_shape)
            return input_centred + broadcast_beta
        else:
            return input_centred
def region_style_loss(style_image, target_image, style_mask, target_mask):
    assert 3 == K.ndim(style_image) == K.ndim(target_image)
    assert 2 == K.ndim(style_mask) == K.ndim(target_mask)
    s = gram_matrix(style_image * style_mask) * K.sum(style_mask)
    c = gram_matrix(target_image * target_mask) * K.sum(target_mask)
    return K.sum(K.square(s - c))
z = z_in
z = Dense(z_dim, activation='relu')(z)
z = Dense(z_dim, activation='relu')(z)
z = Dense(z_dim, activation='relu')(z)
z = Dense(1, activation='sigmoid')(z)

print('z.shape', z.shape)

LocalDiscriminator = Model(z_in, z)

z_f_1_score = LocalDiscriminator(z_f_1)
z_f_2_score = LocalDiscriminator(z_f_2)
print('z_f_2_score.shape', z_f_2_score.shape)
print('z_f_2.shape', z_f_2.shape)
# local_info_loss = -K.mean(K.log(z_f_1_score + 1e-6) + K.log(1 - z_f_2_score + 1e-6))
local_info_loss = -K.sum(
    K.log(z_f_1_score + 1e-6) + K.log(1 - z_f_2_score + 1e-6),
    axis=list(range(1, K.ndim(z_f_1_score))))
print('local_info_loss.shape', local_info_loss.shape)
local_info_loss = K.mean(local_info_loss)
print('local_info_loss', local_info_loss)

# Model used for training
model_train = Model(x_in, [z_z_1_score, z_z_2_score, z_f_1_score, z_f_2_score])
model_train.add_loss(alpha * global_info_loss + beta * local_info_loss + gamma * prior_kl_loss)
# model_train.add_loss(alpha * global_info_loss + gamma * prior_kl_loss)
model_train.compile(optimizer=Adam(1e-3))

model_train.metrics_names.append('global_info_loss')
model_train.metrics_tensors.append(global_info_loss)
print('===============', model_train.metrics_names)
def update_state(self, values, sample_weight=None):
    """Accumulates statistics for computing the metric.

    Args:
        values: Per-example value.
        sample_weight: Optional weighting of each example. Defaults to 1.

    Returns:
        Update op.
    """
    [
        values
    ], sample_weight = metrics_utils.ragged_assert_compatible_and_get_flat_values(  # noqa: E501
        [values], sample_weight)
    try:
        values = tf.cast(values, self._dtype)
    except (ValueError, TypeError):
        msg = (
            "The output of a metric function can only be a single Tensor. "
            f"Received: {values}. ")
        if isinstance(values, dict):
            msg += (
                "To return a dict of values, implement a custom Metric "
                "subclass.")
        raise RuntimeError(msg)
    if sample_weight is not None:
        sample_weight = tf.cast(sample_weight, self._dtype)
        # Update dimensions of weights to match with values if possible.
        (
            values,
            _,
            sample_weight,
        ) = losses_utils.squeeze_or_expand_dimensions(
            values, sample_weight=sample_weight)
        try:
            # Broadcast weights if possible.
            sample_weight = tf.__internal__.ops.broadcast_weights(
                sample_weight, values)
        except ValueError:
            # Reduce values to same ndim as weight array
            ndim = backend.ndim(values)
            weight_ndim = backend.ndim(sample_weight)
            if self.reduction == metrics_utils.Reduction.SUM:
                values = tf.reduce_sum(values,
                                       axis=list(range(weight_ndim, ndim)))
            else:
                values = tf.reduce_mean(values,
                                        axis=list(range(weight_ndim, ndim)))
        values = tf.multiply(values, sample_weight)

    value_sum = tf.reduce_sum(values)
    with tf.control_dependencies([value_sum]):
        update_total_op = self.total.assign_add(value_sum)

    # Exit early if the reduction doesn't have a denominator.
    if self.reduction == metrics_utils.Reduction.SUM:
        return update_total_op

    # Update `count` for reductions that require a denominator.
    if self.reduction == metrics_utils.Reduction.SUM_OVER_BATCH_SIZE:
        num_values = tf.cast(tf.size(values), self._dtype)
    elif self.reduction == metrics_utils.Reduction.WEIGHTED_MEAN:
        if sample_weight is None:
            num_values = tf.cast(tf.size(values), self._dtype)
        else:
            num_values = tf.reduce_sum(sample_weight)
    else:
        raise NotImplementedError(
            f'Reduction "{self.reduction}" not implemented. Expected '
            '"sum", "weighted_mean", or "sum_over_batch_size".')

    with tf.control_dependencies([update_total_op]):
        return self.count.assign_add(num_values)
def gram_matrix(x):
    assert 3 == K.ndim(x)
    feats = K.batch_flatten(x)
    gram = K.dot(feats, K.transpose(feats))
    return gram
def total_variation_loss(x):
    assert K.ndim(x) == 4
    a = K.square(x[:, :, :img_width - 1, :img_height - 1] - x[:, :, 1:, :img_height - 1])
    b = K.square(x[:, :, :img_width - 1, :img_height - 1] - x[:, :, :img_width - 1, 1:])
    return K.sum(K.pow(a + b, 1.25))
def continuity_loss(x, img_shape):
    h, w = img_shape
    assert K.ndim(x) == 4
    a = K.square(x[:, :, :h - 1, :w - 1] - x[:, :, 1:, :w - 1])
    b = K.square(x[:, :, :h - 1, :w - 1] - x[:, :, :h - 1, 1:])
    return K.sum(K.pow(a + b, 1.25))
def __init__(self, inputs, depth=None, nb_dense_block=4, growth_rate=12, nb_filter=-1,
             nb_layers_per_block=-1, bottleneck=False, reduction=0.0, dropout_rate=0.0,
             weight_decay=1e-4, subsample_initial_block=False, activation_conv='crelu',
             pooling_func=['max', 'global_average'], include_top=False, classes=None,
             output_activation=None, *args, **kwargs):

    concat_axis = -1 if K.image_data_format() == "channels_last" else 1

    if reduction != 0.0:
        assert reduction <= 1.0 and reduction > 0.0, 'reduction value must lie between 0.0 and 1.0'

    # layers in each dense block
    if isinstance(nb_layers_per_block, (list, tuple)):
        nb_layers = list(nb_layers_per_block)  # Convert tuple to list
        assert len(nb_layers) == (nb_dense_block), 'If list, nb_layer is used as provided. ' \
                                                   'Note that list size must be (nb_dense_block)'
        final_nb_layer = nb_layers[-1]
        nb_layers = nb_layers[:-1]
    else:
        if nb_layers_per_block == -1:
            assert (depth - 4) % 3 == 0, 'Depth must be 3 N + 4 if nb_layers_per_block == -1'
            count = int((depth - 4) / 3)
            if bottleneck:
                count = count // 2
            nb_layers = [count for _ in range(nb_dense_block)]
            final_nb_layer = count
        else:
            final_nb_layer = nb_layers_per_block
            nb_layers = [nb_layers_per_block] * nb_dense_block

    # compute initial nb_filter if -1, else accept users initial nb_filter
    if nb_filter <= 0:
        nb_filter = 2 * growth_rate

    # compute compression factor
    compression = 1.0 - reduction

    # Initial convolution
    if subsample_initial_block:
        initial_kernel = 7
        initial_strides = 2
    else:
        initial_kernel = 3
        initial_strides = 1

    x_complex = ComplexConv1D(
        nb_filter, initial_kernel,
        strides=initial_strides, padding='same', use_bias=False,
        spectral_parametrization=False,
        kernel_regularizer=keras.regularizers.l2(weight_decay))(inputs)

    if subsample_initial_block:
        x_complex = ComplexBatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x_complex)
        x_complex = layer_activation(x_complex, activation_conv)
        if pooling_func[0] == 'max':
            x_complex = ComplexMaxPooling1D(pool_size=3, strides=2, padding='same')(x_complex)
        elif pooling_func[0] == 'average':
            x_complex = ComplexAveragePooling1D(pool_size=3, strides=2, padding='same')(x_complex)

    # Add dense blocks
    for block_idx in range(nb_dense_block - 1):
        x_complex, nb_filter = dense1d_block(x_complex, nb_layers[block_idx], nb_filter,
                                             growth_rate, activation=activation_conv,
                                             bottleneck=bottleneck, dropout_rate=dropout_rate,
                                             weight_decay=weight_decay)
        # add transition_block
        x_complex = transition1d_block(x_complex, nb_filter, activation=activation_conv,
                                       compression=compression, weight_decay=weight_decay)
        nb_filter = int(nb_filter * compression)

    # The last dense_block does not have a transition_block
    x_complex, nb_filter = dense1d_block(x_complex, final_nb_layer, nb_filter, growth_rate,
                                         activation=activation_conv, bottleneck=bottleneck,
                                         dropout_rate=dropout_rate, weight_decay=weight_decay)

    x_complex = ComplexBatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x_complex)
    x_complex = layer_activation(x_complex, activation_conv)

    if include_top:
        assert classes > 0

        if pooling_func[1] == 'global_average':
            x_complex = keras.layers.GlobalAveragePooling1D(name="pool5")(x_complex)
        elif pooling_func[1] == 'complex_average':
            x_complex = ComplexAveragePooling1D(name='pool5')(x_complex)
        elif pooling_func[1] == 'complex_max':
            x_complex = ComplexMaxPooling1D(name='pool5')(x_complex)
        elif pooling_func[1] == 'spectral_average':
            x_complex = SpectralPooling1D(gamma=[0.25, 0.25], name='pool5')(x_complex)

        if output_activation is None:
            output_activation = 'softmax'

        if K.ndim(x_complex) > 2:
            x_complex = keras.layers.Flatten()(x_complex)

        if output_activation.startswith('complex_'):
            output_activation = output_activation[len('complex_'):]
            x = ComplexDense(classes, activation=output_activation)(x_complex)
        else:
            x = keras.layers.Dense(classes, activation=output_activation)(x_complex)
    else:
        x = x_complex

    super(DenseNet1D, self).__init__(inputs=inputs, outputs=x, *args, **kwargs)
def embed_input(self,
                input_layer: 'keras.layers.Layer',
                text_trainer: 'TextTrainer',
                embedding_name: str="embedding"):
    """
    A combined word-and-characters representation requires some fancy footwork to do the
    embedding properly.

    This method assumes the input shape is (..., sentence_length, word_length + 1), where the
    first integer for each word in the tensor is the word index, and the remaining word_length
    entries is the character sequence. We'll first split this into two tensors, one of shape
    (..., sentence_length), and one of shape (..., sentence_length, word_length), where the
    first is the word sequence, and the second is the character sequence for each word. We'll
    pass the word sequence through an embedding layer, as normal, and pass the character
    sequence through a _separate_ embedding layer, then an encoder, to get a word vector out.
    We'll then concatenate the two word vectors, returning a tensor of shape
    (..., sentence_length, embedding_dim * 2).
    """
    # pylint: disable=protected-access
    # So that we end up with even embeddings across different inputs, we'll use half the
    # `embedding_size` in the given `TextTrainer`.
    embedding_size = int(text_trainer.embedding_size / 2)

    # This is happening before any masking is done, so we don't need to worry about the
    # mask_split_axis argument to VectorMatrixSplit.
    words, characters = VectorMatrixSplit(split_axis=-1)(input_layer)
    word_embedding = text_trainer._get_embedded_input(words,
                                                      embedding_size=embedding_size,
                                                      embedding_name='word_' + embedding_name,
                                                      vocab_name='words')
    character_embedding = text_trainer._get_embedded_input(characters,
                                                           embedding_size=embedding_size,
                                                           embedding_name='character_' + embedding_name,
                                                           vocab_name='characters')

    # A note about masking here: we care about the character masks when encoding a character
    # sequence, so we need the mask to be passed to the character encoder correctly. However,
    # we _don't_ care here about whether the whole word will be masked, as the word_embedding
    # will carry that information, so the output mask returned by the TimeDistributed layer
    # here will be ignored.
    word_encoder = TimeDistributed(
        text_trainer._get_encoder(name="word", fallback_behavior="use default params"))

    # We might need to TimeDistribute this again, if our input has ndim higher than 3.
    for _ in range(3, K.ndim(characters)):
        word_encoder = TimeDistributed(word_encoder, name="timedist_" + word_encoder.name)
    word_encoding = word_encoder(character_embedding)

    merge_mode = lambda inputs: K.concatenate(inputs, axis=-1)

    def merge_shape(input_shapes):
        output_shape = list(input_shapes[0])
        output_shape[-1] += input_shapes[1][-1]
        return tuple(output_shape)

    merge_mask = lambda masks: masks[0]

    # If you're embedding multiple inputs in your model, we need the final merge layer here to
    # have a unique name each time. In order to get a unique name, we use the name of the
    # input layer. Except sometimes Keras adds funny things to the end of the input layer, so
    # we'll strip those off.
    input_name = input_layer.name
    if ':' in input_name:
        input_name = input_name.split(':')[0]
    if input_name.split('_')[-1].isdigit():
        input_name = '_'.join(input_name.split('_')[:-1])

    final_embedded_input = merge([word_embedding, word_encoding],
                                 mode=merge_mode,
                                 output_shape=merge_shape,
                                 output_mask=merge_mask,
                                 name='combined_word_embedding_for_' + input_name)
    return final_embedded_input
# define the step for gradient ascent
step = 0.5

# create a dictionary with each layer's name for convenience
layer_dict = dict([(layer.name, layer) for layer in model.layers])

# start with initial input, obviously
# print(model.layers[0].input)
input_img = model.layers[0].input
# Note: the layer index would be -1 instead of 0, but this network uses the first
# convolutional layer as the input instead of a fully-connected one
layer_output = layer_dict[layer_name].output
print(backend.int_shape(layer_output))
print(backend.ndim(layer_output))
print(backend.shape(layer_output))
# layer_output = model.layers[0].output

for n in range(0, 0):
    filter_index = n
    print('Processing filter %d' % filter_index)
    start_time = time.time()

    # build a loss function that maximizes the activation of n filters of the layer considered
    # TODO: select one of these loss functions depending on which layer you're using
    # loss = backend.mean(layer_output[:, filter_index])  # loss function for dense layers
    loss = backend.mean(layer_output[:, filter_index, :, :])  # for non-dense layers

    # compute gradient of input picture wrt this loss
    grads = backend.gradients(loss, input_img)[0]

    # normalize gradient to avoid values
def call(self, x, mask=None):
    if mask is not None:
        if K.ndim(x) != K.ndim(mask):
def softmax_over_time(x):
    assert K.ndim(x) > 2
    e = K.exp(x - K.max(x, axis=1, keepdims=True))
    s = K.sum(e, axis=1, keepdims=True)
    return e / s
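# A hedged NumPy illustration (not part of the snippet above) of what
# softmax_over_time produces: for a (batch, time, 1) score tensor, the entries
# along axis 1 form a distribution that sums to 1 per sequence.
import numpy as np

x = np.random.randn(2, 5, 1)
e = np.exp(x - x.max(axis=1, keepdims=True))
alphas = e / e.sum(axis=1, keepdims=True)
assert np.allclose(alphas.sum(axis=1), 1.0)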
def gram_matrix(x):
    assert K.ndim(x) == 3
    features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram
def getResidualBlock(I, filter_size, featmaps, stage, block, shortcut, convArgs, bnArgs, d):
    """Get residual block."""
    activation = d.act
    drop_prob = d.dropout
    nb_fmaps1, nb_fmaps2 = featmaps
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    if K.image_data_format() == 'channels_first' and K.ndim(I) != 3:
        channel_axis = 1
    else:
        channel_axis = -1

    if d.model == "real":
        O = BatchNormalization(name=bn_name_base + '_2a', **bnArgs)(I)
    elif d.model == "complex":
        O = ComplexBN(name=bn_name_base + '_2a', **bnArgs)(I)
    O = Activation(activation)(O)

    if shortcut == 'regular' or d.spectral_pool_scheme == "nodownsample":
        if d.model == "real":
            O = Conv2D(nb_fmaps1, filter_size, name=conv_name_base + '2a', **convArgs)(O)
        elif d.model == "complex":
            O = ComplexConv2D(nb_fmaps1, filter_size, name=conv_name_base + '2a', **convArgs)(O)
    elif shortcut == 'projection':
        if d.spectral_pool_scheme == "proj":
            O = applySpectralPooling(O, d)
        if d.model == "real":
            O = Conv2D(nb_fmaps1, filter_size, name=conv_name_base + '2a',
                       strides=(2, 2), **convArgs)(O)
        elif d.model == "complex":
            O = ComplexConv2D(nb_fmaps1, filter_size, name=conv_name_base + '2a',
                              strides=(2, 2), **convArgs)(O)

    if d.model == "real":
        O = BatchNormalization(name=bn_name_base + '_2b', **bnArgs)(O)
        O = Activation(activation)(O)
        O = Conv2D(nb_fmaps2, filter_size, name=conv_name_base + '2b', **convArgs)(O)
    elif d.model == "complex":
        O = ComplexBN(name=bn_name_base + '_2b', **bnArgs)(O)
        O = Activation(activation)(O)
        O = ComplexConv2D(nb_fmaps2, filter_size, name=conv_name_base + '2b', **convArgs)(O)

    if shortcut == 'regular':
        O = Add()([O, I])
    elif shortcut == 'projection':
        if d.spectral_pool_scheme == "proj":
            I = applySpectralPooling(I, d)
        if d.model == "real":
            X = Conv2D(nb_fmaps2, (1, 1),
                       name=conv_name_base + '1',
                       strides=(2, 2) if d.spectral_pool_scheme != "nodownsample" else (1, 1),
                       **convArgs)(I)
            O = Concatenate(channel_axis)([X, O])
        elif d.model == "complex":
            X = ComplexConv2D(nb_fmaps2, (1, 1),
                              name=conv_name_base + '1',
                              strides=(2, 2) if d.spectral_pool_scheme != "nodownsample" else (1, 1),
                              **convArgs)(I)
            O_real = Concatenate(channel_axis)([GetReal()(X), GetReal()(O)])
            O_imag = Concatenate(channel_axis)([GetImag()(X), GetImag()(O)])
            O = Concatenate(1)([O_real, O_imag])

    return O
def total_variation_loss(yTrue, yPred):
    assert K.ndim(yTrue) == 4
    diff = yTrue - yPred
    a = K.square(diff[:, :res - 1, :res - 1, :] - diff[:, 1:, :res - 1, :])
    b = K.square(diff[:, :res - 1, :res - 1, :] - diff[:, :res - 1, 1:, :])
    return K.mean(K.pow(a + b, 2))  # tweak the power?
def call(self, inputs, training=None):
    assert self.built, 'Layer must be built before being called'
    input_shape = K.int_shape(inputs)

    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[self.axis]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[self.axis] = input_shape[self.axis]

    mean_batch, var_batch = _moments(inputs, reduction_axes, shift=None, keep_dims=False)
    std_batch = (K.sqrt(var_batch + self.epsilon))

    r = std_batch / (K.sqrt(self.running_variance + self.epsilon))
    r = K.stop_gradient(K.clip(r, 1 / self.r_max, self.r_max))

    d = (mean_batch - self.running_mean) / K.sqrt(self.running_variance + self.epsilon)
    d = K.stop_gradient(K.clip(d, -self.d_max, self.d_max))

    if sorted(reduction_axes) == list(range(K.ndim(inputs)))[:-1]:
        x_normed_batch = (inputs - mean_batch) / std_batch
        x_normed = (x_normed_batch * r + d) * self.gamma + self.beta
    else:
        # need broadcasting
        broadcast_mean = K.reshape(mean_batch, broadcast_shape)
        broadcast_std = K.reshape(std_batch, broadcast_shape)
        broadcast_r = K.reshape(r, broadcast_shape)
        broadcast_d = K.reshape(d, broadcast_shape)
        broadcast_beta = K.reshape(self.beta, broadcast_shape)
        broadcast_gamma = K.reshape(self.gamma, broadcast_shape)

        x_normed_batch = (inputs - broadcast_mean) / broadcast_std
        x_normed = (x_normed_batch * broadcast_r + broadcast_d) * broadcast_gamma + broadcast_beta

    # explicit update to moving mean and standard deviation
    mean_update = K.moving_average_update(self.running_mean, mean_batch, self.momentum)
    variance_update = K.moving_average_update(self.running_variance, std_batch ** 2, self.momentum)
    self.add_update([mean_update, variance_update], inputs)

    # update r_max and d_max
    r_val = self.r_max_value / (1 + (self.r_max_value - 1) * K.exp(-self.t))
    d_val = (self.d_max_value / (1 + ((self.d_max_value / 1e-3) - 1) * K.exp(-(2 * self.t))))

    self.add_update([K.update(self.r_max, r_val),
                     K.update(self.d_max, d_val),
                     K.update_add(self.t, self.t_delta_tensor)], inputs)

    if training in {0, False}:
        return x_normed
    else:
        def normalize_inference():
            if sorted(reduction_axes) == list(range(K.ndim(inputs)))[:-1]:
                x_normed_running = K.batch_normalization(
                    inputs, self.running_mean, self.running_variance,
                    self.beta, self.gamma,
                    epsilon=self.epsilon)
                return x_normed_running
            else:
                # need broadcasting
                broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)
                broadcast_running_std = K.reshape(self.running_variance, broadcast_shape)
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
                x_normed_running = K.batch_normalization(
                    inputs, broadcast_running_mean, broadcast_running_std,
                    broadcast_beta, broadcast_gamma,
                    epsilon=self.epsilon)
                return x_normed_running

        # pick the normalized form of inputs corresponding to the training phase
        # for batch renormalization, inference time remains same as batchnorm
        x_normed = K.in_train_phase(x_normed, normalize_inference, training=training)
        return x_normed
def call(self, inputs, mask=None):
    if type(inputs) is not list or len(inputs) != 2:
        raise Exception('terminal gru runs on list of length 2')

    X = inputs[0]
    true_seq = inputs[1]

    if self.stateful:
        initial_states = self.states
    else:
        initial_states = self.get_initial_states(X)

    # preprocessing makes input into right form for gpu/cpu settings
    # from original GRU code
    recurrent_dropout_constants = self.get_constants(X)[0]
    preprocessed_input = self.preprocess_input(X)

    #################
    # Section for index matching of true inputs
    #################
    # Basically, we need to add an extra timestep of just 0s for predicting the first timestep output

    axes = [1, 0] + list(range(2, K.ndim(true_seq)))
    true_seq = K.permute_dimensions(true_seq, axes)
    zeros = K.zeros_like(true_seq[:1, :, :])

    # add a column of zeros, remove last element
    true_seq = K.concatenate([zeros, true_seq[:K.int_shape(true_seq)[0] - 1, :, :]], axis=0)
    shifted_raw_inputs = K.permute_dimensions(true_seq, axes)

    # concatenate to have same dimension as preprocessed inputs 3xoutput_dim
    # only for self.implementation = 0?
    shifted_raw_inputs = K.concatenate([shifted_raw_inputs,
                                        shifted_raw_inputs,
                                        shifted_raw_inputs], axis=2)

    all_inputs = K.stack([preprocessed_input, shifted_raw_inputs])
    num_dim = K.ndim(all_inputs)
    axes = [1, 2, 0] + list(range(3, num_dim))
    all_inputs = K.permute_dimensions(all_inputs, axes)

    # If not using true sequence, want to feed in a tensor of zeros instead.
    zeros_input_seq = K.zeros_like(preprocessed_input)
    test_phase_all_inputs = K.stack([preprocessed_input, zeros_input_seq])
    test_phase_all_inputs = K.permute_dimensions(test_phase_all_inputs, axes)

    all_inputs = K.in_train_phase(all_inputs, test_phase_all_inputs)

    last_output, outputs, states = sampled_rnn(self.step,
                                               all_inputs,
                                               initial_states,
                                               self.units,
                                               self.rnd_seed,
                                               go_backwards=self.go_backwards,
                                               rec_dp_constants=recurrent_dropout_constants,
                                               mask=None)

    if self.return_sequences:
        return outputs
    else:
        return last_output
def repeat_(x, k):
    tile_factor = [1, k] + [1] * (kb.ndim(x) - 1)
    return kb.tile(x[:, None, :], tile_factor)
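# A small hedged usage note (illustration only): repeat_ inserts a new axis 1
# and tiles along it, so a (batch, dim) input becomes (batch, k, dim). The
# NumPy equivalent of the same operation:
import numpy as np

x = np.arange(6).reshape(2, 3)
k = 4
tiled = np.tile(x[:, None, :], (1, k, 1))
assert tiled.shape == (2, 4, 3)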