def call(self, x, mask=None):
    assert self.built, 'Layer must be built before being called'
    input_shape = K.int_shape(x)
    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[self.axis]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[self.axis] = input_shape[self.axis]
    # compare against list(range(...)) so the check also works on Python 3,
    # where a list never equals a range object
    if sorted(reduction_axes) == list(range(K.ndim(x)))[:-1]:
        x_normed = K.batch_normalization(
            x, self.running_mean, self.running_std,
            self.beta, self.gamma,
            epsilon=self.epsilon)
    else:
        # need broadcasting
        broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)
        broadcast_running_std = K.reshape(self.running_std, broadcast_shape)
        broadcast_beta = K.reshape(self.beta, broadcast_shape)
        broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
        x_normed = K.batch_normalization(
            x, broadcast_running_mean, broadcast_running_std,
            broadcast_beta, broadcast_gamma,
            epsilon=self.epsilon, axis=0)
    return x_normed
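For reference, `K.batch_normalization` applies the standard inference-time transform y = gamma * (x - mean) / sqrt(var + eps) + beta. A minimal NumPy sketch of that formula, with made-up values (note that several snippets here store a variance in a field named `running_std`):

import numpy as np

def bn_inference(x, mean, var, beta, gamma, eps=1e-3):
    # y = gamma * (x - mean) / sqrt(var + eps) + beta
    return gamma * (x - mean) / np.sqrt(var + eps) + beta

x = np.array([[1.0, 2.0], [3.0, 4.0]])     # (batch, features), illustrative values
mean, var = x.mean(axis=0), x.var(axis=0)  # per-feature statistics
y = bn_inference(x, mean, var, beta=0.0, gamma=1.0)
print(y.mean(axis=0), y.var(axis=0))       # approximately 0 mean, unit variance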
def normalize_inference():
    if sorted(reduction_axes) == list(range(K.ndim(inputs)))[:-1]:
        x_normed_running = K.batch_normalization(
            inputs, self.running_mean, self.running_variance,
            self.beta, self.gamma,
            epsilon=self.epsilon)
        return x_normed_running
    else:
        # need broadcasting
        broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)
        broadcast_running_std = K.reshape(self.running_variance, broadcast_shape)
        broadcast_beta = K.reshape(self.beta, broadcast_shape)
        broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
        x_normed_running = K.batch_normalization(
            inputs, broadcast_running_mean, broadcast_running_std,
            broadcast_beta, broadcast_gamma,
            epsilon=self.epsilon)
        return x_normed_running
def call(self, x, mask=None):
    if self.mode == 0 or self.mode == 2:
        assert self.built, 'Layer must be built before being called'
        input_shape = self.input_spec[0].shape
        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[self.axis]
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis]

        if self.mode == 2:
            x_normed, mean, std = K.normalize_batch_in_training(
                x, self.gamma, self.beta, reduction_axes,
                epsilon=self.epsilon)
        else:
            # mode 0
            # NOTE: the trailing `and False` deliberately disables the
            # layer-sharing check, so this branch never raises
            if self.called_with not in {None, x} and False:
                raise Exception('You are attempting to share a '
                                'same `BatchNormalization` layer across '
                                'different data flows. '
                                'This is not possible. '
                                'You should use `mode=2` in '
                                '`BatchNormalization`, which has '
                                'a similar behavior but is shareable '
                                '(see docs for a description of '
                                'the behavior).')
            self.called_with = x
            x_normed, mean, std = K.normalize_batch_in_training(
                x, self.gamma, self.beta, reduction_axes,
                epsilon=self.epsilon)
            self.updates = [K.moving_average_update(self.running_mean, mean, self.momentum),
                            K.moving_average_update(self.running_std, std, self.momentum)]

            if sorted(reduction_axes) == list(range(K.ndim(x)))[:-1]:
                x_normed_running = K.batch_normalization(
                    x, self.running_mean, self.running_std,
                    self.beta, self.gamma,
                    epsilon=self.epsilon)
            else:
                # need broadcasting
                broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)
                broadcast_running_std = K.reshape(self.running_std, broadcast_shape)
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
                x_normed_running = K.batch_normalization(
                    x, broadcast_running_mean, broadcast_running_std,
                    broadcast_beta, broadcast_gamma,
                    epsilon=self.epsilon)

            # pick the normalized form of x corresponding to the training phase
            x_normed = K.in_train_phase(x_normed, x_normed_running)
    elif self.mode == 1:
        # sample-wise normalization
        m = K.mean(x, axis=-1, keepdims=True)
        std = K.sqrt(K.var(x, axis=-1, keepdims=True) + self.epsilon)
        x_normed = (x - m) / (std + self.epsilon)
        x_normed = self.gamma * x_normed + self.beta
    return x_normed
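`K.in_train_phase(a, b)` is what switches between the batch-statistics tensor and the running-statistics tensor: it returns `a` under the training phase and `b` otherwise. A minimal sketch, assuming a TensorFlow-backed Keras backend imported as `K`:

from tensorflow.keras import backend as K

x_train_branch = K.constant([1.0, 2.0])  # used when the learning phase is 1
x_infer_branch = K.constant([3.0, 4.0])  # used when the learning phase is 0
print(K.in_train_phase(x_train_branch, x_infer_branch, training=1))  # [1., 2.]
print(K.in_train_phase(x_train_branch, x_infer_branch, training=0))  # [3., 4.]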
def normalize_inference():
    if needs_broadcasting:
        # In this case we must explicitly broadcast all parameters.
        broadcast_moving_mean = K.reshape(self.moving_mean, broadcast_shape)
        broadcast_moving_variance = K.reshape(self.moving_variance, broadcast_shape)
        if self.center:
            broadcast_beta = K.reshape(self.beta, broadcast_shape)
        else:
            broadcast_beta = None
        if self.scale:
            broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
        else:
            broadcast_gamma = None
        return K.batch_normalization(
            inputs,
            broadcast_moving_mean,
            broadcast_moving_variance,
            broadcast_beta,
            broadcast_gamma,
            axis=self.axis,
            epsilon=self.epsilon)
    else:
        return K.batch_normalization(
            inputs,
            self.moving_mean,
            self.moving_variance,
            self.beta,
            self.gamma,
            axis=self.axis,
            epsilon=self.epsilon)
def call(self, x, mask=None):
    assert self.built, 'Layer must be built before being called'
    input_shape = K.int_shape(x)
    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[self.axis]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[self.axis] = input_shape[self.axis]
    if sorted(reduction_axes) == list(range(K.ndim(x)))[:-1]:
        x_normed = K.batch_normalization(
            x, self.running_mean, self.running_std,
            self.beta, self.gamma,
            epsilon=self.epsilon)
    else:
        # need broadcasting
        broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)
        broadcast_running_std = K.reshape(self.running_std, broadcast_shape)
        broadcast_beta = K.reshape(self.beta, broadcast_shape)
        broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
        x_normed = K.batch_normalization(
            x, broadcast_running_mean, broadcast_running_std,
            broadcast_beta, broadcast_gamma,
            epsilon=self.epsilon)
    return x_normed
def normalize_inference():
    if needs_broadcasting:
        # In this case we must explicitly broadcast all parameters.
        broadcast_moving_mean = K.reshape(self.moving_mean, broadcast_shape)
        broadcast_moving_variance = K.reshape(self.moving_variance, broadcast_shape)
        if self.center:
            broadcast_beta = K.reshape(self.beta, broadcast_shape)
        else:
            broadcast_beta = None
        if self.scale:
            broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
        else:
            broadcast_gamma = None
        return K.batch_normalization(
            inputs,
            broadcast_moving_mean,
            broadcast_moving_variance,
            broadcast_beta,
            broadcast_gamma,
            epsilon=self.epsilon)
    else:
        return K.batch_normalization(
            inputs,
            self.moving_mean,
            self.moving_variance,
            self.beta,
            self.gamma,
            epsilon=self.epsilon)
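When `center` or `scale` is disabled, the snippet passes `None` for `beta` or `gamma`, and the backend then skips the shift or scale step entirely. A NumPy sketch of that behavior (illustrative only, not the backend's actual implementation):

import numpy as np

def bn(x, mean, var, beta=None, gamma=None, eps=1e-3):
    out = (x - mean) / np.sqrt(var + eps)
    if gamma is not None:   # scale only if gamma is given
        out = out * gamma
    if beta is not None:    # shift only if beta is given
        out = out + beta
    return out

x = np.array([1.0, 2.0, 3.0])
print(bn(x, x.mean(), x.var()))  # normalization without scale or shift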
def call(self, x, mask=None):
    if self.mode == 0 or self.mode == 2:
        assert self.built, 'Layer must be built before being called'
        input_shape = K.int_shape(x)
        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[self.axis]
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis]

        x_normed, mean, std = K.normalize_batch_in_training(
            x, self.gamma, self.beta, reduction_axes,
            epsilon=self.epsilon)

        if self.mode == 0:
            self.add_update([
                K.moving_average_update(self.running_mean, mean, self.momentum),
                K.moving_average_update(self.running_std, std, self.momentum)
            ], x)

            if sorted(reduction_axes) == list(range(K.ndim(x)))[:-1]:
                x_normed_running = K.batch_normalization(
                    x, self.running_mean, self.running_std,
                    self.beta, self.gamma,
                    epsilon=self.epsilon)
            else:
                # need broadcasting
                broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)
                broadcast_running_std = K.reshape(self.running_std, broadcast_shape)
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
                x_normed_running = K.batch_normalization(
                    x, broadcast_running_mean, broadcast_running_std,
                    broadcast_beta, broadcast_gamma,
                    epsilon=self.epsilon)

            # pick the normalized form of x corresponding to the training phase
            x_normed = K.in_train_phase(x_normed, x_normed_running)
    elif self.mode == 1:
        # sample-wise normalization
        m = K.mean(x, axis=-1, keepdims=True)
        std = K.sqrt(K.var(x, axis=-1, keepdims=True) + self.epsilon)
        x_normed = (x - m) / (std + self.epsilon)
        x_normed = self.gamma * x_normed + self.beta
    else:
        return None
    return x_normed
def call(self, inputs, training=None):
    if int(tf.__version__.split('.')[0]) < 2:
        # TF 1.x spells the keyword `keep_dims`
        mean, var = tf.nn.moments(inputs, axes=[1, 2], keep_dims=True)
        out_layer = K.batch_normalization(inputs, mean, var,
                                          self.beta, self.gamma,
                                          epsilon=self.epsilon)
    else:
        # TF 2.x renamed it to `keepdims`
        mean, var = tf.nn.moments(inputs, axes=[0, 1, 2], keepdims=True)
        out_layer = K.batch_normalization(inputs, mean, var,
                                          self.beta, self.gamma,
                                          axis=-1, epsilon=self.epsilon)
    return out_layer
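The version branch exists mainly because `tf.nn.moments` renamed its keyword from `keep_dims` (TF 1.x) to `keepdims` (TF 2.x). A small compatibility wrapper, sketched under the assumption that only this keyword differs between the versions in use:

import tensorflow as tf

def moments_compat(x, axes):
    # prefer the TF 2.x keyword, fall back to the TF 1.x spelling
    try:
        return tf.nn.moments(x, axes=axes, keepdims=True)
    except TypeError:
        return tf.nn.moments(x, axes=axes, keep_dims=True)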
def call(self, x, mask=None):
    output = K.conv2d(x, self.W, strides=self.subsample,
                      border_mode=self.border_mode,
                      dim_ordering=self.dim_ordering,
                      filter_shape=self.W_shape)

    # added for batch normalization
    input_shape = K.int_shape(output)
    axis = 1
    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[axis]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[axis] = input_shape[axis]

    output_normed, mean, std = K.normalize_batch_in_training(
        output, self.gamma, self.beta, reduction_axes,
        epsilon=self.epsilon)

    self.add_update([K.moving_average_update(self.running_mean, mean, self.momentum),
                     K.moving_average_update(self.running_std, std, self.momentum)],
                    output)

    if sorted(reduction_axes) == list(range(K.ndim(output)))[:-1]:
        output_normed_running = K.batch_normalization(
            output, self.running_mean, self.running_std,
            self.beta, self.gamma,
            epsilon=self.epsilon)
    else:
        # need broadcasting
        broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)
        broadcast_running_std = K.reshape(self.running_std, broadcast_shape)
        broadcast_beta = K.reshape(self.beta, broadcast_shape)
        broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
        output_normed_running = K.batch_normalization(
            output, broadcast_running_mean, broadcast_running_std,
            broadcast_beta, broadcast_gamma,
            epsilon=self.epsilon)

    # pick the normalized form of output corresponding to the training phase
    output_normed = K.in_train_phase(output_normed, output_normed_running)

    if self.bias:
        if self.dim_ordering == 'th':
            output_normed += K.reshape(self.b, (1, self.nb_filter, 1, 1))
        elif self.dim_ordering == 'tf':
            output_normed += K.reshape(self.b, (1, 1, 1, self.nb_filter))
        else:
            raise ValueError('Invalid dim_ordering:', self.dim_ordering)

    output = self.activation(output_normed)
    return output
def tlayer(self, O, T, W, P, K, D, b):
    if self.normalize:
        O = BE.batch_normalization(x=O, mean=BE.mean(O), var=BE.var(O),
                                   gamma=1., beta=0., epsilon=0.0001)
        P = BE.batch_normalization(x=P, mean=BE.mean(P), var=BE.var(P),
                                   gamma=1., beta=0., epsilon=0.0001)
    T_ = BE.reshape(T, [D, D * K])
    OT = BE.dot(O, T_)
    OT = BE.reshape(OT, [-1, D, K])
    P_ = BE.reshape(P, [-1, D, 1])
    OTP = BE.batch_dot(OT, P_, axes=(1, 1))
    OP = BE.concatenate([O, P], axis=1)
    W_ = BE.transpose(W)
    WOP = BE.dot(OP, W_)
    WOP = BE.reshape(WOP, [-1, K, 1])
    b_ = BE.reshape(b, [K, 1])
    S = merge([OTP, WOP, b_], mode='sum')
    S_ = BE.reshape(S, [-1, K])
    R = BE.tanh(S_)
    # print('O shape: ', BE.int_shape(O))
    # print('T_ shape: ', BE.int_shape(T_))
    # print('OT shape:', BE.int_shape(OT))
    # print('P shape: ', BE.int_shape(P))
    # print('P_ shape: ', BE.int_shape(P_))
    # print('OTP shape:', BE.int_shape(OTP))
    # print('OP shape: ', BE.int_shape(OP))
    # print('WOP shape: ', BE.int_shape(WOP))
    # print('WOP reshape: ', BE.int_shape(WOP))
    # print('b_ shape: ', BE.int_shape(b_))
    # print('S shape: ', BE.int_shape(S))
    # print('S_ shape: ', BE.int_shape(S_))
    return R
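`BE.batch_dot(OT, P_, axes=(1, 1))` contracts axis 1 (the `D` dimension) of both batched tensors, so a `(batch, D, K)` tensor against a `(batch, D, 1)` tensor yields `(batch, K, 1)`. A quick shape check, assuming the Keras backend is imported as `BE` as in the snippet; the dimensions are made up:

import numpy as np
from tensorflow.keras import backend as BE

D_dim, K_dim = 4, 3
OT = BE.constant(np.random.rand(2, D_dim, K_dim))  # (batch, D, K)
P_ = BE.constant(np.random.rand(2, D_dim, 1))      # (batch, D, 1)
OTP = BE.batch_dot(OT, P_, axes=(1, 1))
print(BE.int_shape(OTP))                           # (2, 3, 1)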
def normalize_inference():
    return K.batch_normalization(x, moving_mean, moving_var,
                                 weights[1], weights[0],
                                 epsilon=layer["epsilon"])
def normalize_training():
    return K.batch_normalization(x, self.mean, self.variance,
                                 weights[1], weights[0],
                                 epsilon=layer["epsilon"])
def call(self, inputs, training=None):
    input_shape = K.int_shape(inputs)
    # Prepare broadcasting shape.
    ndim = len(input_shape)
    reduction_axes = [ax for ax in range(len(input_shape))
                      if ax not in self.normalization_axis]
    mean = K.mean(inputs, axis=reduction_axes, keepdims=True)
    pooled_mean = K.pool2d(mean, tuple(self.kernel_size), padding='same',
                           pool_mode='avg', data_format=self.data_format)
    var = K.mean(K.pow(inputs - pooled_mean, 2),
                 axis=reduction_axes, keepdims=True)
    pooled_var = K.pool2d(var, tuple(self.kernel_size), padding='same',
                          pool_mode='avg', data_format=self.data_format)
    return K.batch_normalization(inputs, pooled_mean, pooled_var,
                                 self.broadcast_beta, self.broadcast_gamma,
                                 epsilon=self.epsilon)
def layer_norm(l):
    mask = K.cast(K.not_equal(l, 0), dtype=tf.float32)
    N = K.sum(mask)
    mean = K.sum(l) / N
    # divide by N so this is an actual variance, not just a sum of squares
    variance = K.sum(K.square((l - mean) * mask)) / N
    return K.batch_normalization(l, mean, variance, None, None, epsilon=1E-12)
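Since the mask comes from `K.not_equal(l, 0)`, zeros are treated as padding and excluded from the count `N`. The same masked mean/variance in plain NumPy, with illustrative values:

import numpy as np

l = np.array([2.0, 4.0, 0.0, 6.0])      # 0.0 acts as padding
mask = (l != 0).astype(np.float32)
N = mask.sum()                          # 3 valid entries
mean = l.sum() / N                      # 4.0 (padding adds nothing to the sum)
variance = (((l - mean) * mask) ** 2).sum() / N
print(mean, variance)                   # 4.0, ~2.667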
def call(self, x, mask=None):
    assert self.built, 'Layer must be built before being called'
    # get the input shape
    input_shape = K.int_shape(x)
    # set up the reduction axes: every axis except the channel axis
    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[self.axis]
    # create a broadcast shape of the same length as input_shape
    broadcast_shape = [1] * len(input_shape)
    # keep the channel dimension of input_shape in the broadcast shape
    broadcast_shape[self.axis] = input_shape[self.axis]
    # if the reduction axes are all axes of x except the last
    if sorted(reduction_axes) == list(range(K.ndim(x)))[:-1]:
        # no broadcasting of the arrays is needed
        x_normed = K.batch_normalization(x, self.running_mean, self.running_std,
                                         self.beta, self.gamma,
                                         epsilon=self.epsilon)
    else:
        # otherwise broadcast to align the BN parameters (mean, variance, shift)
        broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)
        broadcast_running_std = K.reshape(self.running_std, broadcast_shape)
        broadcast_beta = K.reshape(self.beta, broadcast_shape)
        broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
        x_normed = K.batch_normalization(x, broadcast_running_mean, broadcast_running_std,
                                         broadcast_beta, broadcast_gamma,
                                         epsilon=self.epsilon)
    # return the normalized output
    return x_normed
def __init__(self, layer):
    self.layer = layer
    input = K.placeholder(shape=layer.input_shape)

    output = K.batch_normalization(
        input, layer.running_mean, layer.running_std,
        layer.beta, layer.gamma,
        epsilon=layer.epsilon)
    self.up_func = K.function([input, K.learning_phase()], [output])

    # invert the forward op: x = (y - beta) / gamma * sqrt(var + eps) + mean
    output = (input - layer.beta) / layer.gamma * K.sqrt(layer.running_std + layer.epsilon) + layer.running_mean
    self.down_func = K.function([input], [output])
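To see that `down_func` inverts `up_func`, compose the forward transform with the reconstruction; a NumPy sketch with made-up statistics (here, as in the forward call, `running_std` is assumed to hold a variance):

import numpy as np

x = np.array([0.5, 1.5, 2.5])
mean, var, beta, gamma, eps = 1.0, 4.0, 0.1, 2.0, 1e-3

y = gamma * (x - mean) / np.sqrt(var + eps) + beta       # forward BN
x_rec = (y - beta) / gamma * np.sqrt(var + eps) + mean   # inverse, as above
print(np.allclose(x, x_rec))                             # True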
def normalize_inference():
    normed_inference, mean, variance = K.normalize_batch_in_training(
        inputs, self.gamma, self.beta, reduction_axes,
        epsilon=self.epsilon)
    if needs_broadcasting:
        # In this case we must explicitly broadcast all parameters.
        broadcast_mean = K.reshape(mean, broadcast_shape)
        broadcast_variance = K.reshape(variance, broadcast_shape)
        if self.center:
            broadcast_beta = K.reshape(self.beta, broadcast_shape)
        else:
            broadcast_beta = None
        if self.scale:
            broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
        else:
            broadcast_gamma = None
        return K.batch_normalization(
            inputs,
            broadcast_mean,
            broadcast_variance,
            broadcast_beta,
            broadcast_gamma,
            epsilon=self.epsilon)
    else:
        return K.batch_normalization(
            inputs,
            mean,
            variance,
            self.beta,
            self.gamma,
            epsilon=self.epsilon)
def apply_mode_normalization_inference(moving_mean, moving_variance, beta, gamma):
    inputs_mul_gates_ = self.apply_gates(inputs, input_shape, reduction_axes[1:])
    outputs = []
    for k_ in range(self.k):
        outputs.append(
            K.batch_normalization(inputs_mul_gates_[:, k_],
                                  moving_mean[k_],
                                  moving_variance[k_],
                                  beta / self.k,
                                  gamma,
                                  axis=self.axis,
                                  epsilon=self.epsilon))
    return K.sum(K.stack(outputs, axis=0), axis=0)
def call(self, inputs, training=None):
    x = inputs
    assert not isinstance(x, list)

    # Do the normalization and the rescaling
    xnorm = K.batch_normalization(x, self.moving_mean, self.moving_variance,
                                  self.beta, self.gamma,
                                  epsilon=self.epsilon)

    # Compute and update the minibatch statistics
    if self.update_stats:
        mean, var = self._moments(x, axes=range(len(K.int_shape(x)) - 1))
        self.add_update([
            K.moving_average_update(self.moving_mean, mean, self.momentum),
            K.moving_average_update(self.moving_variance, var, self.momentum)
        ], x)

    return xnorm
def batch_norm(inputs, gamma, beta, dims, ind):
    """Normalize batch and update moving averages for mean and std.

    Input:
        inputs: (batchsize, n_points, k, n_features * 2) - edge_features
        gamma: weight - gamma for batch normalization
        beta: weight - beta for batch normalization
        dims: list - dimensions along which to normalize
        ind: int - indicating which weights to use
    Returns:
        During training:
            normed: (batchsize, n_points, k, n_features * 2) - normalized
                    batch of data using actual batch for normalization
        Else:
            normed_moving: same, but using the updated average values
    """
    # Calculate normalized data, mean and std for batch
    normed, batch_mean, batch_var = K.normalize_batch_in_training(
        x=inputs, gamma=gamma, beta=beta, reduction_axes=dims)

    # Update the moving averages
    self.add_update([
        K.moving_average_update(self.moving_mean[ind], batch_mean, 0.9),
        K.moving_average_update(self.moving_var[ind], batch_var, 0.9)])

    # Calculate normalization using the averages
    normed_moving = K.batch_normalization(
        x=inputs,
        mean=self.moving_mean[ind],
        var=self.moving_var[ind],
        beta=beta,
        gamma=gamma)

    # If training return normed, else normed_moving
    return K.in_train_phase(normed, normed_moving)
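`K.moving_average_update(v, value, momentum)` applies an exponential moving average, v <- momentum * v + (1 - momentum) * value. A plain-Python sketch of the update used above (momentum 0.9, made-up batch statistics):

def moving_average_update(v, value, momentum=0.9):
    # v <- momentum * v + (1 - momentum) * value
    return momentum * v + (1 - momentum) * value

v = 0.0
for batch_mean in [1.0, 1.2, 0.8]:   # illustrative per-batch statistics
    v = moving_average_update(v, batch_mean)
print(v)  # slowly tracks the batch means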
def call(self, inputs, training=None):
    # TF 1.x spelling; TF 2.x renamed `keep_dims` to `keepdims`
    mean, var = tf.nn.moments(inputs, axes=[2], keep_dims=True)
    # pass epsilon by keyword: in tf.keras the sixth positional argument is `axis`
    return K.batch_normalization(inputs, mean, var, self.beta, self.gamma,
                                 epsilon=self.epsilon)
def call(self, x, mask=None):
    if self.mode == 0 or self.mode == 2:
        assert self.built, 'Layer must be built before being called'
        input_shape = K.int_shape(x)

        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[self.axis]
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis]

        mean_batch, var_batch = K.moments(x, reduction_axes,
                                          shift=None, keep_dims=False)
        std_batch = (K.sqrt(var_batch + self.epsilon))

        r_max_value = K.get_value(self.r_max)
        r = std_batch / (K.sqrt(self.running_std + self.epsilon))
        r = K.stop_gradient(K.clip(r, 1 / r_max_value, r_max_value))

        d_max_value = K.get_value(self.d_max)
        d = (mean_batch - self.running_mean) / K.sqrt(self.running_std + self.epsilon)
        d = K.stop_gradient(K.clip(d, -d_max_value, d_max_value))

        if sorted(reduction_axes) == list(range(K.ndim(x)))[:-1]:
            x_normed_batch = (x - mean_batch) / std_batch
            x_normed = (x_normed_batch * r + d) * self.gamma + self.beta
        else:
            # need broadcasting
            broadcast_mean = K.reshape(mean_batch, broadcast_shape)
            broadcast_std = K.reshape(std_batch, broadcast_shape)
            broadcast_r = K.reshape(r, broadcast_shape)
            broadcast_d = K.reshape(d, broadcast_shape)
            broadcast_beta = K.reshape(self.beta, broadcast_shape)
            broadcast_gamma = K.reshape(self.gamma, broadcast_shape)

            x_normed_batch = (x - broadcast_mean) / broadcast_std
            x_normed = (x_normed_batch * broadcast_r + broadcast_d) * \
                broadcast_gamma + broadcast_beta

        # explicit update to moving mean and standard deviation
        self.add_update([
            K.moving_average_update(self.running_mean, mean_batch, self.momentum),
            K.moving_average_update(self.running_std, std_batch ** 2, self.momentum)
        ], x)

        # update r_max and d_max
        t_val = K.get_value(self.t)
        r_val = self.r_max_value / (1 + (self.r_max_value - 1) * np.exp(-t_val))
        d_val = self.d_max_value / (1 + ((self.d_max_value / 1e-3) - 1) * np.exp(-(2 * t_val)))
        t_val += float(self.t_delta)

        self.add_update([
            K.update(self.r_max, r_val),
            K.update(self.d_max, d_val),
            K.update(self.t, t_val)
        ], x)

        if self.mode == 0:
            if sorted(reduction_axes) == list(range(K.ndim(x)))[:-1]:
                x_normed_running = K.batch_normalization(
                    x, self.running_mean, self.running_std,
                    self.beta, self.gamma,
                    epsilon=self.epsilon)
            else:
                # need broadcasting
                broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)
                broadcast_running_std = K.reshape(self.running_std, broadcast_shape)
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
                x_normed_running = K.batch_normalization(
                    x, broadcast_running_mean, broadcast_running_std,
                    broadcast_beta, broadcast_gamma,
                    epsilon=self.epsilon)

            # pick the normalized form of x corresponding to the training phase;
            # for batch renormalization, inference time remains same as batchnorm
            x_normed = K.in_train_phase(x_normed, x_normed_running)

    elif self.mode == 1:
        # sample-wise normalization
        m = K.mean(x, axis=self.axis, keepdims=True)
        std = K.sqrt(K.var(x, axis=self.axis, keepdims=True) + self.epsilon)
        x_normed_batch = (x - m) / (std + self.epsilon)

        r_max_value = K.get_value(self.r_max)
        r = std / (self.running_std + self.epsilon)
        r = K.stop_gradient(K.clip(r, 1 / r_max_value, r_max_value))

        d_max_value = K.get_value(self.d_max)
        d = (m - self.running_mean) / (self.running_std + self.epsilon)
        d = K.stop_gradient(K.clip(d, -d_max_value, d_max_value))

        x_normed = ((x_normed_batch * r) + d) * self.gamma + self.beta

        # update r_max and d_max
        t_val = K.get_value(self.t)
        r_val = self.r_max_value / (1 + (self.r_max_value - 1) * np.exp(-t_val))
        d_val = self.d_max_value / (1 + ((self.d_max_value / 1e-3) - 1) * np.exp(-(2 * t_val)))
        t_val += float(self.t_delta)

        self.add_update([
            K.update(self.r_max, r_val),
            K.update(self.d_max, d_val),
            K.update(self.t, t_val)
        ], x)

    return x_normed
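The `r_val`/`d_val` updates follow a sigmoid-like ramp so that renormalization is phased in gradually: `r_val` starts near 1 and `d_val` near 1e-3 at t = 0, and both approach `r_max_value`/`d_max_value` as t grows. A NumPy sketch of the schedule, with the targets r_max = 3 and d_max = 5 assumed purely for illustration:

import numpy as np

r_max_value, d_max_value = 3.0, 5.0   # illustrative targets
for t in [0.0, 1.0, 5.0, 20.0]:
    r_val = r_max_value / (1 + (r_max_value - 1) * np.exp(-t))
    d_val = d_max_value / (1 + ((d_max_value / 1e-3) - 1) * np.exp(-(2 * t)))
    print(t, round(r_val, 3), round(d_val, 3))  # ramps from ~1 / ~0.001 to the targets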
def call(self, x):
    atom_fea, nbr_fea, nbr_fea_idx, mask = x
    _, N, M = nbr_fea_idx.shape

    atom_nbr_fea = tf.gather(atom_fea, indices=nbr_fea_idx, axis=1, batch_dims=1)
    atom_fea_expanded = tf.tile(tf.expand_dims(atom_fea, axis=2), [1, 1, M, 1])
    total_nbr_fea = tf.concat([atom_fea_expanded, atom_nbr_fea, nbr_fea], axis=3)
    total_gated_fea = K.dot(total_nbr_fea, self.gc_W) + self.gc_bias
    total_gated_fea = total_gated_fea * K.cast(mask, tf.float32)

    # batch norm 1
    total_gated_fea = K.reshape(total_gated_fea, (-1, 2 * self.atom_fea_len))
    mask_stacked_1 = K.reshape(mask, (-1, 2 * self.atom_fea_len))
    mu_1 = tf.reduce_sum(total_gated_fea) / tf.math.count_nonzero(
        total_gated_fea, dtype=tf.float32)
    diff_squared_1 = (total_gated_fea - mu_1) ** 2 * K.cast(mask_stacked_1, tf.float32)
    var_1 = K.sum(diff_squared_1) / tf.math.count_nonzero(total_gated_fea,
                                                          dtype=tf.float32)
    total_gated_fea = K.batch_normalization(total_gated_fea, mu_1, var_1,
                                            self.beta_1, self.gamma_1,
                                            epsilon=1e-5)
    total_gated_fea = K.reshape(total_gated_fea, (-1, N, M, 2 * self.atom_fea_len))
    total_gated_fea = total_gated_fea * K.cast(mask, tf.float32)

    nbr_filter, nbr_core = tf.split(total_gated_fea, 2, axis=3)
    nbr_filter = K.sigmoid(nbr_filter)
    nbr_core = K.softplus(nbr_core)
    nbr_summed = K.sum(nbr_filter * nbr_core, axis=2) * K.cast(
        mask[:, :, 0, :self.atom_fea_len], tf.float32)

    # batch norm 2
    mu_2 = K.sum(nbr_summed) / tf.math.count_nonzero(nbr_summed, dtype=tf.float32)
    diff_squared_2 = (nbr_summed - mu_2) ** 2 * K.cast(
        mask[:, :, 0, :nbr_summed.shape[-1]], tf.float32)
    var_2 = K.sum(diff_squared_2) / tf.math.count_nonzero(diff_squared_2,
                                                          dtype=tf.float32)
    nbr_summed = K.batch_normalization(nbr_summed, mu_2, var_2,
                                       self.beta_2, self.gamma_2,
                                       epsilon=1e-5)
    nbr_summed = nbr_summed * K.cast(mask[:, :, 0, :nbr_summed.shape[-1]], tf.float32)

    return K.softplus(atom_fea + nbr_summed) * K.cast(
        mask[:, :, 0, :self.atom_fea_len], tf.float32)
def call(self, x, mask=None):
    input_shape = x.get_shape().as_list()
    if self.dim_ordering == 'th':
        rows = input_shape[2]
        cols = input_shape[3]
    elif self.dim_ordering == 'tf':
        rows = input_shape[1]
        cols = input_shape[2]
    else:
        raise ValueError('Invalid dim_ordering:', self.dim_ordering)
    rows = 2 * rows
    cols = 2 * cols

    if self.dim_ordering == 'th':
        outputShape = (self.batch_size, 3, rows, cols)  # 32 = input_shape[0]
    elif self.dim_ordering == 'tf':
        outputShape = (self.batch_size, rows, cols, 3)
    # print "output Shape (outputShape):", outputShape

    height_factor = 2
    width_factor = 2
    if self.dim_ordering == 'th':
        new_height = x.shape[2].value * height_factor
        new_width = x.shape[3].value * width_factor
        x = tf.transpose(x, [0, 2, 3, 1])
        x = tf.image.resize_nearest_neighbor(x, (new_height, new_width))
        output = tf.transpose(x, [0, 3, 1, 2])
    elif self.dim_ordering == 'tf':
        new_height = x.shape[1].value * height_factor
        new_width = x.shape[2].value * width_factor
        output = tf.image.resize_nearest_neighbor(x, (new_height, new_width))
    else:
        raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
    # print "output Shape:", output

    # added for batch normalization
    input_shape = K.int_shape(output)
    axis = 1
    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[axis]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[axis] = input_shape[axis]

    output_normed, mean, std = K.normalize_batch_in_training(
        output, self.gamma, self.beta, reduction_axes,
        epsilon=self.epsilon)
    self.add_update([
        K.moving_average_update(self.running_mean, mean, self.momentum),
        K.moving_average_update(self.running_std, std, self.momentum)
    ], output)

    if sorted(reduction_axes) == list(range(K.ndim(output)))[:-1]:
        output_normed_running = K.batch_normalization(output,
                                                      self.running_mean,
                                                      self.running_std,
                                                      self.beta,
                                                      self.gamma,
                                                      epsilon=self.epsilon)
    else:
        # need broadcasting
        broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)
        broadcast_running_std = K.reshape(self.running_std, broadcast_shape)
        broadcast_beta = K.reshape(self.beta, broadcast_shape)
        broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
        output_normed_running = K.batch_normalization(
            output, broadcast_running_mean, broadcast_running_std,
            broadcast_beta, broadcast_gamma,
            epsilon=self.epsilon)

    # pick the normalized form of output corresponding to the training phase
    output_normed = K.in_train_phase(output_normed, output_normed_running)

    if self.bias:
        if self.dim_ordering == 'th':
            output_normed += K.reshape(self.b, (1, self.nb_filter, 1, 1))
        elif self.dim_ordering == 'tf':
            output_normed += K.reshape(self.b, (1, 1, 1, self.nb_filter))
        else:
            raise ValueError('Invalid dim_ordering:', self.dim_ordering)

    output = self.activation(output_normed)
    return output
def train_and_apply():
    np.random.seed(1)
    ROOT.gROOT.SetBatch()

    # Extract data from root file
    tree = uproot.open("out_all.root")["outA/Tevts"]
    branch_mc = [
        "MC_B_P", "MC_B_eta", "MC_B_phi", "MC_B_pt",
        "MC_D0_P", "MC_D0_eta", "MC_D0_phi", "MC_D0_pt",
        "MC_Dst_P", "MC_Dst_eta", "MC_Dst_phi", "MC_Dst_pt",
        "MC_Est_mu", "MC_M2_miss",
        "MC_mu_P", "MC_mu_eta", "MC_mu_phi", "MC_mu_pt",
        "MC_pis_P", "MC_pis_eta", "MC_pis_phi", "MC_pis_pt", "MC_q2"
    ]
    branch_rec = [
        "B_P", "B_eta", "B_phi", "B_pt",
        "D0_P", "D0_eta", "D0_phi", "D0_pt",
        "Dst_P", "Dst_eta", "Dst_phi", "Dst_pt",
        "Est_mu", "M2_miss",
        "mu_P", "mu_eta", "mu_phi", "mu_pt",
        "pis_P", "pis_eta", "pis_phi", "pis_pt", "q2"
    ]
    nvariable = len(branch_mc)

    x_train = tree.array(branch_mc[0], entrystop=options.maxevents)
    for i in range(1, nvariable):
        x_train = np.vstack(
            (x_train, tree.array(branch_mc[i], entrystop=options.maxevents)))
    x_test = tree.array(branch_rec[0], entrystop=options.maxevents)
    for i in range(1, nvariable):
        x_test = np.vstack(
            (x_test, tree.array(branch_rec[i], entrystop=options.maxevents)))
    x_train = x_train.T
    x_test = x_test.T
    x_test = array2D_float(x_test)  # different type of reconstruction variables

    # BN normalization
    # note: the positional order of K.batch_normalization is
    # (x, mean, var, beta, gamma), so `gamma` below is passed as beta
    # and `beta` as gamma
    gamma = 0
    beta = 0.2

    ar = np.array(x_train)
    a = K.constant(ar[:, 0])
    mean = K.mean(a)
    var = K.var(a)
    x_train = K.eval(K.batch_normalization(a, mean, var, gamma, beta))
    for i in range(1, nvariable):
        a = K.constant(ar[:, i])
        mean = K.mean(a)
        var = K.var(a)
        a = K.eval(K.batch_normalization(a, mean, var, gamma, beta))
        x_train = np.vstack((x_train, a))
    x_train = x_train.T

    ar = np.array(x_test)
    a = K.constant(ar[:, 0])
    mean = K.mean(a)
    var = K.var(a)
    x_test = K.eval(K.batch_normalization(a, mean, var, gamma, beta))
    for i in range(1, nvariable):
        a = K.constant(ar[:, i])
        mean = K.mean(a)
        var = K.var(a)
        a = K.eval(K.batch_normalization(a, mean, var, gamma, beta))
        x_test = np.vstack((x_test, a))
    x_test = x_test.T

    # Add noise; remains to be improved
    noise = np.random.normal(loc=0.0, scale=0.01, size=x_train.shape)
    x_train_noisy = x_train + noise
    noise = np.random.normal(loc=0.0, scale=0.01, size=x_test.shape)
    x_test_noisy = x_test + noise
    x_train = np.clip(x_train, -1., 1.)
    x_test = np.clip(x_test, -1., 1.)
    x_train_noisy = np.clip(x_train_noisy, -1., 1.)
    x_test_noisy = np.clip(x_test_noisy, -1., 1.)

    # Network parameters
    input_shape = (x_train.shape[1], )
    batch_size = 128
    latent_dim = 2

    # Build the Autoencoder Model
    # First build the Encoder Model
    inputs = Input(shape=input_shape, name='encoder_input')
    x = inputs
    # Shape info needed to build Decoder Model
    shape = K.int_shape(x)
    # Generate the latent vector
    latent = Dense(latent_dim, name='latent_vector')(x)
    # Instantiate Encoder Model
    encoder = Model(inputs, latent, name='encoder')
    encoder.summary()

    # Build the Decoder Model
    latent_inputs = Input(shape=(latent_dim, ), name='decoder_input')
    x = Dense(shape[1])(latent_inputs)
    x = Reshape((shape[1], ))(x)
    outputs = Activation('tanh', name='decoder_output')(x)
    # Instantiate Decoder Model
    decoder = Model(latent_inputs, outputs, name='decoder')
    decoder.summary()

    # Autoencoder = Encoder + Decoder
    # Instantiate Autoencoder Model
    autoencoder = Model(inputs, decoder(encoder(inputs)), name='autoencoder')
    autoencoder.summary()
    autoencoder.compile(loss='mse', optimizer='adam')

    # Train the autoencoder
    autoencoder.fit(x_train_noisy,
                    x_train,
                    validation_data=(x_test_noisy, x_test),
                    epochs=options.epochs,
                    batch_size=batch_size)

    # Predict the autoencoder output from corrupted test information
    x_decoded = autoencoder.predict(x_test_noisy)

    # Draw comparison plots
    c = TCanvas("c", "c", 700, 700)
    fPads1 = TPad("pad1", "Run2", 0.0, 0.29, 1.00, 1.00)
    fPads2 = TPad("pad2", "", 0.00, 0.00, 1.00, 0.29)
    fPads1.SetBottomMargin(0.007)
    fPads1.SetLeftMargin(0.10)
    fPads1.SetRightMargin(0.03)
    fPads2.SetLeftMargin(0.10)
    fPads2.SetRightMargin(0.03)
    fPads2.SetBottomMargin(0.25)
    fPads1.Draw()
    fPads2.Draw()
    fPads1.cd()

    nbin = 50
    min = -1.
    max = 1.
    variable = "P^{B}"
    lbin = (max - min) / nbin
    lbin = str(float((max - min) / nbin))
    xtitle = branch_rec[options.branch - 1]
    ytitle = "Events/" + lbin + "GeV"
    h_rec = TH1D("h_rec", "" + ";%s;%s" % (xtitle, ytitle), nbin, min, max)
    h_rec.Sumw2()
    h_pre = TH1D("h_pre", "" + ";%s;%s" % (xtitle, ytitle), nbin, min, max)
    h_pre.Sumw2()
    for i in range(x_test_noisy.shape[0]):
        h_rec.Fill(x_test_noisy[i][options.branch - 1])
        h_pre.Fill(x_decoded[i][options.branch - 1])
    h_rec = UnderOverFlow1D(h_rec)
    h_pre = UnderOverFlow1D(h_pre)
    maxY = TMath.Max(h_rec.GetMaximum(), h_pre.GetMaximum())

    h_rec.SetLineColor(2)
    h_rec.SetFillStyle(0)
    h_rec.SetLineWidth(2)
    h_rec.SetLineStyle(1)
    h_pre.SetLineColor(3)
    h_pre.SetFillStyle(0)
    h_pre.SetLineWidth(2)
    h_pre.SetLineStyle(1)
    h_rec.SetStats(0)
    h_pre.SetStats(0)
    h_rec.GetYaxis().SetRangeUser(0, maxY * 1.1)
    h_rec.Draw("HIST")
    h_pre.Draw("same HIST")
    h_rec.GetYaxis().SetTitleSize(0.06)
    h_rec.GetYaxis().SetTitleOffset(0.78)

    theLeg = TLegend(0.5, 0.45, 0.95, 0.82, "", "NDC")
    theLeg.SetName("theLegend")
    theLeg.SetBorderSize(0)
    theLeg.SetLineColor(0)
    theLeg.SetFillColor(0)
    theLeg.SetFillStyle(0)
    theLeg.SetLineWidth(0)
    theLeg.SetLineStyle(0)
    theLeg.SetTextFont(42)
    theLeg.SetTextSize(.05)
    theLeg.AddEntry(h_rec, "Reconstruction", "L")
    theLeg.AddEntry(h_pre, "Prediction", "L")
    theLeg.SetY1NDC(0.9 - 0.05 * 6 - 0.005)
    theLeg.SetY1(theLeg.GetY1NDC())
    fPads1.cd()
    theLeg.Draw()
    title = TLatex(
        0.91, 0.93,
        "AE prediction compared with reconstruction, epochs=" + str(options.epochs))
    title.SetNDC()
    title.SetTextSize(0.05)
    title.SetTextFont(42)
    title.SetTextAlign(31)
    title.SetLineWidth(2)
    title.Draw()

    fPads2.cd()
    h_Ratio = h_pre.Clone("h_Ratio")
    h_Ratio.Divide(h_rec)
    h_Ratio.SetLineColor(1)
    h_Ratio.SetLineWidth(2)
    h_Ratio.SetMarkerStyle(8)
    h_Ratio.SetMarkerSize(0.7)
    h_Ratio.GetYaxis().SetRangeUser(0, 2)
    h_Ratio.GetYaxis().SetNdivisions(504, 0)
    h_Ratio.GetYaxis().SetTitle("Pre/Rec")
    h_Ratio.GetYaxis().SetTitleOffset(0.35)
    h_Ratio.GetYaxis().SetTitleSize(0.13)
    h_Ratio.GetYaxis().SetLabelSize(0.11)
    h_Ratio.GetXaxis().SetLabelSize(0.1)
    h_Ratio.GetXaxis().SetTitleOffset(0.8)
    h_Ratio.GetXaxis().SetTitleSize(0.14)
    h_Ratio.SetStats(0)
    axis1 = TGaxis(min, 1, max, 1, 0, 0, 0, "L")
    axis1.SetLineColor(1)
    axis1.SetLineWidth(1)
    for i in range(1, h_Ratio.GetNbinsX() + 1, 1):
        D = h_rec.GetBinContent(i)
        eD = h_rec.GetBinError(i)
        if D == 0:
            eD = 0.92
        B = h_pre.GetBinContent(i)
        eB = h_pre.GetBinError(i)
        if B < 0.1 and eB >= B:
            eB = 0.92
        Err = 0.
        if B != 0.:
            Err = TMath.Sqrt((eD * eD) / (B * B) + (D * D * eB * eB) / (B * B * B * B))
            h_Ratio.SetBinContent(i, D / B)
            h_Ratio.SetBinError(i, Err)
        if B == 0.:
            Err = TMath.Sqrt((eD * eD) / (eB * eB) + (D * D * eB * eB) / (eB * eB * eB * eB))
            h_Ratio.SetBinContent(i, D / 0.92)
            h_Ratio.SetBinError(i, Err)
        if D == 0 and B == 0:
            h_Ratio.SetBinContent(i, -1)
            h_Ratio.SetBinError(i, 0)
    h_Ratio.Draw("e0")
    axis1.Draw()

    c.SaveAs(branch_rec[options.branch - 1] + "_comparision.png")
def normalize_inference():
    if needs_broadcasting:
        # In this case we must explicitly broadcast all parameters.
        broadcast_moving_mean = K.reshape(self.moving_mean, broadcast_shape)
        broadcast_moving_variance = K.reshape(self.moving_variance, broadcast_shape)
        if self.center:
            broadcast_beta = tf.case(
                {c1: lambda: K.reshape(self.beta1, broadcast_shape),
                 c2: lambda: K.reshape(self.beta2, broadcast_shape)},
                default=lambda: K.reshape(self.beta3, broadcast_shape))
        else:
            broadcast_beta = None
        if self.scale:
            broadcast_gamma = tf.case(
                {c1: lambda: K.reshape(self.gamma1, broadcast_shape),
                 c2: lambda: K.reshape(self.gamma2, broadcast_shape)},
                default=lambda: K.reshape(self.gamma3, broadcast_shape))
        else:
            broadcast_gamma = None
        return K.batch_normalization(inputs[0],
                                     broadcast_moving_mean,
                                     broadcast_moving_variance,
                                     broadcast_beta,
                                     broadcast_gamma,
                                     axis=self.axis,
                                     epsilon=self.epsilon)
    else:
        out = tf.case(
            {c1: lambda: K.batch_normalization(
                inputs[0], self.moving_mean, self.moving_variance,
                self.beta1, self.gamma1,
                axis=self.axis, epsilon=self.epsilon),
             c2: lambda: K.batch_normalization(
                inputs[0], self.moving_mean, self.moving_variance,
                self.beta2, self.gamma2,
                axis=self.axis, epsilon=self.epsilon)},
            default=lambda: K.batch_normalization(
                inputs[0], self.moving_mean, self.moving_variance,
                self.beta3, self.gamma3,
                axis=self.axis, epsilon=self.epsilon))
        return out
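`tf.case` picks one set of BN parameters from the boolean predicates `c1` and `c2`, falling back to the default branch. A minimal self-contained sketch of that selection pattern (using the list form of predicate/function pairs):

import tensorflow as tf

t = tf.constant(1)
c1 = tf.equal(t, 0)
c2 = tf.equal(t, 1)
out = tf.case([(c1, lambda: tf.constant(10.0)),
               (c2, lambda: tf.constant(20.0))],
              default=lambda: tf.constant(30.0))
print(out)  # tf.Tensor(20.0, ...) under eager execution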