def call(self, x, mask=None):
    if K.backend() == 'theano':
        return (K.softplus(K.pattern_broadcast(self.beta, self.param_broadcast) * x) *
                K.pattern_broadcast(self.alpha, self.param_broadcast))
    else:
        return K.softplus(self.beta * x) * self.alpha
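# A sketch of the companion `build()`, modeled on keras-contrib's parametric
# activations (PELU has exactly this alpha/beta pair); `shared_axes` and the
# initializer choices are assumptions, not taken from the snippet above.
def build(self, input_shape):
    param_shape = list(input_shape[1:])
    # Axes shared across the parameter get size 1, and Theano must be told
    # explicitly that those singleton axes are allowed to broadcast.
    self.param_broadcast = [False] * len(param_shape)
    if self.shared_axes is not None:
        for i in self.shared_axes:
            param_shape[i - 1] = 1
            self.param_broadcast[i - 1] = True
    self.alpha = self.add_weight(shape=tuple(param_shape), name='alpha',
                                 initializer='ones')
    self.beta = self.add_weight(shape=tuple(param_shape), name='beta',
                                initializer='ones')
    self.built = True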
def call(self, x, mask=None):
    pos = K.relu(x)
    if K.backend() == 'theano':
        neg = (K.pattern_broadcast(self.alpha, self.param_broadcast) *
               K.tanh(K.pattern_broadcast(self.beta, self.param_broadcast) *
                      (x - K.abs(x)) * 0.5))
    else:
        neg = self.alpha * K.tanh(self.beta * (-K.relu(-x)))
    return neg + pos
def call(self, x, mask=None):
    if K.backend() == 'theano':
        alpha = K.pattern_broadcast(self.alpha, self.param_broadcast)
        beta = K.pattern_broadcast(self.beta, self.param_broadcast)
        pos = K.relu(x) * (alpha / beta)
        neg = alpha * (K.exp((-K.relu(-x)) / beta) - 1)
    else:
        pos = K.relu(x) * self.alpha / self.beta
        neg = self.alpha * (K.exp((-K.relu(-x)) / self.beta) - 1)
    return neg + pos
def call(self, inputs, mask=None):
    if K.backend() == 'theano':
        a = K.pattern_broadcast(self.a, self.a_param_broadcast)
        k = K.pattern_broadcast(self.k, self.k_param_broadcast)
        n = K.pattern_broadcast(self.n, self.n_param_broadcast)
        z = K.pattern_broadcast(self.z, self.z_param_broadcast)
    else:
        a = self.a
        k = self.k
        n = self.n
        z = self.z
    # Hill-type saturation; the 1e-5 terms guard against division by zero.
    return a / (K.pow((k / (inputs + 1e-5)), n) + z + 1e-5)
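# For intuition, the non-Theano branch above is a Hill-type saturation
# a / ((k / x)**n + z); a quick NumPy check with illustrative constants
# (the 1e-5 stabilizers are omitted here):
import numpy as np
a, k, n, z = 1.0, 0.5, 2.0, 1.0
x = np.array([0.1, 0.5, 2.0, 10.0])
y = a / ((k / x) ** n + z)
print(y)  # monotonically increasing, saturating toward a / z for large x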
def _se_block(inputs, filters, se_ratio, prefix):
    x = GlobalAveragePooling2D(name=prefix + 'squeeze_excite/AvgPool')(inputs)
    if K.image_data_format() == 'channels_first':
        x = Reshape((filters, 1, 1))(x)
    else:
        x = Reshape((1, 1, filters))(x)
    x = Conv2D(_depth(filters * se_ratio),
               kernel_size=1,
               padding='same',
               name=prefix + 'squeeze_excite/Conv')(x)
    x = ReLU(name=prefix + 'squeeze_excite/Relu')(x)
    x = Conv2D(filters,
               kernel_size=1,
               padding='same',
               name=prefix + 'squeeze_excite/Conv_1')(x)
    x = Activation(hard_sigmoid)(x)
    if K.backend() == 'theano':
        # For the Theano backend, make the excitation weights broadcastable explicitly.
        x = Lambda(
            lambda br: K.pattern_broadcast(br, [True, True, True, False]),
            output_shape=lambda input_shape: input_shape,
            name=prefix + 'squeeze_excite/broadcast')(x)
    x = Multiply(name=prefix + 'squeeze_excite/Mul')([inputs, x])
    return x
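# Hedged usage sketch for `_se_block` above; the input shape and the 0.25
# ratio mirror common MobileNetV3 settings but are assumptions here.
from keras.layers import Input
inputs = Input(shape=(56, 56, 24))  # channels_last feature map
x = _se_block(inputs, filters=24, se_ratio=0.25, prefix='expanded_conv/')
# `x` keeps the shape of `inputs`, rescaled per channel by the SE gate.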
def call(self, inputs):
    stim = inputs[0]
    center = inputs[1][0]
    centers_x = (self.XX[None, :, :, None] -
                 center[:, 0, None, None, None] -
                 self.centers[0][None, None, None, :])
    centers_y = (self.YY[None, :, :, None] -
                 center[:, 1, None, None, None] -
                 self.centers[1][None, None, None, :])
    senv = self.stds[None, None, None, :]
    gauss = self.gauss_scale * (
        K.square(self.dx) / (2 * np.pi * K.square(senv) + K.epsilon())) * K.exp(
            -(K.square(centers_x) + K.square(centers_y)) / (2.0 * K.square(senv)))
    # gauss = (1 / K.sqrt(2 * np.pi * K.square(senv) + K.epsilon())) * K.exp(
    #     -(K.square(centers_x) + K.square(centers_y)) / (2.0 * K.square(senv)))
    # gauss /= K.max(gauss, axis=(1, 2), keepdims=True)
    gauss = K.reshape(gauss, self.kernel_shape)
    if K.backend() == 'theano':
        output = K.sum(stim[..., None] *
                       K.pattern_broadcast(gauss, self.kernel_broadcast),
                       axis=self.filter_axes, keepdims=False)
    else:
        output = K.sum(stim[..., None] * gauss,
                       axis=self.filter_axes, keepdims=False)
    return output
def call(self, inputs, mask=None):
    pos = K.relu(inputs)
    if K.backend() == 'theano':
        neg = (K.pattern_broadcast(self.alpha, self.param_broadcast) *
               (inputs - K.abs(inputs)) * 0.5)
    else:
        neg = -self.alpha * K.relu(-inputs)
    return pos + neg
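# Why the two branches above agree: for any x, (x - |x|) / 2 == -relu(-x)
# == min(x, 0). A quick NumPy check of that identity:
import numpy as np
x = np.array([-3.0, -0.5, 0.0, 2.0])
lhs = (x - np.abs(x)) * 0.5
rhs = -np.maximum(-x, 0.0)
assert np.allclose(lhs, rhs) and np.allclose(lhs, np.minimum(x, 0.0))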
def call(self, x, mask=None):
    # Ensure the right threshold is always to the right of the left one.
    t_right_actual = self.t_left + K.abs(self.t_right)
    if K.backend() == "theano":
        t_left = K.pattern_broadcast(self.t_left, self.param_broadcast)
        a_left = K.pattern_broadcast(self.a_left, self.param_broadcast)
        a_right = K.pattern_broadcast(self.a_right, self.param_broadcast)
        t_right_actual = K.pattern_broadcast(t_right_actual, self.param_broadcast)
    else:
        t_left = self.t_left
        a_left = self.a_left
        a_right = self.a_right
    # K.relu(x, alpha, max_value): slope a_left below zero, capped at
    # t_right_actual - t_left.
    y_left_and_center = t_left + K.relu(x - t_left, a_left,
                                        t_right_actual - t_left)
    y_right = K.relu(x - t_right_actual) * a_right
    return y_left_and_center + y_right
def block(inputs, args, activation, drop_rate=None, prefix=''):
    # Mobile inverted residual bottleneck block.
    bn_axis = -1 if K.image_data_format() == 'channels_last' else 1

    # Expansion phase
    filters = args.filters_in * args.expand_ratio
    if args.expand_ratio != 1:
        x = Conv2D(filters, kernel_size=(1, 1), padding='same', use_bias=False,
                   kernel_initializer=CONV_KERNEL_INITIALIZER,
                   name=prefix + 'expand_conv')(inputs)
        x = BatchNormalization(axis=bn_axis, name=prefix + 'expand_bn')(x)
        x = Activation(activation, name=prefix + 'expand_activation')(x)
    else:
        x = inputs

    # Depthwise convolution
    x = DepthwiseConv2D(args.kernel_size, strides=args.strides, padding='same',
                        use_bias=False,
                        depthwise_initializer=CONV_KERNEL_INITIALIZER,
                        name=prefix + 'dwconv')(x)
    x = BatchNormalization(axis=bn_axis, name=prefix + 'bn')(x)
    x = Activation(activation, name=prefix + 'activation')(x)

    # Squeeze and excitation phase
    if 0 < args.se_ratio <= 1:
        filters_se = max(1, int(args.filters_in * args.se_ratio))
        se = GlobalAveragePooling2D(name=prefix + 'se_squeeze')(x)
        if bn_axis == -1:  # channels_last
            se = Reshape((1, 1, filters), name=prefix + 'se_reshape')(se)
        else:
            se = Reshape((filters, 1, 1), name=prefix + 'se_reshape')(se)
        se = Conv2D(filters_se, kernel_size=(1, 1), activation=activation,
                    padding='same', use_bias=True,
                    kernel_initializer=CONV_KERNEL_INITIALIZER,
                    name=prefix + 'se_reduce')(se)
        se = Conv2D(filters, kernel_size=(1, 1), activation='sigmoid',
                    padding='same', use_bias=True,
                    kernel_initializer=CONV_KERNEL_INITIALIZER,
                    name=prefix + 'se_expand')(se)
        if K.backend() == 'theano':
            # For the Theano backend, make the excitation weights
            # broadcastable explicitly.
            if K.image_data_format() == 'channels_last':
                pattern = [True, True, True, False]
            else:
                pattern = [True, False, True, True]
            se = Lambda(lambda x: K.pattern_broadcast(x, pattern),
                        name=prefix + 'se_broadcast')(se)
        x = multiply([x, se], name=prefix + 'se_excite')

    # Output phase
    x = Conv2D(args.filters_out, kernel_size=(1, 1), padding='same',
               use_bias=False, kernel_initializer=CONV_KERNEL_INITIALIZER,
               name=prefix + 'project_conv')(x)
    x = BatchNormalization(axis=bn_axis, name=prefix + 'project_bn')(x)
    if (args.id_skip and all(s == 1 for s in args.strides)
            and args.filters_in == args.filters_out):
        if drop_rate and drop_rate > 0:
            x = Dropout(drop_rate, noise_shape=(None, 1, 1, 1),
                        name=prefix + 'drop')(x)
        x = add([x, inputs], name=prefix + 'add')
    return x
def call(self, x, mask=None):
    b, xb = 0.0, 0.0
    if self.data_format == "channels_first":
        kernel_sum_axes = [1, 2, 3]
        if self.use_bias:
            b = K.reshape(self.b, (self.filters, 1, 1, 1))
            xb = 1.0
    elif self.data_format == "channels_last":
        kernel_sum_axes = [0, 1, 2]
        if self.use_bias:
            b = K.reshape(self.b, (1, 1, 1, self.filters))
            xb = 1.0

    # Norm of the kernel (and bias, if any) per output filter.
    tmp = K.sum(K.square(self.W), axis=kernel_sum_axes, keepdims=True)
    Wnorm = K.sqrt(tmp + K.square(b) + K.epsilon())

    # Norm of each input patch, computed by convolving x**2 with kernel_norm.
    tmp = KC.conv2d(K.square(x), self.kernel_norm, strides=self.strides,
                    padding=self.padding, data_format=self.data_format,
                    filter_shape=self.kernel_norm_shape)
    xnorm = K.sqrt(tmp + xb + K.epsilon())

    W = self.W / Wnorm
    output = KC.conv2d(x, W, strides=self.strides, padding=self.padding,
                       data_format=self.data_format,
                       filter_shape=self.kernel_shape)
    if K.backend() == "theano":
        xnorm = K.pattern_broadcast(xnorm, [False, True, False, False])
    output /= xnorm

    if self.use_bias:
        b /= Wnorm
        if self.data_format == "channels_first":
            b = K.reshape(b, (1, self.filters, 1, 1))
        elif self.data_format == "channels_last":
            b = K.reshape(b, (1, 1, 1, self.filters))
        else:
            raise ValueError("Invalid data_format:", self.data_format)
        b /= xnorm
        output += b

    output = self.activation(output)
    return output
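# A hedged guess at the companion `build()` pieces for the layer above:
# `kernel_norm` would be an all-ones, single-output-filter kernel, so the
# conv2d over K.square(x) sums x**2 across each patch (its single output
# filter is why the Theano branch marks that axis broadcastable). Names and
# the channels_last input layout are assumptions, not taken from the source.
def build(self, input_shape):
    input_dim = input_shape[-1]
    self.kernel_shape = self.kernel_size + (input_dim, self.filters)
    self.kernel_norm_shape = self.kernel_size + (input_dim, 1)
    self.W = self.add_weight(shape=self.kernel_shape, name='W',
                             initializer='glorot_uniform')
    self.kernel_norm = K.constant(np.ones(self.kernel_norm_shape))
    if self.use_bias:
        self.b = self.add_weight(shape=(self.filters,), name='b',
                                 initializer='zeros')
    self.built = True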
def call(self, inputs):
    mod = get_abs(inputs)
    if K.backend() == 'theano':
        s = mod + K.pattern_broadcast(self.b, self.broadcast)
    else:
        s = mod + self.b
    # modReLU-style gating: scale the complex input z by relu(|z| + b) / |z|.
    rs = K.relu(s)
    real = rs * get_realpart(inputs) / mod
    imag = rs * get_imagpart(inputs) / mod
    return K.concatenate((real, imag), axis=-1)
def block(inputs, activation_fn=swish, drop_rate=0., name='',
          filters_in=32, filters_out=16, kernel_size=3, strides=1,
          expand_ratio=1, se_ratio=0., id_skip=True):
    """A mobile inverted residual block.

    # Arguments
        inputs: input tensor.
        activation_fn: activation function.
        drop_rate: float between 0 and 1, fraction of the input units to drop.
        name: string, block label.
        filters_in: integer, the number of input filters.
        filters_out: integer, the number of output filters.
        kernel_size: integer, the dimension of the convolution window.
        strides: integer, the stride of the convolution.
        expand_ratio: integer, scaling coefficient for the input filters.
        se_ratio: float between 0 and 1, fraction to squeeze the input filters.
        id_skip: boolean.

    # Returns
        output tensor for the block.
    """
    bn_axis = 3 if K.image_data_format() == 'channels_last' else 1

    # Expansion phase
    filters = filters_in * expand_ratio
    if expand_ratio != 1:
        x = Conv2D(filters, 1, padding='same', use_bias=False,
                   kernel_initializer=CONV_KERNEL_INITIALIZER,
                   name=name + 'expand_conv')(inputs)
        x = BatchNormalization(axis=bn_axis, name=name + 'expand_bn')(x)
        x = Activation(activation_fn, name=name + 'expand_activation')(x)
    else:
        x = inputs

    # Depthwise convolution
    if strides == 2:
        x = ZeroPadding2D(padding=correct_pad(K, x, kernel_size),
                          name=name + 'dwconv_pad')(x)
        conv_pad = 'valid'
    else:
        conv_pad = 'same'
    x = DepthwiseConv2D(kernel_size, strides=strides, padding=conv_pad,
                        use_bias=False,
                        depthwise_initializer=CONV_KERNEL_INITIALIZER,
                        name=name + 'dwconv')(x)
    x = BatchNormalization(axis=bn_axis, name=name + 'bn')(x)
    x = Activation(activation_fn, name=name + 'activation')(x)

    # Squeeze and excitation phase
    if 0 < se_ratio <= 1:
        filters_se = max(1, int(filters_in * se_ratio))
        se = GlobalAveragePooling2D(name=name + 'se_squeeze')(x)
        se = Reshape((1, 1, filters), name=name + 'se_reshape')(se)
        se = Conv2D(filters_se, 1, padding='same', activation=activation_fn,
                    kernel_initializer=CONV_KERNEL_INITIALIZER,
                    name=name + 'se_reduce')(se)
        se = Conv2D(filters, 1, padding='same', activation='sigmoid',
                    kernel_initializer=CONV_KERNEL_INITIALIZER,
                    name=name + 'se_expand')(se)
        if K.backend() == 'theano':
            # For the Theano backend, we have to explicitly make
            # the excitation weights broadcastable.
            se = Lambda(
                lambda x: K.pattern_broadcast(x, [True, True, True, False]),
                output_shape=lambda input_shape: input_shape,
                name=name + 'se_broadcast')(se)
        x = multiply([x, se], name=name + 'se_excite')

    # Output phase
    x = Conv2D(filters_out, 1, padding='same', use_bias=False,
               kernel_initializer=CONV_KERNEL_INITIALIZER,
               name=name + 'project_conv')(x)
    x = BatchNormalization(axis=bn_axis, name=name + 'project_bn')(x)
    if id_skip is True and strides == 1 and filters_in == filters_out:
        if drop_rate > 0:
            if tf2.enabled():
                x = Dropout(drop_rate, noise_shape=(None, 1, 1, 1),
                            name=name + 'drop')(x)
            else:
                # TF1's tf.keras Dropout cannot handle None in noise_shape,
                # so the per-sample noise shape is omitted there.
                x = Dropout(drop_rate,
                            # noise_shape=(None, 1, 1, 1),
                            name=name + 'drop')(x)
        x = add([x, inputs], name=name + 'add')
    return x
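# Hedged usage sketch for `block` above; the numbers approximate
# EfficientNet-B0's first MBConv stage but are illustrative only.
from keras.layers import Input
inputs = Input(shape=(112, 112, 32))  # stem output, channels_last
x = block(inputs, activation_fn=swish, drop_rate=0., name='block1a_',
          filters_in=32, filters_out=16, kernel_size=3, strides=1,
          expand_ratio=1, se_ratio=0.25, id_skip=True)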
def mb_conv_block(inputs, block_args, activation, drop_rate=None, prefix='',
                  spatial_dropout=False):
    """Mobile Inverted Residual Bottleneck."""
    has_se = (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1)
    bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1

    # Workaround for Dropout not working with None in noise_shape in tf.keras.
    Dropout = get_dropout(backend=backend, layers=layers, models=models,
                          utils=keras_utils)

    # Expansion phase
    filters = block_args.input_filters * block_args.expand_ratio
    if block_args.expand_ratio != 1:
        x = layers.Conv2D(filters, 1, padding='same', use_bias=False,
                          kernel_initializer=CONV_KERNEL_INITIALIZER,
                          name=prefix + 'expand_conv')(inputs)
        x = layers.BatchNormalization(axis=bn_axis, name=prefix + 'expand_bn')(x)
        x = layers.Activation(activation, name=prefix + 'expand_activation')(x)
    else:
        x = inputs

    # Depthwise convolution
    x = layers.DepthwiseConv2D(block_args.kernel_size,
                               strides=block_args.strides,
                               padding='same',
                               use_bias=False,
                               depthwise_initializer=CONV_KERNEL_INITIALIZER,
                               name=prefix + 'dwconv')(x)
    if spatial_dropout:
        x = layers.SpatialDropout2D(0.5)(x)
    x = layers.BatchNormalization(axis=bn_axis, name=prefix + 'bn')(x)
    x = layers.Activation(activation, name=prefix + 'activation')(x)

    # Squeeze and excitation phase
    if has_se:
        num_reduced_filters = max(
            1, int(block_args.input_filters * block_args.se_ratio))
        se_tensor = layers.GlobalAveragePooling2D(name=prefix + 'se_squeeze')(x)
        target_shape = ((1, 1, filters)
                        if backend.image_data_format() == 'channels_last'
                        else (filters, 1, 1))
        se_tensor = layers.Reshape(target_shape,
                                   name=prefix + 'se_reshape')(se_tensor)
        se_tensor = layers.Conv2D(num_reduced_filters, 1,
                                  activation=activation,
                                  padding='same',
                                  use_bias=True,
                                  kernel_initializer=CONV_KERNEL_INITIALIZER,
                                  name=prefix + 'se_reduce')(se_tensor)
        se_tensor = layers.Conv2D(filters, 1,
                                  activation='sigmoid',
                                  padding='same',
                                  use_bias=True,
                                  kernel_initializer=CONV_KERNEL_INITIALIZER,
                                  name=prefix + 'se_expand')(se_tensor)
        if backend.backend() == 'theano':
            # For the Theano backend, we have to explicitly make
            # the excitation weights broadcastable.
            pattern = ([True, True, True, False]
                       if backend.image_data_format() == 'channels_last'
                       else [True, False, True, True])
            se_tensor = layers.Lambda(
                lambda x: backend.pattern_broadcast(x, pattern),
                name=prefix + 'se_broadcast')(se_tensor)
        x = layers.multiply([x, se_tensor], name=prefix + 'se_excite')

    # Output phase
    x = layers.Conv2D(block_args.output_filters, 1,
                      padding='same',
                      use_bias=False,
                      kernel_initializer=CONV_KERNEL_INITIALIZER,
                      name=prefix + 'project_conv')(x)
    x = layers.BatchNormalization(axis=bn_axis, name=prefix + 'project_bn')(x)
    if block_args.id_skip and all(
            s == 1 for s in block_args.strides
    ) and block_args.input_filters == block_args.output_filters:
        if drop_rate and (drop_rate > 0):
            x = Dropout(drop_rate,
                        noise_shape=(None, 1, 1, 1),
                        name=prefix + 'drop')(x)
        x = layers.add([x, inputs], name=prefix + 'add')
    return x
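# A sketch of the `get_dropout` workaround used above, consistent with the
# EfficientNet Keras port it appears to come from (treat the exact class and
# `get_submodules_from_kwargs` helper as assumptions): it subclasses Dropout
# so that `None` entries in `noise_shape` are resolved against the symbolic
# input shape instead of being passed through.
def get_dropout(**kwargs):
    backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs)

    class FixedDropout(layers.Dropout):
        def _get_noise_shape(self, inputs):
            if self.noise_shape is None:
                return self.noise_shape
            symbolic_shape = backend.shape(inputs)
            # Replace each None with the matching dynamic dimension.
            noise_shape = [symbolic_shape[axis] if shape is None else shape
                           for axis, shape in enumerate(self.noise_shape)]
            return tuple(noise_shape)

    return FixedDropout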
def call(self, inputs, states, training=None):
    """We need to reimplement `call` entirely rather than reusing that
    from `GRUCell` since there are lots of differences.

    Args:
        inputs: One tensor which is stacked by 3 inputs (x, m, s).
            x and m are of shape (n_batch, input_dim).
            s is of shape (n_batch, 1).
        states: states and other values from the previous step.
            (h_tm1, x_keep_tm1, s_prev_tm1)
    """
    # Get inputs and states
    input_x = inputs[:, :self.true_input_dim]  # inputs x, m, s
    input_m = inputs[:, self.true_input_dim:-1]
    input_s = inputs[:, -1:]
    # Need to add broadcast for time_stamp if using theano backend.
    if K.backend() == 'theano':
        input_s = K.pattern_broadcast(input_s, [False, True])
    # h_tm1:        previous memory ((n_batch, self.units))
    # x_keep_tm1:   previous input x ((n_batch, input_dim))
    # s_prev_tm1:   the subtraction term (of delta_t^d in Equation (2))
    #               ((n_batch, input_dim))
    h_tm1, x_keep_tm1, s_prev_tm1 = states
    input_1m = K.cast_to_floatx(1.) - input_m
    input_d = input_s - s_prev_tm1

    # Get dropout
    if 0. < self.dropout < 1. and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(K.ones_like(input_x),
                                                    self.dropout,
                                                    training=training,
                                                    count=3)
    if (0. < self.recurrent_dropout < 1. and
            self._recurrent_dropout_mask is None):
        self._recurrent_dropout_mask = _generate_dropout_mask(
            K.ones_like(h_tm1),
            self.recurrent_dropout,
            training=training,
            count=3)
    dp_mask = self._dropout_mask
    rec_dp_mask = self._recurrent_dropout_mask
    if self.feed_masking:
        if 0. < self.dropout < 1. and self._masking_dropout_mask is None:
            self._masking_dropout_mask = _generate_dropout_mask(
                K.ones_like(input_m),
                self.dropout,
                training=training,
                count=3)
        m_dp_mask = self._masking_dropout_mask

    # Compute decay if any
    if self.input_decay is not None:
        gamma_di = input_d * self.input_decay_kernel
        if self.use_decay_bias:
            gamma_di = K.bias_add(gamma_di, self.input_decay_bias)
        gamma_di = self.input_decay(gamma_di)
    if self.hidden_decay is not None:
        gamma_dh = K.dot(input_d, self.hidden_decay_kernel)
        if self.use_decay_bias:
            gamma_dh = K.bias_add(gamma_dh, self.hidden_decay_bias)
        gamma_dh = self.hidden_decay(gamma_dh)
    if self.feed_masking and self.masking_decay is not None:
        gamma_dm = input_d * self.masking_decay_kernel
        if self.use_decay_bias:
            gamma_dm = K.bias_add(gamma_dm, self.masking_decay_bias)
        gamma_dm = self.masking_decay(gamma_dm)

    # Get the imputed or decayed input if needed
    # and `x_keep_t` for the next time step
    if self.input_decay is not None:
        x_keep_t = K.switch(input_m, input_x, x_keep_tm1)
        x_t = K.switch(input_m, input_x, gamma_di * x_keep_t)
    elif self.x_imputation == 'forward':
        x_t = K.switch(input_m, input_x, x_keep_tm1)
        x_keep_t = x_t
    elif self.x_imputation == 'zero':
        x_t = K.switch(input_m, input_x, K.zeros_like(input_x))
        x_keep_t = x_t
    elif self.x_imputation == 'raw':
        x_t = input_x
        x_keep_t = x_t
    else:
        raise ValueError('No input decay or invalid x_imputation '
                         '{}.'.format(self.x_imputation))

    # Get decayed hidden if needed
    if self.hidden_decay is not None:
        h_tm1d = gamma_dh * h_tm1
    else:
        h_tm1d = h_tm1

    # Get decayed masking if needed
    if self.feed_masking:
        m_t = input_1m
        if self.masking_decay is not None:
            m_t = gamma_dm * m_t

    # Apply the dropout
    if 0. < self.dropout < 1.:
        x_z, x_r, x_h = x_t * dp_mask[0], x_t * dp_mask[1], x_t * dp_mask[2]
        if self.feed_masking:
            m_z, m_r, m_h = (m_t * m_dp_mask[0],
                             m_t * m_dp_mask[1],
                             m_t * m_dp_mask[2])
    else:
        x_z, x_r, x_h = x_t, x_t, x_t
        if self.feed_masking:
            m_z, m_r, m_h = m_t, m_t, m_t
    if 0. < self.recurrent_dropout < 1.:
        h_tm1_z, h_tm1_r = (h_tm1d * rec_dp_mask[0],
                            h_tm1d * rec_dp_mask[1])
    else:
        h_tm1_z, h_tm1_r = h_tm1d, h_tm1d

    # Get z_t, r_t, hh_t
    z_t = K.dot(x_z, self.kernel_z) + K.dot(h_tm1_z, self.recurrent_kernel_z)
    r_t = K.dot(x_r, self.kernel_r) + K.dot(h_tm1_r, self.recurrent_kernel_r)
    hh_t = K.dot(x_h, self.kernel_h)
    if self.feed_masking:
        z_t += K.dot(m_z, self.masking_kernel_z)
        r_t += K.dot(m_r, self.masking_kernel_r)
        hh_t += K.dot(m_h, self.masking_kernel_h)
    if self.use_bias:
        z_t = K.bias_add(z_t, self.input_bias_z)
        r_t = K.bias_add(r_t, self.input_bias_r)
        hh_t = K.bias_add(hh_t, self.input_bias_h)
    z_t = self.recurrent_activation(z_t)
    r_t = self.recurrent_activation(r_t)
    if 0. < self.recurrent_dropout < 1.:
        h_tm1_h = r_t * h_tm1d * rec_dp_mask[2]
    else:
        h_tm1_h = r_t * h_tm1d
    hh_t = self.activation(hh_t + K.dot(h_tm1_h, self.recurrent_kernel_h))

    # Get h_t
    h_t = z_t * h_tm1 + (1 - z_t) * hh_t
    if 0. < self.dropout + self.recurrent_dropout:
        if training is None:
            h_t._uses_learning_phase = True

    # Get s_prev_t
    s_prev_t = K.switch(input_m,
                        K.tile(input_s, [1, self.state_size[-1]]),
                        s_prev_tm1)
    return h_t, [h_t, x_keep_t, s_prev_t]
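# Why the explicit `pattern_broadcast` on `input_s` above is needed: Theano
# broadcasts only along axes declared broadcastable, and a tensor sliced to
# shape (n_batch, 1) does not get that declaration automatically. A hedged,
# Theano-only illustration (kept as comments since it cannot run elsewhere):
#     import theano.tensor as T
#     s = T.matrix('s')                            # (n_batch, 1), axis 1 not broadcastable
#     d = s - T.matrix('s_prev')                   # fails at run time for input_dim > 1
#     s_ok = T.patternbroadcast(s, (False, True))  # axis 1 may now broadcast
#     d = s_ok - T.matrix('s_prev')                # (n_batch, input_dim), as intended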