def extract_image_patches(x, ksizes, ssizes, padding='same', data_format='tf'):
    '''Extract the patches from an image

    # Parameters
        x : The input image
        ksizes : 2-d tuple with the kernel size
        ssizes : 2-d tuple with the strides size
        padding : 'same' or 'valid'
        data_format : 'channels_last' or 'channels_first'

    # Returns
        The (k_w,k_h) patches extracted
        TF ==> (batch_size,w,h,k_w,k_h,c)
        TH ==> (batch_size,w,h,c,k_w,k_h)
    '''
    window = [1, ksizes[0], ksizes[1], 1]
    steps = [1, ssizes[0], ssizes[1], 1]
    pad_mode = _preprocess_padding(padding)
    if data_format == 'channels_first':
        # tf.extract_image_patches wants channels-last input.
        x = KTF.permute_dimensions(x, (0, 2, 3, 1))
    _, _, _, in_channels = KTF.int_shape(x)
    patches = tf.extract_image_patches(x, window, steps, [1, 1, 1, 1], pad_mode)
    # Reshape to the Theano layout: split the flattened patch axis into
    # (kernel positions, channels), then move channels ahead of the kernel dims.
    _, rows, cols, patch_dim = KTF.int_shape(patches)
    depth = tf.floordiv(patch_dim, in_channels)
    patches = tf.reshape(patches, [-1, rows, cols, depth, in_channels])
    patches = tf.transpose(patches, [0, 1, 2, 4, 3])
    patches = tf.reshape(patches,
                         [-1, rows, cols, in_channels, ksizes[0], ksizes[1]])
    if data_format == 'channels_last':
        patches = KTF.permute_dimensions(patches, [0, 1, 2, 4, 5, 3])
    return patches
def resize_images_bilinear(X, height_factor=1, width_factor=1, target_height=None, target_width=None, data_format='default'):
    '''Resizes the images contained in a 4D tensor of shape
    - [batch, channels, height, width] (for 'channels_first' data_format)
    - [batch, height, width, channels] (for 'channels_last' data_format)
    by a factor of (height_factor, width_factor), or directly to
    (target_height, target_width) when both are given.
    Both factors should be positive integers.

    # Raises
        ValueError: if `data_format` is not a supported value.
    '''
    def _scaled(dim, factor):
        # Static shape entries from K.int_shape() may be None for placeholder
        # inputs; multiplying None by an int would raise, so pass None through
        # (set_shape accepts None for an unknown dimension).
        return None if dim is None else dim * factor

    if data_format == 'default':
        data_format = K.image_data_format()
    if data_format == 'channels_first':
        original_shape = K.int_shape(X)
        if target_height and target_width:
            new_shape = tf.constant(
                np.array((target_height, target_width)).astype('int32'))
        else:
            new_shape = tf.shape(X)[2:]
            new_shape *= tf.constant(
                np.array([height_factor, width_factor]).astype('int32'))
        # resize_bilinear expects channels-last input, so permute around it.
        X = permute_dimensions(X, [0, 2, 3, 1])
        X = tf.image.resize_bilinear(X, new_shape)
        X = permute_dimensions(X, [0, 3, 1, 2])
        if target_height and target_width:
            X.set_shape((None, None, target_height, target_width))
        else:
            X.set_shape((None, None,
                         _scaled(original_shape[2], height_factor),
                         _scaled(original_shape[3], width_factor)))
        return X
    elif data_format == 'channels_last':
        original_shape = K.int_shape(X)
        if target_height and target_width:
            new_shape = tf.constant(
                np.array((target_height, target_width)).astype('int32'))
        else:
            new_shape = tf.shape(X)[1:3]
            new_shape *= tf.constant(
                np.array([height_factor, width_factor]).astype('int32'))
        X = tf.image.resize_bilinear(X, new_shape)
        if target_height and target_width:
            X.set_shape((None, target_height, target_width, None))
        else:
            X.set_shape((None,
                         _scaled(original_shape[1], height_factor),
                         _scaled(original_shape[2], width_factor),
                         None))
        return X
    else:
        # ValueError is the conventional exception for a bad argument value;
        # it is a subclass of Exception, so existing callers still catch it.
        raise ValueError('Invalid data_format: ' + data_format)
def _call_multiplicative_emission(self, inputs):
    """Multiplicative attention scores: e_{t, t'} = x_t^T W_a x_{t'} + b_a."""
    projected = K.dot(inputs, self.Wa)
    keys = K.permute_dimensions(inputs, (0, 2, 1))
    scores = K.batch_dot(projected, keys)
    if self.use_attention_bias:
        return scores + self.ba[0]
    return scores
def gram_matrix(x):
    """Gram-style matrix of a 3D feature tensor, flattened per channel.

    NOTE(review): the features are shifted by -1 before the outer product,
    which differs from the standard Gram matrix — presumably intentional
    (a centered variant); confirm against the calling style-transfer code.
    """
    assert K.ndim(x) == 3
    channels_first = K.image_dim_ordering() == 'th'
    if channels_first:
        features = K.batch_flatten(x)
    else:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    shifted = features - 1
    return K.dot(shifted, K.transpose(shifted))
def _attention_regularizer(self, attention):
    """Orthogonality penalty weight * ||A A^T - I||^2, averaged over the batch."""
    num_samples = K.cast(K.shape(attention)[0], K.floatx())
    seq_len = K.shape(attention)[-1]
    # Build a (seq_len, seq_len) identity matrix on the fly.
    row = K.expand_dims(K.arange(0, seq_len), axis=0)
    col = K.expand_dims(K.arange(0, seq_len), axis=-1)
    identity = K.cast(K.equal(row, col), K.floatx())
    product = K.batch_dot(attention, K.permute_dimensions(attention, (0, 2, 1)))
    penalty = K.sum(K.square(product - identity))
    return self.attention_regularizer_weight * penalty / num_samples
def extract_image_patches(X, ksizes, ssizes, border_mode="same", dim_ordering="tf"):
    '''Extract the patches from an image

    Parameters
    ----------
    X : The input image
    ksizes : 2-d tuple with the kernel size
    ssizes : 2-d tuple with the strides size
    border_mode : 'same' or 'valid'
    dim_ordering : 'tf' or 'th'

    Returns
    -------
    The (k_w,k_h) patches extracted
    TF ==> (batch_size,w,h,k_w,k_h,c)
    TH ==> (batch_size,w,h,c,k_w,k_h)
    '''
    kernel = [1, ksizes[0], ksizes[1], 1]
    strides = [1, ssizes[0], ssizes[1], 1]
    padding = _preprocess_border_mode(border_mode)
    if dim_ordering == "th":
        # tf.extract_image_patches expects channels-last input.
        X = KTF.permute_dimensions(X, (0, 2, 3, 1))
    bs_i, w_i, h_i, ch_i = KTF.int_shape(X)
    patches = tf.extract_image_patches(X, kernel, strides, [1, 1, 1, 1],
                                       padding)
    # Reshaping to fit Theano layout (..., c, k_w, k_h).
    bs, w, h, ch = KTF.int_shape(patches)
    # BUGFIX: the static batch size `bs` from int_shape() is normally None
    # for a placeholder input, and tf.reshape rejects None in a shape list;
    # use -1 so the batch dimension is inferred at run time (as the sibling
    # data_format-based extract_image_patches in this file already does).
    patches = tf.reshape(
        tf.transpose(tf.reshape(patches, [-1, w, h, ch // ch_i, ch_i]),
                     [0, 1, 2, 4, 3]),
        [-1, w, h, ch_i, ksizes[0], ksizes[1]])
    if dim_ordering == "tf":
        patches = KTF.permute_dimensions(patches, [0, 1, 2, 4, 5, 3])
    return patches
def call(self, inputs, mask=None, **kwargs):
    """Self-attention forward pass.

    Computes emission scores e (additive or multiplicative), optionally
    restricts them to a local window and/or the input mask, softmaxes them
    into attention weights `a`, and returns the attended values
    v = a . inputs.  `inputs` is assumed to be (batch, seq_len, dim) —
    TODO confirm against build()/the calling layer.
    """
    input_len = K.shape(inputs)[1]
    # Emission scores e_{t, t'} for every query/key pair.
    if self.attention_type == Attention.ATTENTION_TYPE_ADD:
        e = self._call_additive_emission(inputs)
    elif self.attention_type == Attention.ATTENTION_TYPE_MUL:
        e = self._call_multiplicative_emission(inputs)
    # NOTE(review): any other attention_type leaves `e` unbound — presumably
    # the constructor validates attention_type; confirm.
    if self.attention_activation is not None:
        e = self.attention_activation(e)
    if self.attention_width is not None:
        # Local attention: only positions within a width-wide window around
        # (or, for history_only, strictly before/at) each query may attend.
        if self.history_only:
            lower = K.arange(0, input_len) - (self.attention_width - 1)
        else:
            lower = K.arange(0, input_len) - self.attention_width // 2
        lower = K.expand_dims(lower, axis=-1)
        upper = lower + self.attention_width
        indices = K.expand_dims(K.arange(0, input_len), axis=0)
        # Push out-of-window scores to a large negative value so they vanish
        # after the softmax below.
        e -= 10000.0 * (1.0 - K.cast(lower <= indices, K.floatx()) * K.cast(indices < upper, K.floatx()))
    if mask is not None:
        mask = K.expand_dims(K.cast(mask, K.floatx()), axis=-1)
        # Suppress pairs where BOTH the query and the key positions are
        # masked out (the (1-m)(1-m^T) outer product).
        e -= 10000.0 * ((1.0 - mask) * (1.0 - K.permute_dimensions(mask, (0, 2, 1))))
    # a_{t} = \text{softmax}(e_t)
    # Subtracting the row max first keeps exp() numerically stable.
    e = K.exp(e - K.max(e, axis=-1, keepdims=True))
    a = e / K.sum(e, axis=-1, keepdims=True)
    # l_t = \sum_{t'} a_{t, t'} x_{t'}
    v = K.batch_dot(a, inputs)
    if self.attention_regularizer_weight > 0.0:
        self.add_loss(self._attention_regularizer(a))
    if self.return_attention:
        return [v, a]
    return v
def extract_image_patches(X, ksizes, strides, rates, padding='valid', data_format='channels_first'):
    '''Extract the patches from an image

    Parameters
    ----------
    X : The input image
    ksizes : 2-d tuple with the kernel size
    strides : 2-d tuple with the strides size
    padding : 'same' or 'valid'
    data_format : 'channels_last' or 'channels_first'

    Returns
    -------
    The (k_w,k_h) patches extracted
    TF ==> (batch_size,w,h,k_w,k_h,c)
    TH ==> (batch_size,w,h,c,k_w,k_h)
    https://github.com/farizrahman4u/keras-contrib/blob/master/keras_contrib/backend/theano_backend.py
    '''
    # BUGFIX: tf's extract_image_patches only accepts 'SAME' or 'VALID'.
    # The old remapping of 'same' to 'ignore_borders' (a Theano images2neibs
    # mode) produced the invalid padding string 'IGNORE_BORDERS' after
    # .upper(); pass the caller's mode through unchanged instead.
    if data_format == 'channels_first':
        # The patch extractor expects channels-last input.
        X = KTF.permute_dimensions(X, [0, 2, 3, 1])
    # Thanks to https://github.com/awentzonline for the help!
    patches = TF.extract_image_patches(X, ksizes, strides, rates,
                                       padding.upper())
    return patches
def gram_matrix(x):
    """Gram matrix of a 3D feature tensor: flatten each channel, then F F^T."""
    channels_first = KTF.permute_dimensions(x, (2, 0, 1))
    features = KTF.batch_flatten(channels_first)
    return KTF.dot(features, KTF.transpose(features))