def call(self, u_vecs):
    if self.share_weights:
        u_hat_vecs = K.conv1d(u_vecs, self.W)
    else:
        u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

    batch_size = K.shape(u_vecs)[0]
    input_num_capsule = K.shape(u_vecs)[1]
    u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                        self.num_capsule, self.dim_capsule))
    u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
    # final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]

    b = K.zeros_like(u_hat_vecs[:, :, :, 0])  # shape = [None, num_capsule, input_num_capsule]
    for i in range(self.routings):
        c = softmax(b, 1)
        # o = K.batch_dot(c, u_hat_vecs, [2, 2])
        o = tf.einsum('bin,binj->bij', c, u_hat_vecs)
        if K.backend() == 'theano':
            o = K.sum(o, axis=1)
        if i < self.routings - 1:
            o = K.l2_normalize(o, -1)
            # b = K.batch_dot(o, u_hat_vecs, [2, 3])
            b = tf.einsum('bij,binj->bin', o, u_hat_vecs)
            if K.backend() == 'theano':
                b = K.sum(b, axis=1)
    return self.activation(o)
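# The routing loops in these snippets call a free function `softmax(b, 1)` whose
# definition is not included here. A common axis-aware implementation (an
# assumption, shown only as a sketch) is:
from tensorflow.keras import backend as K

def softmax(x, axis=-1):
    # Subtract the max for numerical stability, then normalize along `axis`.
    ex = K.exp(x - K.max(x, axis=axis, keepdims=True))
    return ex / K.sum(ex, axis=axis, keepdims=True)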
def call(self, inputs):
    if self.share_weights:
        hat_inputs = K.conv1d(inputs, self.kernel)
    else:
        hat_inputs = K.local_conv1d(inputs, self.kernel, [1], [1])

    batch_size = K.shape(inputs)[0]
    input_num_capsule = K.shape(inputs)[1]
    hat_inputs = K.reshape(hat_inputs, (batch_size, input_num_capsule,
                                        self.num_capsule, self.dim_capsule))
    hat_inputs = K.permute_dimensions(hat_inputs, (0, 2, 1, 3))

    b = K.zeros_like(hat_inputs[:, :, :, 0])
    for i in range(self.routings):
        c = softmax(b, 1)
        o = self.activation(keras.backend.batch_dot(c, hat_inputs, [2, 2]))
        if i < self.routings - 1:
            b = keras.backend.batch_dot(o, hat_inputs, [2, 3])
            if K.backend() == 'theano':
                o = K.sum(o, axis=1)
    return o
def conv1d(inputs, filter_, data_format='channels_first'):
    filter_ = K.expand_dims(filter_)
    zero = K.constant(np.zeros(filter_.shape))
    filter_first_row = K.concatenate([filter_, zero], axis=0)
    filter_second_row = K.concatenate([zero, filter_], axis=0)
    full_filter = K.concatenate([filter_first_row, filter_second_row], axis=-1)
    return K.conv1d(inputs, full_filter, data_format=data_format)
def call(self, u_vecs):
    if self.share_weights:
        u_hat_vecs = K.conv1d(u_vecs, self.W)
    else:
        u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

    batch_size = K.shape(u_vecs)[0]
    input_num_capsule = K.shape(u_vecs)[1]
    u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                        self.num_capsule, self.dim_capsule))
    u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
    # final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]

    b = K.zeros_like(u_hat_vecs[:, :, :, 0])
    # shape = [None, num_capsule, input_num_capsule]
    for i in range(self.routings):
        b = K.permute_dimensions(b, (0, 2, 1))
        # shape = [None, input_num_capsule, num_capsule]
        c = K.softmax(b)
        c = K.permute_dimensions(c, (0, 2, 1))
        b = K.permute_dimensions(b, (0, 2, 1))
        outputs = self.activation(K.batch_dot(c, u_hat_vecs, [2, 2]))
        if i < self.routings - 1:
            b = K.batch_dot(outputs, u_hat_vecs, [2, 3])
    return outputs
def call(self, u_ves):
    print(self.W_kernel.shape)
    print("*****", u_ves.shape)
    u_ves = tf.transpose(u_ves, perm=[0, 2, 1])
    print("*****", u_ves.shape)
    u_hat_vecs = K.conv1d(u_ves, self.W_kernel)
    print("*****", u_hat_vecs.shape)

    batch_size = tf.shape(u_ves)[0]
    input_num_capsule = tf.shape(u_ves)[1]
    u_hat_vecs = tf.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                         self.out_num_capsule, self.out_dim_capusle))
    u_hat_vecs = tf.transpose(u_hat_vecs, perm=[0, 2, 1, 3])
    # final shape = [None, out_num_capsule, input_num_capsule, out_dim_capsule]

    # Dynamic routing
    b = tf.zeros_like(u_hat_vecs[:, :, :, 0])
    # shape = [None, out_num_capsule, input_num_capsule]
    for i in range(self.routings):
        c = softmax(b, 1)
        output = K.batch_dot(c, u_hat_vecs, [2, 2])
        output = self.activation(output)
        if i < self.routings - 1:
            # o = tf.nn.l2_normalize(o, -1)
            b = b + K.batch_dot(output, u_hat_vecs, [2, 3])

    pose = output
    print("pose is:", pose.shape)
    return pose
def call(self, inputs, training=None):
    def _l2normalize(v, eps=1e-12):
        return v / (K.sum(v ** 2) ** 0.5 + eps)

    def power_iteration(W, u):
        _u = u
        _v = _l2normalize(K.dot(_u, K.transpose(W)))
        _u = _l2normalize(K.dot(_v, W))
        return _u, _v

    if self.spectral_normalization:
        W_shape = self.kernel.shape.as_list()
        # Flatten the tensor
        W_reshaped = K.reshape(self.kernel, [-1, W_shape[-1]])
        _u, _v = power_iteration(W_reshaped, self.u)
        # Calculate sigma
        sigma = K.dot(_v, W_reshaped)
        sigma = K.dot(sigma, K.transpose(_u))
        # Normalize it
        W_bar = W_reshaped / sigma
        # Reshape the weight tensor
        if training in {0, False}:
            W_bar = K.reshape(W_bar, W_shape)
        else:
            with tf.control_dependencies([self.u.assign(_u)]):
                W_bar = K.reshape(W_bar, W_shape)
        # Update the weight
        self.kernel = W_bar

    if self.rank == 1:
        outputs = K.conv1d(
            inputs,
            self.kernel,
            strides=self.strides[0],
            padding=self.padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate[0])
    if self.rank == 2:
        outputs = K.conv2d(
            inputs,
            self.kernel,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate)
    if self.rank == 3:
        outputs = K.conv3d(
            inputs,
            self.kernel,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate)

    if self.use_bias:
        outputs = K.bias_add(outputs, self.bias, data_format=self.data_format)
    if self.activation is not None:
        return self.activation(outputs)
    return outputs
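# Illustrative sketch (an addition, not from the original snippet): the spectral
# normalization above divides the flattened kernel by an estimate of its largest
# singular value obtained with one power-iteration step. The same arithmetic,
# written standalone on a toy weight matrix:
import tensorflow as tf
from tensorflow.keras import backend as K

W = K.random_normal((64, 32))                            # flattened kernel: (fan_in, out_channels)
u = K.random_normal((1, 32))                             # persistent singular-vector estimate
v = K.l2_normalize(K.dot(u, K.transpose(W)), axis=-1)    # (1, 64)
u = K.l2_normalize(K.dot(v, W), axis=-1)                 # (1, 32)
sigma = K.dot(K.dot(v, W), K.transpose(u))               # approx. largest singular value of W
W_sn = W / sigma                                         # spectral-normalized weights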
def call(self, inputs):
    if self.share_weights:
        u_hat_vectors = K.conv1d(inputs, self.W)
    else:
        u_hat_vectors = K.local_conv1d(inputs, self.W, [1], [1])

    # u_hat_vectors: the spatially transformed input vectors (with local_conv_1d)
    batch_size = K.shape(inputs)[0]
    input_num_capsule = K.shape(inputs)[1]

    u_hat_vectors = K.reshape(u_hat_vectors, (batch_size, input_num_capsule,
                                              self.num_capsule, self.dim_capsule))
    u_hat_vectors = K.permute_dimensions(u_hat_vectors, (0, 2, 1, 3))
    routing_weights = K.zeros_like(u_hat_vectors[:, :, :, 0])

    for i in range(self.routings):
        capsule_weights = K.softmax(routing_weights, 1)
        outputs = K.batch_dot(capsule_weights, u_hat_vectors, [2, 2])
        if K.ndim(outputs) == 4:
            outputs = K.sum(outputs, axis=1)
        if i < self.routings - 1:
            outputs = K.l2_normalize(outputs, -1)
            routing_weights = K.batch_dot(outputs, u_hat_vectors, [2, 3])
            if K.ndim(routing_weights) == 4:
                routing_weights = K.sum(routing_weights, axis=1)

    return self.activation(outputs)
def call(self, inputs): """Following the routing algorithm from Hinton's paper, but replace b = b + <u,v> with b = <u,v>. This change can improve the feature representation of Capsule. However, you can replace b = K.batch_dot(outputs, hat_inputs, [2, 3]) with b += K.batch_dot(outputs, hat_inputs, [2, 3]) to realize a standard routing. """ if self.share_weights: hat_inputs = K.conv1d(inputs, self.kernel) else: hat_inputs = K.local_conv1d(inputs, self.kernel, [1], [1]) batch_size = K.shape(inputs)[0] input_num_capsule = K.shape(inputs)[1] hat_inputs = K.reshape(hat_inputs, (batch_size, input_num_capsule, self.num_capsule, self.dim_capsule)) hat_inputs = K.permute_dimensions(hat_inputs, (0, 2, 1, 3)) b = K.zeros_like(hat_inputs[:, :, :, 0]) for i in range(self.routings): c = softmax(b, 1) o = self.activation(caps_batch_dot(c, hat_inputs)) if i < self.routings - 1: b = caps_batch_dot(o, hat_inputs) if K.backend() == 'theano': o = K.sum(o, axis=1) return o
def call(self, x):
    X = [None] * self.n_kernels
    for i in range(self.n_kernels):
        X[i] = softplus(
            K.conv1d(x[:, :, i:i + 1], self.kernels[i], padding='same'))
        # bias_add returns a new tensor, so assign the result back
        X[i] = K.bias_add(X[i], self.bias[i], data_format=self.data_format)
    X = K.concatenate(X, axis=2)
    return X
def tokenize(x, n, padding='same'):
    Y = K.argmax(x, axis=-1)
    if n == 1:
        return Y
    W = K.constant([[[4 ** k]] for k in range(n)])
    Y = K.cast(Y, dtype=tf.float32)
    W = K.cast(W, dtype=tf.float32)
    Y = K.expand_dims(Y, axis=-1)
    Y = K.conv1d(x=Y, kernel=W, strides=1, padding=padding)
    Y = K.squeeze(Y, axis=-1)
    return Y
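# Hypothetical usage sketch (not part of the original source): assuming `x` holds
# one-hot sequences over a 4-symbol alphabet, `tokenize` collapses every window of
# n consecutive symbols into a single base-4 n-gram id via the powers-of-4 kernel.
import tensorflow as tf
from tensorflow.keras import backend as K

one_hot = tf.one_hot([[0, 1, 2, 3, 0, 1]], depth=4)   # shape (1, 6, 4)
ids = tokenize(one_hot, n=3)                          # shape (1, 6); each entry encodes 3 consecutive symbols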
def call(self, x):
    # filters = K.zeros(shape=(N_filt, Filt_dim))

    # Get beginning and end frequencies of the filters.
    min_freq = 50.0
    min_band = 50.0
    filt_beg_freq = K.abs(self.filt_b1) + min_freq / self.freq_scale
    filt_end_freq = filt_beg_freq + (K.abs(self.filt_band) + min_band / self.freq_scale)

    # Filter window (Hamming).
    n = np.linspace(0, self.Filt_dim, self.Filt_dim)
    window = 0.54 - 0.46 * np.cos(2 * math.pi * n / self.Filt_dim)
    # window = K.cast(window, "float32")
    # window = tf.Variable(window, name='sincnet_window', trainable=False)

    # TODO what is this?
    t_right_linspace = np.linspace(1, (self.Filt_dim - 1) / 2,
                                   int((self.Filt_dim - 1) / 2), dtype=np.float32)
    t_right = np.float32(t_right_linspace / self.fs)
    # t_right = tf.Variable(t_right, name='sincnet_t_right', trainable=False, dtype=tf.float32)

    # Compute the filters.
    output_list = []
    for i in range(self.N_filt):
        low_pass1 = 2 * filt_beg_freq[i] * sinc(
            filt_beg_freq[i] * self.freq_scale, t_right)
        low_pass2 = 2 * filt_end_freq[i] * sinc(
            filt_end_freq[i] * self.freq_scale, t_right)
        band_pass = (low_pass2 - low_pass1)
        band_pass = band_pass / K.max(band_pass)
        output_list.append(band_pass * window)

    filters = K.stack(output_list)  # (80, 251)
    filters = K.transpose(filters)  # (251, 80)
    filters = K.reshape(filters, (self.Filt_dim, 1, self.N_filt))
    # (251, 1, 80) -- in TF: (filter_width, in_channels, out_channels);
    # in PyTorch: (out_channels, in_channels, filter_width)

    '''
    Given an input tensor of shape [batch, in_width, in_channels] if data_format
    is "NWC", or [batch, in_channels, in_width] if data_format is "NCW", and a
    filter / kernel tensor of shape [filter_width, in_channels, out_channels],
    this op reshapes the arguments to pass them to conv2d to perform the
    equivalent convolution operation.

    Internally, this op reshapes the input tensors and invokes tf.nn.conv2d.
    For example, if data_format does not start with "NC", a tensor of shape
    [batch, in_width, in_channels] is reshaped to
    [batch, 1, in_width, in_channels], and the filter is reshaped to
    [1, filter_width, in_channels, out_channels]. The result is then reshaped
    back to [batch, out_width, out_channels] (where out_width is a function of
    the stride and padding as in conv2d) and returned to the caller.
    '''
    out = K.conv1d(x, kernel=filters)
    return out
def call(self, x):
    '''
    Given an input tensor of shape [batch, in_width, in_channels] if data_format
    is "NWC", or [batch, in_channels, in_width] if data_format is "NCW", and a
    filter / kernel tensor of shape [filter_width, in_channels, out_channels],
    this op reshapes the arguments to pass them to conv2d to perform the
    equivalent convolution operation.

    Internally, this op reshapes the input tensors and invokes tf.nn.conv2d.
    For example, if data_format does not start with "NC", a tensor of shape
    [batch, in_width, in_channels] is reshaped to
    [batch, 1, in_width, in_channels], and the filter is reshaped to
    [1, filter_width, in_channels, out_channels]. The result is then reshaped
    back to [batch, out_width, out_channels] (where out_width is a function of
    the stride and padding as in conv2d) and returned to the caller.
    '''
    # Do the convolution.
    out = K.conv1d(x, kernel=self.generate_filters())
    return out
def conv1d(inputs: tf.Tensor,
           filter_: tf.Tensor,
           data_format: str = 'channels_first') -> tf.Tensor:
    """
    Convolve filter_ (1D) with each row of inputs.

    inputs: Tensor of shape [batch_size, number_of_channels, signal_length]
    filter_: Tensor of shape [filter_length]
    """
    filter_: tf.Tensor = K.expand_dims(filter_)
    filter_: tf.Tensor = K.expand_dims(filter_)
    debug_tensor(LOGGER, filter_, 'c1d.fil')

    zero: tf.Tensor = K.constant(np.zeros(filter_.shape))
    debug_tensor(LOGGER, zero, 'c1d.zero')

    filter_first_row = K.concatenate([filter_, zero], axis=1)
    debug_tensor(LOGGER, filter_first_row, 'c1d.ffr')
    filter_second_row = K.concatenate([zero, filter_], axis=1)
    debug_tensor(LOGGER, filter_second_row, 'c1d.fsr')

    full_filter: tf.Tensor = K.concatenate([filter_first_row, filter_second_row], axis=2)
    debug_tensor(LOGGER, full_filter, 'c1d.ff')

    return K.conv1d(inputs, full_filter, data_format=data_format)
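# Hypothetical usage sketch (not from the original source). With a two-channel
# 'channels_first' input, the helper above builds a block-diagonal
# [filter_length, 2, 2] kernel so the same 1-D filter is applied to each channel
# independently. `debug_tensor` and `LOGGER` belong to the surrounding module;
# they are stubbed here only to make the sketch self-contained.
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K

LOGGER = None

def debug_tensor(logger, tensor, name):
    pass  # no-op stand-in for the module's logging helper

signal = K.constant(np.random.randn(4, 2, 100))   # [batch, channels, length]
taps = K.constant(np.hamming(15))                 # [filter_length]
filtered = conv1d(signal, taps, data_format='channels_first')
# With 'valid' padding this yields shape [4, 2, 86]: each output channel is the
# matching input channel convolved with `taps`.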
def call(self, inputs):
    if self.rank == 1:
        outputs = K.conv1d(
            inputs,
            self.kernel,
            strides=self.strides[0],
            padding=self.padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate[0],
        )
    if self.rank == 2:
        kernel = self.kernel
        if self.standardization:
            kernel_mean = K.mean(kernel, axis=[0, 1, 2], keepdims=True)
            kernel = kernel - kernel_mean
            kernel_std = K.std(kernel, axis=[0, 1, 2], keepdims=True)
            kernel = kernel / (kernel_std + 1e-5)
        outputs = K.conv2d(
            inputs,
            kernel,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate,
        )
    if self.rank == 3:
        outputs = K.conv3d(
            inputs,
            self.kernel,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate,
        )

    if self.use_bias:
        outputs = K.bias_add(outputs, self.bias, data_format=self.data_format)
    if self.activation is not None:
        return self.activation(outputs)
    return outputs
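# Illustrative sketch (an addition, not from the original source): the rank-2
# branch above performs weight standardization, i.e. each output filter of the
# kernel is shifted to zero mean and scaled to unit variance before the
# convolution. The same arithmetic on a toy kernel:
import tensorflow as tf
from tensorflow.keras import backend as K

kernel = K.random_normal((3, 3, 16, 32))               # (kh, kw, in_ch, out_ch)
mean = K.mean(kernel, axis=[0, 1, 2], keepdims=True)   # per-output-filter mean
centered = kernel - mean
std = K.std(centered, axis=[0, 1, 2], keepdims=True)   # per-output-filter std
standardized = centered / (std + 1e-5)                 # zero mean, unit variance per filter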
def call(self, x, **kwargs):
    debug_print("call")
    # filters = K.zeros(shape=(N_filt, Filt_dim))

    # Compute the filters.
    output_list = []
    for i in range(self.N_filt):
        low_pass1 = (
            2 * self.filt_beg_freq[i] *
            sinc(self.filt_beg_freq[i] * self.freq_scale, self.t_right))
        low_pass2 = (
            2 * self.filt_end_freq[i] *
            sinc(self.filt_end_freq[i] * self.freq_scale, self.t_right))
        band_pass = low_pass2 - low_pass1
        band_pass = band_pass / K.max(band_pass)
        output_list.append(band_pass * self.window)

    filters = K.stack(output_list)  # (80, 251)
    filters = K.transpose(filters)  # (251, 80)
    filters = K.reshape(filters, (self.Filt_dim, 1, self.N_filt))
    # (251, 1, 80) -- in TF: (filter_width, in_channels, out_channels);
    # in PyTorch: (out_channels, in_channels, filter_width)

    """Given an input tensor of shape [batch, in_width, in_channels] if
    data_format is "NWC", or [batch, in_channels, in_width] if data_format is
    "NCW", and a filter / kernel tensor of shape
    [filter_width, in_channels, out_channels], this op reshapes the arguments
    to pass them to conv2d to perform the equivalent convolution operation.

    Internally, this op reshapes the input tensors and invokes tf.nn.conv2d.
    For example, if data_format does not start with "NC", a tensor of shape
    [batch, in_width, in_channels] is reshaped to
    [batch, 1, in_width, in_channels], and the filter is reshaped to
    [1, filter_width, in_channels, out_channels]. The result is then reshaped
    back to [batch, out_width, out_channels] (where out_width is a function of
    the stride and padding as in conv2d) and returned to the caller.
    """

    # Do the convolution.
    debug_print("call")
    debug_print("  x", x)
    debug_print("  filters", filters)
    out = K.conv1d(x, kernel=filters)
    debug_print("  out", out)
    return out
def call(self, inputs):
    if self._is_causal:  # Apply causal padding to inputs for Conv1D.
        inputs = array_ops.pad(inputs, self._compute_causal_padding(inputs))

    self.U = self.calcU()
    kernel = self.W * self.U
    outputs = K.conv1d(
        inputs,
        kernel,
        strides=self.strides,
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=self.dilation_rate)

    if self.use_bias:
        if self.data_format == 'channels_first':
            bias = array_ops.reshape(self.bias, (1, self.filters, 1))
            outputs += bias
        else:
            outputs = nn.bias_add(outputs, self.bias, data_format='NHWC')

    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def call(self, inputs): """Following the routing algorithm from Hinton's paper, but replace b = b + <u,v> with b = <u,v>. This change can improve the feature representation of Capsule. However, you can replace b = K.batch_dot(outputs, hat_inputs, [2, 3]) with b += K.batch_dot(outputs, hat_inputs, [2, 3]) to realize a standard routing. """ if self.share_weights: hat_inputs = K.conv1d(inputs, self.kernel) else: hat_inputs = K.local_conv1d(inputs, self.kernel, [1], [1]) batch_size = K.shape(inputs)[0] input_num_capsule = K.shape(inputs)[1] hat_inputs = K.reshape(hat_inputs, (batch_size, input_num_capsule, self.num_capsule, self.dim_capsule)) hat_inputs = K.permute_dimensions(hat_inputs, (0, 2, 1, 3)) b = K.zeros_like(hat_inputs[:, :, :, 0]) for i in range(self.routings): c = softmax(b, 1) # o = self.activation(K.batch_dot(c, hat_inputs, [2, 2])) # [2, 2] o = tf.einsum('bin,binj->bij', c, hat_inputs) # print(2, o.shape) if i < self.routings - 1: o = K.l2_normalize(o, -1) # b = K.batch_dot(o, hat_inputs, [2, 3]) b = tf.einsum('bij,binj->bin', o, hat_inputs) return o
def attention(self,
              pre_q,
              pre_v,
              pre_k,
              out_seq_len: int,
              d_model: int,
              mask=None,
              training=None):
    """
    Calculates the output of the attention once the affine transformations
    of the inputs are done. Here are the shapes of the arguments:

    :param pre_q: (batch_size, q_seq_len, num_heads, d_model // num_heads)
    :param pre_v: (batch_size, v_seq_len, num_heads, d_model // num_heads)
    :param pre_k: (batch_size, k_seq_len, num_heads, d_model // num_heads)
    :param out_seq_len: the length of the output sequence
    :param d_model: dimensionality of the model (as in the paper)
    :param training: Passed by Keras. Should not be defined manually.
        Optional scalar tensor indicating if we're in training
        or inference phase.
    """
    # shaping Q and V into (batch_size, num_heads, seq_len, d_model // heads)
    q = K.permute_dimensions(pre_q, [0, 2, 1, 3])
    v = K.permute_dimensions(pre_v, [0, 2, 1, 3])

    if self.compression_window_size is None:
        k_transposed = K.permute_dimensions(pre_k, [0, 2, 3, 1])
    else:
        # Memory-compressed attention described in the paper
        # "Generating Wikipedia by Summarizing Long Sequences"
        # (https://arxiv.org/pdf/1801.10198.pdf)
        # It compresses keys and values using 1D convolution, which reduces
        # the size of Q * K_transposed from roughly seq_len^2
        # to convoluted_seq_len^2. If we use strided convolution with
        # window size = 3 and stride = 3, memory requirements of such
        # memory-compressed attention will be 9 times smaller than
        # that of the original version.
        if self.use_masking:
            raise NotImplementedError(
                "Masked memory-compressed attention has not "
                "been implemented yet")
        k = K.permute_dimensions(pre_k, [0, 2, 1, 3])
        k, v = [
            K.reshape(
                # Step 3: Return the result to its original dimensions
                # (batch_size, num_heads, seq_len, d_model // heads)
                K.bias_add(
                    # Step 3: ... and add bias
                    K.conv1d(
                        # Step 2: we "compress" K and V using strided conv
                        K.reshape(
                            # Step 1: we reshape K and V to
                            # (batch_size * num_heads, seq_len, d_model // heads)
                            item,
                            (-1,
                             K.int_shape(item)[-2],
                             d_model // self.num_heads)),
                        kernel,
                        strides=self.compression_window_size,
                        padding='valid',
                        data_format='channels_last'),
                    bias,
                    data_format='channels_last'),
                # new shape
                K.concatenate(
                    [K.shape(item)[:2],
                     [-1, d_model // self.num_heads]]))
            for item, kernel, bias in (
                (k, self.k_conv_kernel, self.k_conv_bias),
                (v, self.v_conv_kernel, self.v_conv_bias))
        ]
        k_transposed = K.permute_dimensions(k, [0, 1, 3, 2])

    # shaping K into (batch_size, num_heads, d_model // heads, seq_len)
    # for further matrix multiplication
    sqrt_d = K.constant(np.sqrt(d_model // self.num_heads), dtype=K.floatx())
    q_shape = tf.shape(q)
    k_t_shape = tf.shape(k_transposed)
    v_shape = tf.shape(v)

    # before performing batch_dot all tensors are being converted to 3D
    # shape (batch_size * num_heads, rows, cols) to make sure batch_dot
    # performs identically on all backends
    attention_heads = K.reshape(
        K.batch_dot(
            self.apply_dropout_if_needed(
                K.softmax(
                    self.mask_attention_if_needed(
                        K.batch_dot(
                            K.reshape(q, tf.stack((-1, q_shape[-2], q_shape[-1]))),
                            K.reshape(k_transposed,
                                      tf.stack((-1, k_t_shape[-2], k_t_shape[-1]))))
                        / sqrt_d,
                        mask=mask)),
                training=training),
            K.reshape(v, tf.stack((-1, v_shape[-2], v_shape[-1])))),
        tf.stack((-1, self.num_heads, q_shape[-2], v_shape[-1])))

    attention_heads_merged = K.reshape(
        K.permute_dimensions(attention_heads, [0, 2, 1, 3]), (-1, d_model))

    if out_seq_len is None:
        output_shape = tf.stack([-1, tf.shape(pre_k)[1], d_model])
    else:
        output_shape = (-1, out_seq_len, d_model)

    attention_out = K.reshape(
        K.dot(attention_heads_merged, self.output_weights), output_shape)
    return attention_out
def call(self, x):
    out = K.conv1d(x, kernel=self.filters)
    return out