def positional_signal(hidden_size: int, length: int,
                      min_timescale: float = 1.0,
                      max_timescale: float = 1e4):
    """Build a basic sinusoidal positional-encoding tensor.

    Partially based on the implementation in the Tensor2Tensor library:
    https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/layers/common_attention.py

    Args:
        hidden_size: Size of the last axis of the signal; must be even.
            The first half holds sines, the second half cosines.
        length: Number of positions to encode.
        min_timescale: Smallest timescale of the geometric progression.
        max_timescale: Largest timescale of the geometric progression.

    Returns:
        A backend tensor of shape ``(1, length, hidden_size)``.

    Raises:
        ValueError: If ``hidden_size`` is odd.
    """
    if hidden_size % 2 != 0:
        raise ValueError(
            "The hidden dimension of the model must be divisible by 2. "
            f"Currently it is {hidden_size}")
    position = K.arange(0, length, dtype=K.floatx())
    num_timescales = hidden_size // 2
    # With a single timescale (hidden_size == 2) there is no progression to
    # spread; clamp the divisor so we do not divide by zero.
    log_timescale_increment = K.constant(
        np.log(float(max_timescale) / float(min_timescale))
        / max(num_timescales - 1, 1),
        dtype=K.floatx())
    inv_timescales = min_timescale * K.exp(
        K.arange(num_timescales, dtype=K.floatx())
        * -log_timescale_increment)
    # Outer product: (length, 1) * (1, num_timescales).
    scaled_time = (K.expand_dims(position, 1)
                   * K.expand_dims(inv_timescales, 0))
    signal = K.concatenate([K.sin(scaled_time), K.cos(scaled_time)], axis=1)
    return K.expand_dims(signal, axis=0)
def call(self, inputs):
    """Apply a per-sample depthwise convolution selected by class index.

    Each sample gets its own kernel (and bias, if present) gathered from
    ``self.kernel`` / ``self.bias`` using the class index. The batch is
    folded into the channel axis so that a single
    ``tf.nn.depthwise_conv2d`` call convolves every sample with its own
    kernel.

    Args:
        inputs: Sequence ``[x, cls]``. ``x`` is the 4-D feature tensor in
            ``self.data_format`` layout; ``cls`` holds class indices with a
            trailing singleton axis (it is squeezed on the last axis).

    Returns:
        The convolved tensor in ``self.data_format`` layout, passed through
        ``self.activation`` when one is set.

    Raises:
        ValueError: If ``self.data_format`` is not ``'channels_first'`` or
            ``'channels_last'``.
    """
    if self.data_format not in {'channels_first', 'channels_last'}:
        raise ValueError('Unknown data_format ' + str(self.data_format))
    channels_first = self.data_format == 'channels_first'

    strides = (1,) + self.strides + (1,)
    x = inputs[0]
    cls = K.squeeze(inputs[1], axis=-1)

    # Kernel preprocess
    kernel = K.gather(self.kernel, cls)  # (bs, w, h, c)
    kernel = tf.transpose(kernel, [1, 2, 3, 0])  # (w, h, c, bs)
    kernel = K.reshape(
        kernel,
        (self.kernel_size[0], self.kernel_size[1], -1))  # (w, h, c * bs)
    kernel = K.expand_dims(kernel, axis=-1)  # (w, h, c * bs, 1)

    if channels_first:
        # Work in channels-last internally.
        x = tf.transpose(x, [0, 2, 3, 1])
    _, w, h, _ = K.int_shape(x)  # (bs, w, h, c)
    x = tf.transpose(x, [1, 2, 3, 0])  # (w, h, c, bs)
    x = K.reshape(x, (w, h, -1))  # (w, h, c * bs)
    x = K.expand_dims(x, axis=0)  # (1, w, h, c * bs)

    padding = _preprocess_padding(self.padding)
    outputs = tf.nn.depthwise_conv2d(
        x, kernel, strides=strides, padding=padding,
        rate=self.dilation_rate)  # (1, w, h, c * bs)

    _, w, h, _ = K.int_shape(outputs)
    outputs = K.reshape(outputs, [w, h, self.filters, -1])  # (w, h, c, bs)
    outputs = tf.transpose(outputs, [3, 0, 1, 2])  # (bs, w, h, c)

    if self.bias is not None:  # (num_cls, out)
        bias = tf.gather(self.bias, cls)  # (bs, bias)
        bias = tf.expand_dims(bias, axis=1)
        bias = tf.expand_dims(bias, axis=1)  # (bs, 1, 1, bias)
        outputs += bias

    if channels_first:
        outputs = tf.transpose(outputs, [0, 3, 1, 2])

    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def call(self, x):
    """Blend three 768-wide feature segments with learned attention.

    The last axis of ``x`` is read as three consecutive slices of width
    768. Each slice is scored against ``self.W`` (plus ``self.b``), the
    three scores are squashed with ``tanh``, exponentiated and normalised
    across the three segments, and the slices are summed with those
    weights.

    Args:
        x: Tensor whose last axis holds at least ``3 * 768`` features
            (assumes ``self.features_dim == 768`` -- TODO confirm).

    Returns:
        The attention-weighted sum of the three slices.
    """
    width = 768
    segments = [x[:, :, idx * width:(idx + 1) * width] for idx in range(3)]
    weight_column = K.reshape(self.W, (self.features_dim, 1))

    scores = []
    for segment in segments:
        flat = K.reshape(segment, (-1, self.features_dim))
        score = K.reshape(K.dot(flat, weight_column), (-1, self.step_dim))
        score += self.b
        scores.append(K.expand_dims(score))

    eij = keras.layers.concatenate(scores, axis=2)
    print(eij)
    a = K.exp(K.tanh(eij))
    # Normalise across the three segments; epsilon guards a zero sum.
    a /= K.cast(K.sum(a, axis=2, keepdims=True) + K.epsilon(), K.floatx())
    print(a)

    temp = (a[:, :, 0:1] * segments[0]
            + a[:, :, 1:2] * segments[1]
            + a[:, :, 2:3] * segments[2])
    print(temp)
    return temp
def box_iou(b1, b2):
    """Compute the pairwise IoU between two sets of centre-format boxes.

    Args:
        b1: tensor, shape=(..., 4), boxes as (x, y, w, h).
        b2: tensor, shape=(j, 4), boxes as (x, y, w, h).

    Returns:
        iou: tensor, shape=(..., j).
    """
    def _corners(boxes):
        # Convert (centre, size) into (min corner, max corner, size).
        centre = boxes[..., :2]
        size = boxes[..., 2:4]
        half = size / 2.
        return centre - half, centre + half, size

    # Insert broadcast axes so every b1 box meets every b2 box.
    first_min, first_max, first_wh = _corners(K.expand_dims(b1, axis=-2))
    second_min, second_max, second_wh = _corners(K.expand_dims(b2, axis=0))

    overlap_min = K.maximum(first_min, second_min)
    overlap_max = K.minimum(first_max, second_max)
    overlap_wh = K.maximum(overlap_max - overlap_min, 0.)
    overlap_area = overlap_wh[..., 0] * overlap_wh[..., 1]

    first_area = first_wh[..., 0] * first_wh[..., 1]
    second_area = second_wh[..., 0] * second_wh[..., 1]
    union_area = first_area + second_area - overlap_area
    return overlap_area / union_area
def overlap(a, b):
    """Compute the IoU overlap between every box in `a` and every box in `b`.

    Boxes are read as (x1, y1, x2, y2) corner coordinates. The computation
    uses backend ops, so the inputs and the result are backend tensors.

    Args:
        a: boxes of shape (N, 4).
        b: boxes of shape (K, 4).

    Returns:
        A tensor of shape (N, K) holding the overlap of each pair.
    """
    def _clipped_extent(low, high):
        # Length of the intersection along one axis, floored at zero.
        extent = (K.minimum(K.expand_dims(a[:, high], axis=1), b[:, high])
                  - K.maximum(K.expand_dims(a[:, low], axis=1), b[:, low]))
        return K.maximum(extent, 0)

    b_area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    a_area = K.expand_dims(
        (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), axis=1)

    inter_w = _clipped_extent(0, 2)
    inter_h = _clipped_extent(1, 3)

    # Floor the union at epsilon to avoid dividing by zero.
    union = K.maximum(a_area + b_area - inter_w * inter_h, K.epsilon())
    return (inter_w * inter_h) / union
def call(self, inputs, mask=None, training=None):
    """Run multi-head attention with relative positions and memory.

    Args:
        inputs: Five tensors ``(inputs, relatives, memories, bias_context,
            bias_relative)``. Memories are prepended to the inputs on the
            time axis to form the keys and values.
        mask: Optional list; only ``mask[0]`` (the mask of the current
            inputs) is used. Memory positions are always treated as valid.
        training: Forwarded to the attention dropout layer, if any.

    Returns:
        The attended output; when the static input shape is known the
        result is reshaped to match it.
    """
    inputs, relatives, memories, bias_context, bias_relative = inputs
    full = K.concatenate([memories, inputs], axis=1)  # (batch, prev_len + seq_len, units)

    query = K.dot(inputs, self.kernel_q)         # (batch, seq_len, units)
    key_value = K.dot(full, self.kernel_kv)      # (batch, prev_len + seq_len, units * 2)
    relative = K.dot(relatives, self.kernel_r)   # (batch, prev_len + seq_len, units)
    if self.use_bias:
        query = K.bias_add(query, self.bias_q)
        key_value = K.bias_add(key_value, self.bias_kv)
        relative = K.bias_add(relative, self.bias_r)
    if self.activation is not None:
        query = self.activation(query)
        key_value = self.activation(key_value)
        relative = self.activation(relative)

    # The joint projection holds keys first, values second.
    key = key_value[:, :, :self.units]
    value = key_value[:, :, self.units:]

    # Content-based term.
    content_query = self._reshape_to_batches(
        K.bias_add(query, bias_context))         # (batch * n_head, seq_len, units_head)
    key = self._reshape_to_batches(key)          # (batch * n_head, prev_len + seq_len, units_head)
    content_scores = K.batch_dot(content_query, key, axes=2)

    # Position-based term.
    position_query = self._reshape_to_batches(
        K.bias_add(query, bias_relative))        # (batch * n_head, seq_len, units_head)
    relative = self._reshape_to_batches(relative)
    position_scores = self._relative_shift(
        K.batch_dot(position_query, relative, axes=2))

    scale = K.sqrt(K.constant(self.units_head, dtype=K.floatx()))
    scores = (content_scores + position_scores) / scale
    # Numerically-stable softmax numerator.
    weights = K.exp(scores - K.max(scores, axis=-1, keepdims=True))

    # Causal mask: query i may only see keys up to (k_len - q_len + i).
    q_len, k_len = K.shape(query)[1], K.shape(key)[1]
    key_positions = K.expand_dims(K.arange(0, k_len), axis=0)
    last_visible = K.expand_dims(K.arange(k_len - q_len, k_len), axis=-1)
    weights *= K.expand_dims(
        K.cast(key_positions <= last_visible, K.floatx()), axis=0)

    if mask is not None and mask[0] is not None:
        input_mask = K.cast(mask[0], K.floatx())
        input_mask = K.concatenate(
            [K.ones_like(memories[:, :, 0]), input_mask], axis=1)
        weights *= K.expand_dims(self._reshape_mask(input_mask), axis=1)

    attention = weights / K.sum(weights, axis=-1, keepdims=True)
    if self.att_drop_layer is not None:
        attention = self.att_drop_layer(attention, training=training)

    value = self._reshape_to_batches(value)      # (batch * n_head, prev_len + seq_len, units_head)
    output = K.batch_dot(attention, value)       # (batch * n_head, seq_len, units_head)
    output = self._reshape_from_batches(output)  # (batch, seq_len, units)
    output = K.dot(output, self.kernel_o)        # (batch, seq_len, units)
    if self.use_bias:
        output = K.bias_add(output, self.bias_o)
    if self.activation is not None:
        output = self.activation(output)

    # Add shape information to tensor when using `tf.keras`
    static_shape = K.int_shape(inputs)
    if static_shape[1] is not None:
        output = K.reshape(output, (-1,) + static_shape[1:])
    return output
def layer(x):
    """Append mean/max summaries along axis 1, then apply the block and a 1x1x1 conv.

    The mean and the max of ``x`` over axis 1 are concatenated to ``x`` on
    that same axis before ``building_block(filters)`` and a ``Conv3D`` with
    ``classes`` output channels are applied.
    """
    pooled_mean = K.mean(x, axis=1, keepdims=True)
    pooled_max = K.max(x, axis=1, keepdims=True)
    augmented = concatenate([x, pooled_mean, pooled_max], axis=1)
    features = building_block(filters)(augmented)
    return Conv3D(classes, 1, data_format=DATA_FORMAT)(features)
def _roi_align(args):
    """Crop and resize features for each box from its assigned pyramid level.

    Args:
        args: Sequence ``(boxes, scores, fpn)``. ``boxes`` are read as
            (x1, y1, x2, y2) columns; ``fpn`` is a sequence of feature
            maps, one per pyramid level. ``scores`` is unpacked but not
            used in this function.

    Returns:
        The cropped regions, scattered back using the per-level indices.

    Relies on ``self`` and ``image_shape`` from the enclosing scope.
    """
    boxes = args[0]
    scores = args[1]
    fpn = args[2]
    # compute from which level to get features from
    target_levels = self.map_to_level(boxes)
    # process each pyramid independently
    rois, ordered_indices = [], []
    for i in range(len(fpn)):
        # select the boxes and classification from this pyramid level
        indices = tf.where(K.equal(target_levels, i))
        ordered_indices.append(indices)
        level_boxes = tf.gather_nd(boxes, indices)
        fpn_shape = K.cast(K.shape(fpn[i]), dtype=K.floatx())
        # convert to expected format for crop_and_resize
        # (normalised [y1, x1, y2, x2] relative to the feature map)
        x1 = level_boxes[:, 0]
        y1 = level_boxes[:, 1]
        x2 = level_boxes[:, 2]
        y2 = level_boxes[:, 3]
        level_boxes = K.stack([
            (y1 / image_shape[1] * fpn_shape[0]) / (fpn_shape[0] - 1),
            (x1 / image_shape[2] * fpn_shape[1]) / (fpn_shape[1] - 1),
            (y2 / image_shape[1] * fpn_shape[0] - 1) / (fpn_shape[0] - 1),
            (x2 / image_shape[2] * fpn_shape[1] - 1) / (fpn_shape[1] - 1),
        ], axis=1)
        if(len(fpn[i].get_shape()) >=4):
            # Rank >= 4 feature map: crop each slice along axis 3 separately.
            unstack = tf.unstack(fpn[i], axis=3)
            temp_stack=[]
            for j in unstack:
                # All boxes are assigned to image index 0 of the crop input.
                temp = tf.image.crop_and_resize(
                    K.expand_dims(j, axis=3), level_boxes,
                    tf.zeros((K.shape(level_boxes)[0],), dtype='int32'),
                    (self.crop_size[0], self.crop_size[1]))
                temp_stack.append(temp)
            # NOTE(review): this appends a Python list of tensors, so `rois`
            # becomes a list of lists before K.concatenate below -- confirm
            # this is intended / works with the backend in use.
            rois.append(temp_stack)
        else:
            rois.append(tf.image.crop_and_resize(
                K.expand_dims(fpn[i], axis=0), level_boxes,
                tf.zeros((K.shape(level_boxes)[0],), dtype='int32'),
                self.crop_size
            ))
    # concatenate rois to one blob
    rois = K.concatenate(rois, axis=0)
    # reorder rois back to original order
    indices = K.concatenate(ordered_indices, axis=0)
    rois = tf.scatter_nd(indices, rois, K.cast(K.shape(rois), 'int64'))
    return rois
def discriminative_instance_loss(y_true, y_pred, delta_v=0.5, delta_d=1.5,
                                 gamma=1e-3):
    """Discriminative loss between an embedding tensor and instance masks.

    The loss is the sum of a variance term (pull embeddings towards their
    instance mean), a distance term (push instance means apart) and a
    regularisation term on the norm of the means.

    NOTE(review): the channels_first path looks shape-inconsistent (the
    default norm axis and the mean broadcast both assume the embedding is
    the last axis) -- confirm before relying on it.

    Args:
        y_true: A tensor of the same shape as y_pred.
        y_pred: A tensor of the vector embedding.
        delta_v: Margin of the variance term.
        delta_d: Half-margin of the distance term (``2 * delta_d`` is used).
        gamma: Weight of the regularisation term.

    Returns:
        tensor: Output tensor.
    """
    channels_first = K.image_data_format() == 'channels_first'

    def temp_norm(ten, axis=None):
        # Epsilon-stabilised L2 norm; defaults to the channel axis.
        if axis is None:
            if K.image_data_format() == 'channels_first':
                axis = 1
            else:
                axis = K.ndim(ten) - 1
        return K.sqrt(K.epsilon() + K.sum(K.square(ten), axis=axis))

    rank = K.ndim(y_pred)
    channel_axis = 1 if channels_first else rank - 1
    axes = [ax for ax in range(rank) if ax != channel_axis]

    # Compute variance loss
    cells_summed = tf.tensordot(y_true, y_pred, axes=[axes, axes])
    n_pixels = K.cast(tf.count_nonzero(y_true, axis=axes),
                      dtype=K.floatx()) + K.epsilon()
    n_pixels_expand = K.expand_dims(n_pixels, axis=1) + K.epsilon()
    mu = tf.divide(cells_summed, n_pixels_expand)

    variance_margin = K.constant(delta_v, dtype=K.floatx())
    mu_tensor = tf.tensordot(y_true, mu, axes=[[channel_axis], [0]])
    deviation = temp_norm(y_pred - mu_tensor)
    hinge_var = K.square(K.relu(deviation - variance_margin))
    per_instance = tf.tensordot(hinge_var, y_true, axes=[axes, axes])
    L_var = K.mean(tf.divide(per_instance, n_pixels))

    # Compute distance loss
    diff_matrix = tf.subtract(K.expand_dims(mu, axis=1),
                              K.expand_dims(mu, axis=0))
    pair_dist = temp_norm(diff_matrix)
    hinge_dist = K.square(
        K.relu(K.constant(2 * delta_d, dtype=K.floatx()) - pair_dist))
    # Zero the diagonal: an instance is not pushed away from itself.
    zero_diag = K.constant(0, dtype=K.floatx()) * tf.diag_part(hinge_dist)
    L_dist = K.mean(tf.matrix_set_diag(hinge_dist, zero_diag))

    # Compute regularization loss
    L_reg = gamma * temp_norm(mu)

    return L_var + L_dist + K.mean(L_reg)
def broadcast_sum(a, b):
    """Sum every row of `a` with every row of `b`, flattened pairwise.

    Args:
        a: tensor of shape (batch_size, a1, d3).
        b: tensor of shape (batch_size, b1, d3).

    Returns:
        Tensor of shape (batch_size, a1*b1, d3).
    """
    # (batch_size, a1, 1, d3) + (batch_size, 1, b1, d3)
    # broadcasts to (batch_size, a1, b1, d3).
    pairwise = K.expand_dims(a, 2) + K.expand_dims(b, 1)
    dims = K.shape(pairwise)
    # Merge the two middle axes: (batch_size, a1*b1, d3).
    flat_shape = K.concatenate([dims[0:1], dims[1:2] * dims[2:3], dims[3:4]])
    return K.reshape(pairwise, flat_shape)
def tp_score(y_true, y_pred, threshold=0.1):
    """Count true positives: truthy targets whose prediction exceeds `threshold`.

    Args:
        y_true: Target tensor; cast to bool element-wise.
        y_pred: Prediction tensor.
        threshold: Predictions strictly greater than this count as positive.

    Returns:
        An int32 scalar tensor with the number of true positives.
    """
    def _bool_column(tensor):
        # Flatten to a single (N, 1) boolean column.
        return K.cast(K.expand_dims(K.flatten(tensor)), 'bool')

    truth = _bool_column(y_true)
    predicted = _bool_column(K.greater(y_pred, K.constant(threshold)))
    # Constant all-true column (does not affect the conjunction).
    always = K.cast(K.ones_like(K.expand_dims(K.flatten(y_pred))), 'bool')

    columns = K.concatenate([truth, predicted, always], axis=1)
    hits = K.all(columns, axis=1)
    return K.sum(K.cast(hits, 'int32'))
def call(self, inputs, **kwargs):
    """Apply additive self-attention over the time axis of `inputs`.

    Queries and keys are projected with ``self.Wq`` / ``self.Wk``, combined
    pairwise through ``tanh`` and scored with ``self.Wv``. The scores are
    softmax-normalised over the key axis and used to mix ``inputs``.

    Args:
        inputs: 3-D tensor ``(batch, input_len, features)``.

    Returns:
        The attended tensor ``batch_dot(attention, inputs)``.
    """
    # Prefer the static length; fall back to the dynamic one. The batch
    # axis is left as -1 so a dynamic (None) batch size does not break
    # the reshape below.
    static_len = K.int_shape(inputs)[1]
    input_len = static_len if static_len is not None else K.shape(inputs)[1]

    q = K.expand_dims(K.dot(inputs, self.Wq), 2)  # query axis kept, key axis added
    k = K.expand_dims(K.dot(inputs, self.Wk), 1)  # key axis kept, query axis added
    h = tf.tanh(q + k + self.bh)                  # broadcast to all (query, key) pairs
    e = K.dot(h, self.Wv) + self.ba
    e = tf.reshape(e, shape=(-1, input_len, input_len))

    # Numerically-stable softmax over the key axis.
    e = K.exp(e - K.max(e, axis=-1, keepdims=True))
    s = K.sum(e, axis=-1, keepdims=True)
    a = e / (s + K.epsilon())
    return K.batch_dot(a, inputs)
def call(self, inputs, **kwargs):
    """Build sinusoidal embeddings for descending relative positions.

    Args:
        inputs: Two tensors; the batch size comes from the first one and
            the total length is the sum of both time-axis (axis 1) lengths.

    Returns:
        A tensor whose last axis concatenates the sines and cosines of the
        scaled positions ``length - 1, ..., 0`` (optionally clamped to
        ``self.clamp_len``).
    """
    first, second = inputs[0], inputs[1]
    batch = K.shape(first)[0]
    total_len = K.shape(first)[1] + K.shape(second)[1]

    # Positions count down from total_len - 1 to 0, repeated per sample.
    distances = K.arange(total_len - 1, -1, -1, dtype=K.floatx())
    distances = K.tile(K.expand_dims(distances, axis=0), [batch, 1])
    if self.clamp_len is not None:
        distances = K.clip(distances, min_value=0, max_value=self.clamp_len)
    distances = K.expand_dims(distances, axis=-1)

    output_dim = K.cast(self.output_dim, K.floatx())
    exponents = K.expand_dims(
        K.arange(0.0, self.output_dim, 2.0), axis=0) / output_dim
    inverse_freq = 1.0 / K.pow(10000.0, exponents)

    angles = distances * inverse_freq
    return K.concatenate([K.sin(angles), K.cos(angles)], axis=-1)
def call(self, x, mask=None):
    """Sum `x` over ``self.axis``, zeroing masked entries first.

    Args:
        x: Input tensor. A 2-D input gets a trailing axis added before the
            sum.
        mask: Optional (batch, time) mask. When its rank differs from
            ``x`` it is repeated along the last axis of ``x`` so the two
            can be multiplied.

    Returns:
        The (masked) sum of ``x`` along ``self.axis``.
    """
    if mask is not None:
        # mask (batch, time)
        mask = K.cast(mask, K.floatx())
        if K.ndim(x) != K.ndim(mask):
            # (batch, time) -> (batch, features, time) -> (batch, time, features)
            mask = K.repeat(mask, x.shape[-1])
            mask = tf.transpose(mask, [0, 2, 1])
        x = x * mask

    if K.ndim(x) == 2:
        x = K.expand_dims(x)
    return K.sum(x, axis=self.axis)
def get_initial_state(self, inputs):
    """Build the initial recurrent state ``[n, d, h, a_max]``.

    All four states have shape (samples, self.units). ``n`` and ``d``
    start at zero, ``h`` starts at the recurrent activation of the cell's
    initial attention, and ``a_max`` starts at a very large negative value.

    Args:
        inputs: 3-D input tensor; only used to derive a batch-sized tensor
            of zeros.

    Returns:
        A list ``[n, d, h, a_max]`` of state tensors.
    """
    # Collapse the inputs to one zero per sample, then widen to `units`.
    zeros = K.sum(K.zeros_like(inputs), axis=(1, 2))
    zeros = K.tile(K.expand_dims(zeros), [1, self.units])  # (samples, output_dim)

    # Large constant expressed in the state's own dtype.
    large = np.array([1E38]).astype(zeros.dtype.name).item()
    attention_bias = self.cell.recurrent_activation(
        K.expand_dims(self.cell.initial_attention, axis=0))

    n = K.identity(zeros)
    d = K.identity(zeros)
    h = K.identity(zeros) + attention_bias
    a_max = K.identity(zeros) - large
    return [n, d, h, a_max]
def context_step(inputs, states):
    """Step function computing the context vector c_i from the energies e_i.

    The energies weight ``hiddens_combined`` (from the enclosing scope) and
    the result is summed over axis 1. The states are passed through
    unchanged.
    """
    # (batch_size, h1*h2*...*hn, 1) broadcast against
    # (batch_size, h1*h2*...*hn, lat1+lat2+...+latn)
    weights = K.expand_dims(inputs, -1)
    c_i = K.sum(hiddens_combined * weights, axis=1)
    if verbose:
        print('ci>', K.int_shape(c_i))
    return c_i, states
def context_step(inputs, states):
    """Step function computing the context vector c_i from the energies e_i.

    The energies weight ``encoder_out_seq`` (from the enclosing scope) and
    the result is summed over axis 1. The context vector is also returned
    as the new state.
    """
    weights = K.expand_dims(inputs, -1)
    # <= batch_size, hidden_size
    c_i = K.sum(encoder_out_seq * weights, axis=1)
    if verbose:
        print('ci>', c_i.shape)
    return c_i, [c_i]
def call(self, inputs):
    """Run the parent convolution with a spectrally-normalised kernel.

    The kernel is divided by the value returned by ``max_singular_val``
    (presumably an estimate of its largest singular value -- confirm
    against that helper) for the duration of the parent ``call`` and is
    then restored.

    Args:
        inputs: Forwarded unchanged to the parent layer's ``call``.

    Returns:
        The parent layer's output computed with the normalised kernel.
    """
    kernel_shape = K.int_shape(self.kernel)
    if self.renormalize:
        # Treat the whole kernel as a single matrix.
        w = K.reshape(self.kernel, (-1, kernel_shape[-1]))
    else:
        # One flattened vector per (first axis, last axis) pair.
        w = tf.transpose(self.kernel, (0, 3, 1, 2))
        w = K.reshape(w, [-1, kernel_shape[1] * kernel_shape[2]])
        w = K.expand_dims(w, axis=-1)

    sigma, u_bar = max_singular_val(
        w, self.u,
        fully_differentiable=self.fully_diff_spectral,
        ip=self.spectral_iterations)
    if not self.renormalize:
        sigma = K.reshape(sigma, [kernel_shape[0], 1, 1, kernel_shape[-1]])

    self.add_update(K.update(self.u, u_bar))

    original_kernel = self.kernel
    self.kernel = original_kernel / sigma
    try:
        outputs = super(SNConditionalDepthwiseConv2D, self).call(inputs)
    finally:
        # Always restore the raw kernel, even if the parent call raises,
        # so the layer is never left holding the normalised tensor.
        self.kernel = original_kernel
    return outputs
def call(self, x, mask=None):
    """Pool `x` over axis 1 with learned attention weights.

    Args:
        x: Input tensor; scored against ``self.W`` via ``dot_product``.
        mask: Optional mask multiplied into the unnormalised weights.

    Returns:
        The weighted sum over axis 1, or ``[result, weights]`` when
        ``self.return_attention`` is set.
    """
    scores = dot_product(x, self.W)
    if self.bias:
        scores += self.b
    weights = K.exp(K.tanh(scores))

    # Apply the mask after the exp; the weights are re-normalised next.
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        weights *= K.cast(mask, K.floatx())

    # Early in training the sum may be almost zero, which would produce
    # NaNs; a small positive epsilon is added to the sum as a workaround.
    weights /= K.cast(
        K.sum(weights, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    pooled = K.sum(x * K.expand_dims(weights), axis=1)
    if self.return_attention:
        return [pooled, weights]
    return pooled
def call(self, inputs, mask=None):
    """Attend over keys `k` with queries `q`: output = softmax(score) . k.

    Args:
        inputs: Pair ``(k, q)``. A 2-D ``q`` gets a length-1 query axis.
        mask: Accepted but not used.

    Returns:
        Tensor of shape (?, Q_LEN, EMBED_DIM).

    Raises:
        RuntimeError: If ``self.score_function`` is not one of
            ``'scaled_dot_product'``, ``'mlp'`` or ``'bi_linear'``.

    NOTE(review): the 'scaled_dot_product' branch divides by EMBED_DIM
    rather than its square root -- confirm this is intended.
    """
    k, q = inputs
    if len(q.shape) == 2:
        q = K.expand_dims(q, axis=1)
    # k: (?, K_LEN, EMBED_DIM,)
    # q: (?, Q_LEN, EMBED_DIM,)
    # score: (?, Q_LEN, K_LEN,)
    mode = self.score_function
    if mode not in ('scaled_dot_product', 'mlp', 'bi_linear'):
        raise RuntimeError('invalid score_function')

    if mode == 'mlp':
        joined = K.concatenate([k, q], axis=1)
        hidden = K.tanh(K.dot(joined, self.W2))
        score = K.permute_dimensions(K.dot(self.W1, hidden), (1, 0, 2))
    else:
        keys_t = K.permute_dimensions(k, (0, 2, 1))
        if mode == 'scaled_dot_product':
            score = K.batch_dot(q, keys_t) / self.EMBED_DIM
        else:  # 'bi_linear'
            score = K.batch_dot(K.dot(q, self.W), keys_t)

    score = K.softmax(score)
    # output: (?, Q_LEN, EMBED_DIM,)
    return K.batch_dot(score, k)
def energy_step(decode_outs, states):
    """Step function computing attention energies for one decoder step.

    Computes ``softmax(V_a . tanh(W_a . h_j + U_a . s_t))`` over the
    encoder time axis, using ``encoder_out_seq`` from the enclosing scope.

    Args:
        decode_outs: Decoder output for this step, (batch, dim).
        states: Previous states (not read here).

    Returns:
        ``(e_i, [e_i])`` where ``e_i`` has shape (batch, en_seq_len).
    """
    seq_len = encoder_out_seq.shape[1]
    hidden = encoder_out_seq.shape[2]

    # W * h_j: flatten the encoder outputs to (b * seq_len, hidden),
    # project with W_a, then restore (b, seq_len, hidden).
    flat_encoder = K.reshape(encoder_out_seq, (-1, hidden))
    W_a_dot_s = K.reshape(K.dot(flat_encoder, self.W_a),
                          (-1, seq_len, hidden))

    # U * s_(t-1): (b, dim) -> (b, 1, latent_dim)
    U_a_dot_h = K.expand_dims(K.dot(decode_outs, self.U_a), axis=1)

    # The addition broadcasts the single decoder projection across every
    # encoder time step before the tanh:
    # tanh(W * h_j + U * s_(t-1)), flattened to (b * seq_len, hidden).
    combined = K.tanh(K.reshape(W_a_dot_s + U_a_dot_h, (-1, hidden)))

    # V * tanh(...): (b * seq_len, 1) -> (b, seq_len)
    e_i = K.reshape(K.dot(combined, self.V_a), (-1, seq_len))
    e_i = K.softmax(e_i)
    return e_i, [e_i]
def create_inital_state(inputs, hidden_size):
    """Create an all-zero placeholder state of shape (batch_size, hidden_size).

    No real initial state is used, but `K.rnn` needs something to carry;
    the zeros are derived from `inputs` so the batch size matches.

    Args:
        inputs: 3-D tensor (batch_size, enc_seq_len, latent_dim).
        hidden_size: Width of the state to create.
    """
    per_sample_zero = K.sum(K.zeros_like(inputs), axis=[1, 2])  # (batch_size)
    column = K.expand_dims(per_sample_zero)                     # (batch_size, 1)
    return K.tile(column, [1, hidden_size])                     # (batch_size, hidden_size)
def loss_function(target_subtoken, y_pred):
    """Model objective combining a copy term and a generation term.

    Works in log-probability space. Relies on ``input_code_subtoken``,
    ``copy_probability``, ``copy_weights``, ``mu`` and ``unknown_id`` from
    the enclosing scope, and prints intermediate shapes for debugging.

    Args:
        target_subtoken: Target sub-token ids.
        y_pred: Predicted probabilities.

    Returns:
        The mean of the log-sum-exp of the two terms.
    """
    print("Model objective: y_pred.shape: {}".format(y_pred.shape))

    # I_C marks where the target sub-token exists among the input
    # sub-tokens. TODO: probably not ok, debug using TF eager.
    target_ids = K.cast(target_subtoken, 'int32')
    found_in_input = K.any(K.equal(input_code_subtoken, target_ids), axis=-1)
    I_C = K.expand_dims(K.cast(found_in_input, dtype='float32'), -1)
    print("Model objective: I_C.shape: {}".format(I_C.shape))
    # I_C shape = [batch_size, token, max_char_len, 1]

    # TODO: should a penalty be added when no sub-token appears in the
    # input? (Original author's answer: yes.)
    probability_correct_copy = (
        K.log(copy_probability) + K.log(K.sum(I_C * copy_weights) + mu))
    print("Model objective: probability_correct_copy.shape: {}".format(
        probability_correct_copy.shape))

    # Penalise the model when cnn-attention predicts unknown but the value
    # can be predicted from the copy mechanism.
    mask_unknown = K.cast(
        K.equal(target_subtoken, unknown_id), dtype='float32') * mu
    generation_terms = (
        K.log(1 - copy_probability) + K.log(y_pred) + mask_unknown)
    probability_target_token = K.sum(generation_terms, -1, True)
    print("Model objective: probability_target_token.shape: {}".format(
        probability_target_token.shape))

    loss = K.logsumexp([probability_correct_copy, probability_target_token])
    return K.mean(loss)
def call(self, inputs, **kwargs):
    """Pool the input over its length axis with learned attention.

    Args:
        inputs: Either the input tensor alone or a list
            ``[input, mask]``. The mask must be of shape
            (batch, input_length) or (batch, input_length, 1).

    Returns:
        ``(result, attention_weights)`` where ``result`` is the weighted
        sum over axis 1 and ``attention_weights`` is
        (batch, input_length, 1).

    Raises:
        ValueError: If the number of inputs is not one or two, or the mask
            has an unexpected shape.
    """
    if not isinstance(inputs, list):
        inputs = [inputs]
    if not 1 <= len(inputs) <= 2:
        raise ValueError("AttentionLayer expect one or two inputs.")

    actual_input = inputs[0]
    mask = inputs[1] if len(inputs) > 1 else None

    if mask is not None:
        mask_rank = len(mask.shape)
        mask_ok = (((mask_rank == 3 and mask.shape[2] == 1)
                    or mask_rank == 2)
                   and mask.shape[1] == self.input_length)
        if not mask_ok:
            raise ValueError(
                "`mask` should be of shape (batch, input_length) or (batch, input_length, 1) "
                "when calling an AttentionLayer.")

    assert actual_input.shape[-1] == self.attention_param.shape[0]

    # (batch, input_length, input_dim) * (input_dim, 1) ==> (batch, input_length, 1)
    attention_weights = K.dot(actual_input, self.attention_param)

    if mask is not None:
        if len(mask.shape) == 2:
            mask = K.expand_dims(mask, axis=2)  # (batch, input_length, 1)
        # Adding log(mask) before the softmax sends masked positions
        # (mask == 0) to -inf.
        mask = K.log(mask)
        attention_weights += mask

    attention_weights = K.softmax(attention_weights, axis=1)  # (batch, input_length, 1)
    # The multiplication broadcasts the weights over the feature axis;
    # the sum then removes the length axis ==> (batch, input_dim).
    result = K.sum(actual_input * attention_weights, axis=1)
    return result, attention_weights
def call(self, inputs):
    """Produce a two-channel grid of normalised coordinates per sample.

    The coordinates along each of the two spatial axes of ``self.in_shape``
    are divided by their maximum, combined into a mesh grid and tiled to
    the batch size of ``inputs``. The two coordinate maps sit on the
    channel axis implied by ``self.data_format``.

    Args:
        inputs: Tensor used only for its dynamic batch size.

    Returns:
        The location tensor, one copy per sample.
    """
    channels_first = self.data_format == 'channels_first'
    first_axis, second_axis = (1, 2) if channels_first else (0, 1)

    x = K.arange(0, self.in_shape[first_axis], dtype=K.floatx())
    y = K.arange(0, self.in_shape[second_axis], dtype=K.floatx())
    x = x / K.max(x)
    y = y / K.max(y)

    loc_x, loc_y = tf.meshgrid(x, y, indexing='ij')
    loc = K.stack([loc_x, loc_y], axis=0 if channels_first else -1)
    location = K.expand_dims(loc, axis=0)

    # Tiling is done in channels-last layout; channels-first tensors are
    # permuted there and back around it.
    if channels_first:
        location = K.permute_dimensions(location, pattern=[0, 2, 3, 1])
    location = tf.tile(location, [K.shape(inputs)[0], 1, 1, 1])
    if channels_first:
        location = K.permute_dimensions(location, pattern=[0, 3, 1, 2])
    return location
def correlation(self, displace, kernel):
    """Do the actual convolution == correlation.

    Args:
        displace: Tensor that gets a leading axis added to form the
            conv input [batch, in_height, in_width, in_channels].
        kernel: Tensor that gets an axis appended at position 3 to form
            the conv kernel [filter_height, filter_width, in_channels,
            out_channels].

    Returns:
        The 'valid' convolution result with the leading axis removed.
    """
    batched = K.expand_dims(displace, 0)  # add a dimension at the front
    filters = K.expand_dims(kernel, 3)    # add a dimension at the end
    # The kernel slides across the padded strip.
    response = K.conv2d(batched, filters, padding='valid',
                        data_format='channels_last')
    return K.squeeze(response, 0)         # drop the leading dimension
def energy_step(inputs, states):
    """Step function computing the energy for a single decoder state.

    Computes ``softmax(V_a . tanh(S . W_a + h_j . U_a))`` where ``S`` is
    ``encoder_out_seq`` from the enclosing scope.

    Args:
        inputs: (batchsize * 1 * de_in_dim)
        states: (batchsize * 1 * de_latent_dim); not read here.

    Returns:
        ``(e_i, [e_i])`` with ``e_i`` of shape (batch_size, en_seq_len).
    """
    # S.Wa where S=[s0, s1, ..., si] <= batch_size, en_seq_len, latent_dim
    encoder_projection = K.dot(encoder_out_seq, self.W_a)
    # hj.Ua <= batch_size, 1, latent_dim
    decoder_projection = K.expand_dims(K.dot(inputs, self.U_a), 1)
    # tanh(S.Wa + hj.Ua); the decoder term broadcasts over the time axis
    activated = K.tanh(encoder_projection + decoder_projection)
    # va.tanh(...) <= batch_size, en_seq_len
    scores = K.squeeze(K.dot(activated, self.V_a), axis=-1)
    e_i = K.softmax(scores)
    return e_i, [e_i]
def make_patches_grid(x, patch_size, patch_stride):
    """Break image `x` up into a grid of patches.

    input shape: (channels, rows, cols)
    output shape: (rows, cols, channels, patch_rows, patch_cols)

    Returns:
        ``(patches, patches_norm)`` where ``patches_norm`` is the L2 norm
        of each patch over its channel and pixel axes (kept as size-1
        axes).
    """
    from theano.tensor.nnet.neighbours import images2neibs  # TODO: all K, no T

    batched = K.expand_dims(x, 0)
    dims = K.shape(batched)
    num_channels = dims[-3]
    num_rows = 1 + (dims[-2] - patch_size) // patch_stride
    num_cols = 1 + (dims[-1] - patch_size) // patch_stride

    neighbours = images2neibs(batched,
                              (patch_size, patch_size),
                              (patch_stride, patch_stride),
                              mode='valid')
    # neibs are sorted per-channel
    per_channel = K.reshape(
        neighbours,
        (num_channels, K.shape(neighbours)[0] // num_channels,
         patch_size, patch_size))
    per_patch = K.permute_dimensions(per_channel, (1, 0, 2, 3))
    # arrange in a 2d-grid (rows, cols, channels, px, py)
    patches = K.reshape(
        per_patch,
        (num_rows, num_cols, num_channels, patch_size, patch_size))
    patches_norm = K.sqrt(
        K.sum(K.square(patches), axis=(2, 3, 4), keepdims=True))
    return patches, patches_norm
def Kget_dists(X):
    """Keras code to compute the pairwise distance matrix for the row vectors of X.

    Uses the expansion ``|x_i|^2 + |x_j|^2 - 2 * x_i . x_j``, so the
    entries are squared Euclidean distances (no square root is taken).
    """
    squared_norms = K.expand_dims(K.sum(K.square(X), axis=1), 1)
    gram = K.dot(X, K.transpose(X))
    return squared_norms + K.transpose(squared_norms) - 2 * gram
def energy_step(inputs, states):
    """Step function computing the energy for a single decoder state.

    Computes ``softmax(V_a . tanh(W_hi + s_j . U_a))`` using ``W_hi`` from
    the enclosing scope; the states are passed through unchanged.

    Args:
        inputs: (batch_size, latent_dim)
        states: List or tuple of states.

    Returns:
        ``(e_i, states)`` with ``e_i`` of shape (batch_size, h1*h2*...*hn).
    """
    assert_msg = "States must be a list. However states {} is of type {}".format(
        states, type(states))
    assert isinstance(states, (list, tuple)), assert_msg

    # sj.Ua: (batch_size, 1, d3)
    decoder_projection = K.expand_dims(K.dot(inputs, self.U_a), 1)
    if verbose:
        print('Ua.h>', K.int_shape(decoder_projection))

    # tanh(h.Wa + s.Ua):
    # (batch_size, h1*h2*...*hn, d3) + (batch_size, 1, d3),
    # then permuted to (batch_size, d3, h1*h2*...*hn)
    activated = K.permute_dimensions(
        K.tanh(W_hi + decoder_projection), (0, 2, 1))
    if verbose:
        print('Wh+Us>', K.int_shape(activated))

    # (1, batch_size, h1*h2*...*hn) = (1, d3) . (batch_size, d3, h1*h2*...*hn)
    scores = K.dot(self.V_a, activated)
    # (batch_size, h1*h2*...*hn)
    e_i = K.softmax(K.squeeze(scores, 0))
    if verbose:
        print('ei>', K.int_shape(e_i))

    return e_i, states
def get_locallyconnected_mask(input_shape, kernel_shape, strides, padding,
                              data_format, dtype):
    """Return a mask representing connectivity of a locally-connected operation.

    The result is a tensor of 0s and 1s (of type `dtype`). Multiplying it
    element-wise with a fully-connected weight tensor zeroes the weights
    between disconnected input-output pairs, so a sparse fully-connected
    weight tensor behaves as a locally-connected (unshared convolution)
    layer.

    For an input with N spatial dimensions,
    `input_shape = (d_in1, ..., d_inN)`, and an output of spatial shape
    `(d_out1, ..., d_outN)` (determined by layer parameters such as
    `strides`), the mask can be broadcast-multiplied with a 2*(N+1)-D
    weight matrix (N spatial + 1 channel dimensions each for input and
    output) to implement an unshared convolution with the given
    `kernel_shape`, `strides`, `padding` and `data_format`.

    Arguments:
        input_shape: tuple of size N: `(d_in1, ..., d_inN)`, spatial shape
            of the input.
        kernel_shape: tuple of size N, spatial shape of the convolutional
            kernel / receptive field.
        strides: tuple of size N, strides along each spatial dimension.
        padding: type of padding, string `"same"` or `"valid"`.
        data_format: a string, `"channels_first"` or `"channels_last"`.
        dtype: type of the layer operation, e.g. `tf.float64`.

    Returns:
        a `dtype`-tensor of shape
        `(1, d_in1, ..., d_inN, 1, d_out1, ..., d_outN)`
        if `data_format == "channels_first"`, or
        `(d_in1, ..., d_inN, 1, d_out1, ..., d_outN, 1)`
        if `data_format == "channels_last"`.

    Raises:
        ValueError: if `data_format` is neither `"channels_first"`
            nor `"channels_last"`.
    """
    connectivity = conv_utils.conv_kernel_mask(
        input_shape=input_shape,
        kernel_shape=kernel_shape,
        strides=strides,
        padding=padding
    )
    # Half of the axes are input spatial dims, the other half output ones.
    ndims = int(connectivity.ndim / 2)
    mask = K.variable(connectivity, dtype)

    # Positions at which the input and output channel axes are inserted.
    if data_format == 'channels_first':
        input_channel_axis, output_channel_axis = 0, - ndims - 1
    elif data_format == 'channels_last':
        input_channel_axis, output_channel_axis = ndims, -1
    else:
        raise ValueError('Unrecognized data_format: ' + str(data_format))

    mask = K.expand_dims(mask, input_channel_axis)
    return K.expand_dims(mask, output_channel_axis)