def call(self, inputs, **kwargs):
    """Compute a GE2E-style similarity matrix between utterance embeddings
    and speaker centroids (cf. "Generalized End-to-End Loss for Speaker
    Verification").

    Assumes `inputs` stacks self.N speakers x self.M utterances along the
    first axis -- TODO confirm against the caller.
    """
    embedded_split = K.reshape(inputs, shape=(self.N, self.M, -1))
    # Per-speaker centroid, L2-normalized.
    center = K.l2_normalize(K.mean(embedded_split, axis=1), axis=-1)
    # Leave-one-out centroid for each utterance (sum of the speaker's
    # utterances minus the utterance itself).  The missing 1/(M-1) factor
    # is irrelevant because of the following l2_normalize.
    center_except = K.l2_normalize(K.reshape(
        K.sum(embedded_split, axis=1, keepdims=True) - embedded_split,
        shape=(self.N * self.M, -1)), axis=-1)
    # similarity[j*M + m, i] = cosine similarity between utterance m of
    # speaker j and the centroid of speaker i; for i == j the leave-one-out
    # centroid is used so the utterance is not compared against itself.
    similarity = K.concatenate([
        K.concatenate([
            K.sum(center_except[i * self.M:(i + 1) * self.M, :] *
                  embedded_split[j, :, :], axis=1, keepdims=True)
            if i == j else
            K.sum(center[i:(i + 1), :] * embedded_split[j, :, :],
                  axis=1, keepdims=True)
            for i in range(self.N)
        ], axis=1)
        for j in range(self.N)
    ], axis=0)
    # Learnable affine scaling of the similarity matrix.
    similarity = self.w * similarity + self.b
    return similarity
def call(self, inputs, **kwargs):
    """Apply a masked recurrent dense transform to a 3-D input.

    NOTE(review): relies on TF-internal modules (`ops`, `math_ops`,
    `gen_math_ops`, `nn`) being imported at file level.
    """
    # Empty static batch dimension: nothing to do.
    if not inputs.shape[0]:
        return inputs
    recurrent_input = ops.convert_to_tensor(inputs)
    if not self._mixed_precision_policy.should_cast_variables:
        recurrent_input = math_ops.cast(recurrent_input, self.dtype)
    batch_size = recurrent_input.shape[0]
    # Flatten last two dimensions, but along dimension [2]
    flat_recurrent = K.reshape(
        K.permute_dimensions(recurrent_input, (0, 2, 1)),
        (batch_size, -1))
    # The mask zeroes out disallowed recurrent connections in the kernel.
    outputs = gen_math_ops.mat_mul(
        flat_recurrent,
        tf.math.multiply(self.recurrent_kernel, self.recurrent_mask))
    if self.use_bias:
        outputs = nn.bias_add(outputs, self.bias)
    if self.activation is not None:
        outputs = self.activation(outputs)
    # Transform back outputs to original shape.
    # NOTE(review): the first reshape below is immediately followed by a
    # second reshape of its result, so only the transpose's element order
    # and the second target shape matter -- verify this is intentional.
    outputs = K.reshape(
        K.transpose(outputs),
        (self.target_shape[0], self.target_shape[1], batch_size))
    outputs = K.reshape(
        outputs, (self.target_shape[1], self.target_shape[0], batch_size))
    outputs = K.permute_dimensions(outputs, (2, 1, 0))
    return outputs
def call(self, inputs, **kwargs):
    """Embed groups of utterances and mean-pool over the real (non-padded)
    ones.

    Assumes `inputs` is (batch, n_utterances, frames, features) with
    all-zero utterances acting as padding -- TODO confirm against caller.
    """
    inputs_shape = K.shape(inputs)
    # 1.0 where the utterance contains any non-zero value, 0.0 for padding.
    mask = K.cast(K.squeeze(K.any(K.not_equal(inputs, 0.), axis=(-2, -1),
                                  keepdims=True), axis=-1),
                  dtype=inputs.dtype)
    # Merge batch and utterance axes so the parent layer sees 3-D input.
    inputs_to_lstm = K.reshape(inputs,
                               (-1, inputs.shape[-2], inputs.shape[-1]))
    inputs_embed = super(InferenceSpeakerEmbedding, self).call(inputs_to_lstm)
    # Restore the (batch, n_utterances, embed_dim) layout.
    inputs_embed = K.reshape(
        inputs_embed,
        (inputs_shape[0], inputs_shape[1], inputs_embed.shape[-1]))
    # Zero out padded utterances, then average over the real ones only.
    inputs_embed = inputs_embed * mask
    n = K.sum(mask, axis=1)
    inputs_embed = K.sum(inputs_embed, axis=1) / n
    return inputs_embed
def call(self, inputs, **kwargs):
    """Project inputs into the embedding space and attend over the (tied)
    embedding matrix, yielding per-position vocabulary scores.

    inputs: [main_input, embedding_matrix].
    Returns a (batch, time, vocab_size) tensor.
    """
    main_input, embedding_matrix = inputs
    input_shape_tensor = K.shape(main_input)
    last_input_dim = K.int_shape(main_input)[-1]
    emb_input_dim, emb_output_dim = K.int_shape(embedding_matrix)
    # Affine projection from the model dimension to the embedding dimension.
    projected = K.dot(K.reshape(main_input, (-1, last_input_dim)),
                      self.embedding_weights['projection'])
    if self.add_biases:
        projected = K.bias_add(projected,
                               self.embedding_weights['biases'],
                               data_format='channels_last')
    if 0 < self.projection_dropout < 1:
        projected = K.in_train_phase(
            lambda: K.dropout(projected, self.projection_dropout),
            projected,
            training=kwargs.get('training'))
    # Dot against every embedding row: logits over the vocabulary.
    attention = K.dot(projected, K.transpose(embedding_matrix))
    if self.scaled_attention:
        # scaled dot-product attention, described in
        # "Attention is all you need" (https://arxiv.org/abs/1706.03762)
        sqrt_d = K.constant(math.sqrt(emb_output_dim), dtype=K.floatx())
        attention = attention / sqrt_d
    # Restore the (batch, time, vocab) shape after the activation.
    result = K.reshape(
        self.activation(attention),
        (input_shape_tensor[0], input_shape_tensor[1], emb_input_dim))
    return result
def simple_context(X, mask):
    """Headline-over-description attention context.

    X: (batch, time_steps, units) RNN output; the first
    parameters.max_len_desc steps are the description, the rest the
    headline.  Within each part the first parameters.activation_rnn_size
    units drive the attention energies, the remainder are word features.
    mask: input mask; only its description part is used.

    Returns the concatenation of the attended description average with the
    headline word features.
    """
    # Split the sequence into description and headline parts.
    desc, head = X[:, :parameters.max_len_desc, :], X[:, parameters.max_len_desc:, :]
    # Split each part into attention-driving units and word features.
    head_activations, head_words = head[:, :, :parameters.activation_rnn_size], head[:, :, parameters.activation_rnn_size:]
    desc_activations, desc_words = desc[:, :, :parameters.activation_rnn_size], desc[:, :, parameters.activation_rnn_size:]
    # (batch, head_len, desc_len) attention energies.
    activation_energies = K.batch_dot(head_activations, desc_activations,
                                      axes=(2, 2))
    # Mask out padded description positions with a large negative value.
    activation_energies = activation_energies + -1e20 * K.expand_dims(
        1. - K.cast(mask[:, :parameters.max_len_desc], 'float32'), 1)
    # Softmax over description positions for every headline position.
    activation_energies = K.reshape(activation_energies,
                                    (-1, parameters.max_len_desc))
    activation_weights = K.softmax(activation_energies)
    activation_weights = K.reshape(
        activation_weights,
        (-1, parameters.max_len_head, parameters.max_len_desc))
    # Weighted average of description words per headline position.
    desc_avg_word = K.batch_dot(activation_weights, desc_words, axes=(2, 1))
    return K.concatenate((desc_avg_word, head_words))
def call(self, inputs, training=None):
    """Spectrally-normalized embedding lookup.

    Normalizes the embedding matrix by its largest singular value
    (estimated with one step of power iteration) before gathering rows.

    Args:
        inputs: integer indices to look up (cast to int32 if needed).
        training: training flag. NOTE(review): the original body referenced
            `training` without declaring it (NameError at call time); it is
            now an explicit keyword argument with a backward-compatible
            default, matching the sibling spectral-norm layers in this file.

    Returns:
        The gathered, spectrally-normalized embedding vectors.
    """
    if K.dtype(inputs) != 'int32':
        inputs = K.cast(inputs, 'int32')

    def _l2normalize(v, eps=1e-12):
        return v / (K.sum(v ** 2) ** 0.5 + eps)

    def power_iteration(W, u):
        # According to the paper, one power-iteration step is enough.
        _u = u
        _v = _l2normalize(K.dot(_u, K.transpose(W)))
        _u = _l2normalize(K.dot(_v, W))
        return _u, _v

    W_shape = self.embeddings.shape.as_list()
    # Flatten the tensor so it can be treated as a 2-D matrix.
    W_reshaped = K.reshape(self.embeddings, [-1, W_shape[-1]])
    _u, _v = power_iteration(W_reshaped, self.u)
    # Estimate the largest singular value: sigma = v W u^T.
    sigma = K.dot(_v, W_reshaped)
    sigma = K.dot(sigma, K.transpose(_u))
    # Normalize the weights by sigma.
    W_bar = W_reshaped / sigma
    # Reshape back; persist the power-iteration state only while training.
    if training in {0, False}:
        W_bar = K.reshape(W_bar, W_shape)
    else:
        with tf.control_dependencies([self.u.assign(_u)]):
            W_bar = K.reshape(W_bar, W_shape)
    self.embeddings = W_bar
    out = K.gather(self.embeddings, inputs)
    return out
def energy_step(inputs, states):
    """Bahdanau-style attention energies for one decoder step.

    inputs: decoder hidden state (batch, de_hidden).
    states: RNN state list/tuple (validated but otherwise unused).
    Captures `encoder_out_seq`, `self` and `verbose` from the enclosing
    scope.  Returns softmaxed energies over encoder positions plus the
    new state.
    """
    assert_msg = "States must be a list. However states {} is of type {}".format(
        states, type(states))
    assert isinstance(states, list) or isinstance(states, tuple), assert_msg
    en_seq_len, en_hidden = encoder_out_seq.shape[
        1], encoder_out_seq.shape[2]
    de_hidden = inputs.shape[-1]
    # W_a . h_j for all encoder positions at once: flatten, matmul, restore.
    # NOTE(review): restoring to (-1, en_seq_len, en_hidden) assumes W_a is
    # square (en_hidden x en_hidden) -- confirm.
    reshaped_enc_outputs = K.reshape(encoder_out_seq, (-1, en_hidden))
    W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a),
                          (-1, en_seq_len, en_hidden))
    if verbose:
        print('wa.s > ', W_a_dot_s.shape)
    # U_a . s_{t-1}, broadcast across encoder positions via the added axis.
    U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a),
                              1)  # (batch_size, 1, latent_dim)
    if verbose:
        print('Ua.h > ', U_a_dot_h.shape)
    # tanh(W.h + U.s), flattened for the final projection.
    reshaped_Ws_plus_Uh = K.tanh(
        K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))
    if verbose:
        print('Ws+Uh > ', reshaped_Ws_plus_Uh.shape)
    # e_i = softmax(V_a . tanh(...)) over encoder positions.
    e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, self.V_a), (-1, en_seq_len))
    e_i = K.softmax(e_i)
    if verbose:
        print('ei > ', e_i.shape)
    return e_i, [e_i]
def call(self, inputs, training=None):
    """N-D convolution with optional spectral normalization of the kernel.

    One power-iteration step estimates the kernel's largest singular
    value; the kernel is divided by it before convolving.  The persistent
    estimate vector `self.u` is only updated while training.
    """
    def _l2normalize(v, eps=1e-12):
        return v / (K.sum(v**2)**0.5 + eps)

    def power_iteration(W, u):
        # One power-iteration step refines the singular-vector estimate.
        _u = u
        _v = _l2normalize(K.dot(_u, K.transpose(W)))
        _u = _l2normalize(K.dot(_v, W))
        return _u, _v

    if self.spectral_normalization:
        W_shape = self.kernel.shape.as_list()
        # Flatten the Tensor
        W_reshaped = K.reshape(self.kernel, [-1, W_shape[-1]])
        _u, _v = power_iteration(W_reshaped, self.u)
        # Calculate sigma = v W u^T (largest singular value estimate).
        sigma = K.dot(_v, W_reshaped)
        sigma = K.dot(sigma, K.transpose(_u))
        # normalize it
        W_bar = W_reshaped / sigma
        # reshape weight tensor; persist u only outside inference
        if training in {0, False}:
            W_bar = K.reshape(W_bar, W_shape)
        else:
            with tf.control_dependencies([self.u.assign(_u)]):
                W_bar = K.reshape(W_bar, W_shape)
        # update weight
        self.kernel = W_bar
    # Dispatch on spatial rank (1-D / 2-D / 3-D convolution).
    if self.rank == 1:
        outputs = K.conv1d(inputs, self.kernel, strides=self.strides[0],
                           padding=self.padding,
                           data_format=self.data_format,
                           dilation_rate=self.dilation_rate[0])
    if self.rank == 2:
        outputs = K.conv2d(inputs, self.kernel, strides=self.strides,
                           padding=self.padding,
                           data_format=self.data_format,
                           dilation_rate=self.dilation_rate)
    if self.rank == 3:
        outputs = K.conv3d(inputs, self.kernel, strides=self.strides,
                           padding=self.padding,
                           data_format=self.data_format,
                           dilation_rate=self.dilation_rate)
    if self.use_bias:
        outputs = K.bias_add(outputs, self.bias,
                             data_format=self.data_format)
    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def call(self, inputs):
    """Bounded sigmoid activation y = l + s * sigmoid(a*x + b), optionally
    cumulatively summed along the last axis.

    with_sum: 'i' -> increasing cumsum, 'd' -> decreasing (reverse) cumsum;
    anything else leaves y unsummed.
    """
    inputs = ops.convert_to_tensor(inputs)
    input_shape = K.int_shape(inputs)
    if self.arg_array:
        # Per-channel parameters: broadcast them over all leading axes.
        broadcast_shape = [1] * (len(input_shape) - 1) + [input_shape[-1]]
        broadcast_a = K.reshape(self.get_a, broadcast_shape)
        broadcast_b = K.reshape(self.get_b, broadcast_shape)
        broadcast_l = K.reshape(
            K.constant(self.low_bound, dtype=self.dtype), broadcast_shape)
        broadcast_s = K.reshape(
            K.constant(self.sup_bound - self.low_bound, dtype=self.dtype),
            broadcast_shape)
    else:
        # Scalar parameters broadcast naturally.
        broadcast_a = self.get_a
        broadcast_b = self.get_b
        broadcast_l = K.constant(self.low_bound, dtype=self.dtype)
        broadcast_s = K.constant(self.sup_bound - self.low_bound,
                                 dtype=self.dtype)
    # Affine-sigmoid squashed into [low_bound, sup_bound].
    y = broadcast_l + broadcast_s * math_ops.sigmoid(broadcast_a * inputs +
                                                     broadcast_b)
    if self.with_sum == 'i':
        y = math_ops.cumsum(y, axis=-1)
    elif self.with_sum == 'd':
        y = math_ops.cumsum(y, axis=-1, reverse=True)
    return y
def captcha_metric(y_true, y_pred):
    """Per-character accuracy: flatten to (chars, alphabet), take the
    argmax over the alphabet axis, and return the fraction of matches."""
    true_flat = K.reshape(y_true, (-1, alphabet))
    pred_flat = K.reshape(y_pred, (-1, alphabet))
    matches = K.equal(K.argmax(pred_flat, axis=1),
                      K.argmax(true_flat, axis=1))
    return K.mean(K.cast(matches, 'float32'))
def call(self, x, n_frame, fold_div=3):
    """Temporal Shift Module: shift a fraction of the channels one frame
    forward and another fraction one frame backward along the time axis,
    leaving the rest untouched (cf. TSM, https://arxiv.org/abs/1811.08383).

    x: (batch * n_frame, h, w, c) frames stacked along the batch axis.
    fold_div: fraction divisor; c // fold_div channels shift each way.
    """
    nt, h, w, c = x.shape
    # Un-merge batch and time: (b*t, h, w, c) -> (b, t, h, w, c).
    x = K.reshape(x, (-1, n_frame, h, w, c))
    fold = c // fold_div
    last_fold = c - (fold_div - 1) * fold
    out1, out2, out3 = tf.split(x, [fold, fold, last_fold], axis=-1)

    # Shift left: drop frame 0, zero-pad at the end.
    padding_1 = tf.zeros_like(out1)
    padding_1 = padding_1[:, -1, :, :, :]
    padding_1 = tf.expand_dims(padding_1, 1)
    _, out1 = tf.split(out1, [1, n_frame - 1], axis=1)
    out1 = tf.concat([out1, padding_1], axis=1)

    # Shift right: drop the last frame, zero-pad at the front.
    padding_2 = tf.zeros_like(out2)
    padding_2 = padding_2[:, 0, :, :, :]
    padding_2 = tf.expand_dims(padding_2, 1)
    out2, _ = tf.split(out2, [n_frame - 1, 1], axis=1)
    out2 = tf.concat([padding_2, out2], axis=1)

    # Remaining channels stay unshifted; re-merge batch and time.
    out = tf.concat([out1, out2, out3], axis=-1)
    out = K.reshape(out, (-1, h, w, c))
    return out
def call(self, x, mask=None):
    """Co-attention over two sequences x[0] and x[1].

    Blends self-attention (x0 vs x0) and cross-attention (x0 vs x1)
    softmaxes with the learned mixing weight self.ll, then scales x[0]
    by the mean attention it receives.
    """
    features_dim = self.features_dim
    step_dim = self.step_dim
    # Project both sequences with their own square weight matrices.
    xw1 = K.dot(x[0], K.reshape(self.W1, (features_dim, features_dim)))
    xw2 = K.dot(x[1], K.reshape(self.W2, (features_dim, features_dim)))
    xw1t = K.permute_dimensions(xw1, [0, 2, 1])
    xw2t = K.permute_dimensions(xw2, [0, 2, 1])
    # Scaled dot-product energies (scaled by sqrt(step_dim)).
    xw11 = K.batch_dot(xw1, xw1t) / (step_dim**0.5)
    xw12 = K.batch_dot(xw1, xw2t) / (step_dim**0.5)
    # Convex blend of self- and cross-attention distributions.
    s11 = self.ll * K.softmax(xw11)
    s12 = (1 - self.ll) * K.softmax(xw12)
    eij = s11 + s12
    print(eij.get_shape())  # NOTE(review): debug print left in graph build.
    V = x[0] * K.mean(eij, axis=2, keepdims=True)
    if self.get_alpha:
        return eij
    else:
        if self.get_sequence:
            return V
        else:
            return K.sum(V, axis=1)
def normalize_func(mean_batch, variance_batch):
    """Switchable-normalization style combination: blend instance-,
    layer- and batch-statistics with softmaxed learned weights, then
    apply the usual scale/shift.

    Captures `inputs`, `mean_instance`, `mean_layer`,
    `variance_instance`, `variance_layer` and `broadcast_shape` from the
    enclosing scope.
    """
    mean_batch = K.reshape(mean_batch, broadcast_shape)
    variance_batch = K.reshape(variance_batch, broadcast_shape)
    # Softmax makes the three statistics a convex combination.
    mean_weights = K.softmax(self.mean_weights, axis=0)
    variance_weights = K.softmax(self.variance_weights, axis=0)
    mean = (mean_weights[0] * mean_instance +
            mean_weights[1] * mean_layer + mean_weights[2] * mean_batch)
    variance = (variance_weights[0] * variance_instance +
                variance_weights[1] * variance_layer +
                variance_weights[2] * variance_batch)
    outputs = (inputs - mean) / (K.sqrt(variance + self.epsilon))
    if self.scale:
        broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
        outputs = outputs * broadcast_gamma
    if self.center:
        broadcast_beta = K.reshape(self.beta, broadcast_shape)
        outputs = outputs + broadcast_beta
    return outputs
def call(self, x, mask=None):
    """Additive attention pooling over the time axis.

    x: (batch, sequence_length, embedding_dim).
    Returns the attention-weighted sum over time, plus the weights when
    self.return_attentions is set.
    """
    embedding_dim = self.embedding_dim
    sequence_length = self.sequence_length
    # e = x . W  -> (batch, sequence_length) attention energies.
    eij = K.reshape(
        K.dot(K.reshape(x, (-1, embedding_dim)),
              K.reshape(self.W, (embedding_dim, 1))), (-1, sequence_length))
    if self.bias:
        eij += self.b
    eij = K.tanh(eij)
    a = K.exp(eij)
    # Zero out masked timesteps before normalizing.
    if mask is not None:
        a *= K.cast(mask, K.floatx())
    # Manual softmax; epsilon keeps fully-masked rows finite.
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    weighted_input = x * K.expand_dims(a)
    output = K.sum(weighted_input, axis=1)
    if self.return_attentions:
        return output, a
    else:
        return output
def call(self, inputs, output_shape=None):
    """Max-unpooling: scatter `updates` back to the positions recorded in
    `mask` (flat argmax indices from the paired max-pooling layer).

    inputs: [updates, mask].
    """
    updates, mask = inputs[0], inputs[1]
    mask = tf.cast(mask, 'int32')
    input_shape = tf.shape(updates, out_type='int32')
    # calculation new shape
    if output_shape is None:
        output_shape = (input_shape[0], input_shape[1] * self.size[0],
                        input_shape[2] * self.size[1], input_shape[3])
    # calculation indices for batch, height, width and feature maps
    one_like_mask = K.ones_like(mask, dtype='int32')
    batch_shape = K.concatenate([[input_shape[0]], [1], [1], [1]], axis=0)
    batch_range = K.reshape(tf.range(output_shape[0], dtype='int32'),
                            shape=batch_shape)
    b = one_like_mask * batch_range
    # Decode the flat argmax index into (y, x) spatial coordinates.
    y = mask // (output_shape[2] * output_shape[3])
    x = (mask // output_shape[3]) % output_shape[2]
    feature_range = tf.range(output_shape[3], dtype='int32')
    f = one_like_mask * feature_range
    # transpose indices & reshape update values to one dimension
    updates_size = tf.size(updates)
    indices = K.transpose(
        K.reshape(K.stack([b, y, x, f]), [4, updates_size]))
    values = K.reshape(updates, [updates_size])
    ret = tf.scatter_nd(indices, values, output_shape)
    return ret
def call(self, inputs, **kwargs):
    """Multi-head attention front-end: build per-head queries, keys and
    values from [key_values_input, query_input], then delegate to
    self.attention.
    """
    if not (isinstance(inputs, list) and len(inputs) == 2):
        raise ValueError(
            'You can call this layer only with a list of two tensors '
            '(for keys/values and queries)')
    key_values_input, query_input = inputs
    _, value_seq_len, d_model = K.int_shape(key_values_input)
    query_seq_len = K.int_shape(inputs[1])[-2]
    # The first thing we need to do is to perform affine transformations
    # of the inputs to get the Queries, the Keys and the Values.
    kv = K.dot(K.reshape(key_values_input, [-1, d_model]), self.kv_weights)
    # splitting the keys, the values and the queries before further
    # processing
    pre_k, pre_v = [
        K.reshape(
            # K.slice(kv, (0, i * d_model), (-1, d_model)),
            kv[:, i * d_model: (i + 1) * d_model],
            (-1, value_seq_len, self.num_heads, d_model // self.num_heads))
        for i in range(2)]
    # Queries get their own projection, split into heads the same way.
    pre_q = K.reshape(
        K.dot(K.reshape(query_input, [-1, d_model]), self.q_weights),
        (-1, query_seq_len, self.num_heads, d_model // self.num_heads))
    return self.attention(pre_q, pre_v, pre_k, query_seq_len, d_model,
                          training=kwargs.get('training'))
def mrcnn_bbox_loss_graph(target_bbox, target_class_ids, pred_bbox): """Loss for Mask R-CNN bounding box refinement. target_bbox: [batch, num_rois, (dy, dx, log(dh), log(dw))] target_class_ids: [batch, num_rois]. Integer class IDs. pred_bbox: [batch, num_rois, num_classes, (dy, dx, log(dh), log(dw))] """ # Reshape to merge batch and roi dimensions for simplicity. target_class_ids = K.reshape(target_class_ids, (-1, )) target_bbox = K.reshape(target_bbox, (-1, 4)) pred_bbox = K.reshape(pred_bbox, (-1, K.int_shape(pred_bbox)[2], 4)) # Only positive ROIs contribute to the loss. And only # the right class_id of each ROI. Get their indices. positive_roi_ix = tf.where(target_class_ids > 0)[:, 0] positive_roi_class_ids = tf.cast( tf.gather(target_class_ids, positive_roi_ix), tf.int64) indices = tf.stack([positive_roi_ix, positive_roi_class_ids], axis=1) # Gather the deltas (predicted and true) that contribute to loss target_bbox = tf.gather(target_bbox, positive_roi_ix) pred_bbox = tf.gather_nd(pred_bbox, indices) # Smooth-L1 Loss loss = K.switch( tf.size(target_bbox) > 0, smooth_l1_loss(y_true=target_bbox, y_pred=pred_bbox), tf.constant(0.0)) loss = K.mean(loss) return loss
def call(self, inputs, training=None):
    """Dense layer with spectral normalization (one power-iteration step).

    The kernel is divided by its estimated largest singular value; the
    persistent estimate vector self.u is only updated while training.
    """
    def _l2normalize(v, eps=1e-12):
        return v / (K.sum(v ** 2) ** 0.5 + eps)

    def power_iteration(W, u):
        # One power-iteration step refines the singular-vector estimate.
        _u = u
        _v = _l2normalize(K.dot(_u, K.transpose(W)))
        _u = _l2normalize(K.dot(_v, W))
        return _u, _v

    W_shape = self.kernel.shape.as_list()
    # Flatten the Tensor
    W_reshaped = K.reshape(self.kernel, [-1, W_shape[-1]])
    _u, _v = power_iteration(W_reshaped, self.u)
    # Calculate sigma = v W u^T (largest singular value estimate).
    sigma = K.dot(_v, W_reshaped)
    sigma = K.dot(sigma, K.transpose(_u))
    # normalize it
    W_bar = W_reshaped / sigma
    # reshape weight tensor; persist u only outside inference
    if training in {0, False}:
        W_bar = K.reshape(W_bar, W_shape)
    else:
        with tf.control_dependencies([self.u.assign(_u)]):
            W_bar = K.reshape(W_bar, W_shape)
    output = K.dot(inputs, W_bar)
    if self.use_bias:
        output = K.bias_add(output, self.bias, data_format='channels_last')
    if self.activation is not None:
        output = self.activation(output)
    return output
def shift(shape, stride, anchors):
    """Produce shifted anchors based on shape of the map and stride size.

    Args:
        shape: Shape to shift the anchors over.
        stride: Stride to shift the anchors with over the shape.
        anchors: The anchors to apply at each location.

    Returns:
        shifted anchors
    """
    # Center of each feature-map cell, in input-image coordinates.
    shift_x = (K.arange(0, shape[1], dtype=K.floatx()) +
               K.constant(0.5, dtype=K.floatx())) * stride
    shift_y = (K.arange(0, shape[0], dtype=K.floatx()) +
               K.constant(0.5, dtype=K.floatx())) * stride
    shift_x, shift_y = tf.meshgrid(shift_x, shift_y)
    shift_x = K.reshape(shift_x, [-1])
    shift_y = K.reshape(shift_y, [-1])
    # (x, y, x, y) so one shift moves both corners of an anchor box.
    shifts = K.stack([shift_x, shift_y, shift_x, shift_y], axis=0)
    shifts = K.transpose(shifts)
    number_of_anchors = K.shape(anchors)[0]
    k = K.shape(shifts)[0]  # number of base points = feat_h * feat_w
    # Broadcast-add every anchor to every shift, then flatten.
    shifts = K.cast(K.reshape(shifts, [k, 1, 4]), K.floatx())
    shifted_anchors = K.reshape(anchors, [1, number_of_anchors, 4]) + shifts
    shifted_anchors = K.reshape(shifted_anchors, [k * number_of_anchors, 4])
    return shifted_anchors
def call(self, inputs):
    """Conditional dense layer with spectral normalization.

    Temporarily replaces self.kernel with a spectrally-normalized copy,
    delegates to the parent call, then restores the raw kernel.
    """
    w = self.kernel
    kernel_shape = K.int_shape(self.kernel)
    if self.renormalize:
        # Treat the whole kernel bank as one flat matrix.
        w = K.reshape(w, [-1, kernel_shape[-1]])
        sigma, u_bar = max_singular_val(
            w, self.u,
            fully_differentiable=self.fully_diff_spectral,
            ip=self.spectral_iterations)
    else:
        sigma, u_bar = max_singular_val(
            w, self.u,
            fully_differentiable=self.fully_diff_spectral,
            ip=self.spectral_iterations)
        # Per-class sigma, broadcast over the kernel's trailing axes.
        # NOTE(review): sigma is only reshaped on this branch -- confirm
        # the renormalize branch intentionally divides by a scalar sigma.
        sigma = K.reshape(sigma, (self.number_of_classes, 1, 1))
    self.add_update(K.update(self.u, u_bar))
    kernel = self.kernel
    self.kernel = self.kernel / sigma
    outputs = super(SNCondtionalDense, self).call(inputs)
    self.kernel = kernel  # restore the raw kernel for the next call
    return outputs
def simple_context(X, mask): """ Simple context calculation layer logic X = (batch_size, time_steps, units) time_steps are nothing but number of words in our case. """ # segregrate heading and desc desc, head = X[:, :parameters.max_len_desc, :], X[:, parameters.max_len_desc:, :] # segregrate activation and context part head_activations, head_words = head[:, :, :parameters.activation_rnn_size], head[:, :, parameters.activation_rnn_size:] desc_activations, desc_words = desc[:, :, :parameters.activation_rnn_size], desc[:, :, parameters.activation_rnn_size:] # p=(bacth_size, length_desc_words, rnn_units) # q=(bacth_size, length_headline_words, rnn_units) # K.dot(p,q) = (bacth_size, length_desc_words,length_headline_words) activation_energies = K.batch_dot(head_activations, desc_activations, axes=(2, 2)) # make sure we dont use description words that are masked out activation_energies = activation_energies + -1e20 * K.expand_dims(1. - K.cast(mask[:, :parameters.max_len_desc], 'float32'), 1) # for every head word compute weights for every desc word activation_energies = K.reshape(activation_energies, (-1, parameters.max_len_desc)) activation_weights = K.softmax(activation_energies) activation_weights = K.reshape(activation_weights, (-1, parameters.max_len_head, parameters.max_len_desc)) # for every head word compute weighted average of desc words desc_avg_word = K.batch_dot(activation_weights, desc_words, axes=(2, 1)) return K.concatenate((desc_avg_word, head_words))
def call(self, inputs):
    """Conditional Conv2D with spectral normalization.

    Temporarily replaces the per-class kernel bank with a spectrally-
    normalized copy, delegates to the parent conv, then restores it.
    """
    kernel_shape = K.int_shape(self.kernel)
    if not self.renormalize:
        # Per-class sigma: flatten each class' kernel to its own matrix.
        w = K.reshape(self.kernel,
                      (kernel_shape[0],
                       kernel_shape[1] * kernel_shape[2] * kernel_shape[3],
                       kernel_shape[-1]))
        sigma, u_bar = max_singular_val(
            w, self.u,
            fully_differentiable=self.fully_diff_spectral,
            ip=self.spectral_iterations)
        # Broadcast per-class sigma over the kernel's trailing axes.
        sigma = K.reshape(sigma, (self.number_of_classes, 1, 1, 1, 1))
    else:
        # One sigma over the whole kernel bank.
        w = K.reshape(self.kernel, (-1, kernel_shape[-1]))
        sigma, u_bar = max_singular_val(
            w, self.u,
            fully_differentiable=self.fully_diff_spectral,
            ip=self.spectral_iterations)
    self.add_update(K.update(self.u, u_bar))
    kernel = self.kernel
    self.kernel = self.kernel / sigma
    outputs = super(SNConditionalConv2D, self).call(inputs)
    self.kernel = kernel  # restore the raw kernel for the next call
    return outputs
def call(self, inputs):
    """Conditional depthwise Conv2D with spectral normalization.

    Temporarily replaces the kernel with a spectrally-normalized copy,
    delegates to the parent call, then restores the raw kernel.
    """
    kernel_shape = K.int_shape(self.kernel)
    if self.renormalize:
        # One sigma over the whole kernel bank.
        w = K.reshape(self.kernel, (-1, kernel_shape[-1]))
        sigma, u_bar = max_singular_val(
            w, self.u,
            fully_differentiable=self.fully_diff_spectral,
            ip=self.spectral_iterations)
    else:
        # Per-(class, channel) sigma over each spatial filter: move the
        # channel axis next to the class axis, flatten the spatial dims.
        w = tf.transpose(self.kernel, (0, 3, 1, 2))
        w = K.reshape(w, [-1, kernel_shape[1] * kernel_shape[2]])
        w = K.expand_dims(w, axis=-1)
        sigma, u_bar = max_singular_val(
            w, self.u,
            fully_differentiable=self.fully_diff_spectral,
            ip=self.spectral_iterations)
        sigma = K.reshape(sigma, [kernel_shape[0], 1, 1, kernel_shape[-1]])
    self.add_update(K.update(self.u, u_bar))
    kernel = self.kernel
    self.kernel = self.kernel / sigma
    outputs = super(SNConditionalDepthwiseConv2D, self).call(inputs)
    self.kernel = kernel  # restore the raw kernel for the next call
    return outputs
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    Args:
        feats: raw detection feature map
            (batch, grid_h, grid_w, num_anchors * (num_classes + 5)).
        anchors: list of (width, height) anchor sizes.
        num_classes: number of object classes.
        input_shape: network input (height, width).
        calc_loss: when True, return (grid, feats, box_xy, box_wh) for the
            loss computation instead of decoded predictions.

    Returns:
        box_xy, box_wh, box_confidence, box_class_probs -- or the loss
        tuple described above when calc_loss is True.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])
    grid_shape = K.shape(feats)[1:3]  # height, width
    # Build a (grid_h, grid_w, 1, 2) tensor of cell coordinates (x, y).
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))
    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])
    # Adjust predictions to each spatial grid point and anchor size.
    # xy is offset by the cell position and normalized by the grid size;
    # wh is scaled by the anchor and normalized by the input size.  The
    # [::-1] flips (h, w) to (w, h) to match the (x, y) ordering.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])
    if calc_loss:  # idiom fix: was `calc_loss == True`
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
def energy_step(decode_outs, states):
    """Bahdanau-style attention energies for one decoder step.

    decode_outs: (batch, dim) decoder output.  `encoder_out_seq`
    (e.g. [N, 64, 512]; the second axis is the sequence length) and
    `self` are captured from the enclosing scope.  Returns the softmaxed
    energies over encoder positions plus the new state.
    """
    # en_seq_len e.g. 64 time steps, en_hidden e.g. 512 units.
    en_seq_len, en_hidden = encoder_out_seq.shape[
        1], encoder_out_seq.shape[2]
    de_hidden = decode_outs.shape[-1]
    # W * h_j: flatten encoder outputs [b, 64, 512] => [b*64, 512] ...
    reshaped_enc_outputs = K.reshape(
        encoder_out_seq, (-1, en_hidden))
    # ... matmul with W_a [512, 512] => [b*64, 512] => [b, 64, 512].
    W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a),
                          (-1, en_seq_len, en_hidden))
    # U * s_{t-1}: decode_outs [b, 512] x U_a [512, 512] => [b, 512]
    # => [b, 1, 512].
    U_a_dot_h = K.expand_dims(K.dot(decode_outs, self.U_a),
                              axis=1)  # <= batch_size, 1, latent_dim
    # The added axis broadcasts the decoder state across all 64 encoder
    # positions when the two terms are summed below.
    # tanh(W * h_j + U * s_{t-1} + b): [b, 64, 512] => [b*64, 512].
    reshaped_Ws_plus_Uh = K.tanh(
        K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))
    # V * tanh(...): [b*64, 512] x [512, 1] => [b*64, 1] => [b, 64].
    e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, self.V_a), (-1, en_seq_len))
    e_i = K.softmax(e_i)
    return e_i, [e_i]
def channel_shuffle(x, groups):
    """
    Channel shuffle operation from 'ShuffleNet: An Extremely Efficient
    Convolutional Neural Network for Mobile Devices,'
    https://arxiv.org/abs/1707.01083.

    Parameters:
    ----------
    x : keras.backend tensor/variable/symbol
        Input tensor/variable/symbol.
    groups : int
        Number of groups.

    Returns
    -------
    keras.backend tensor/variable/symbol
        Resulted tensor/variable/symbol.
    """
    if is_channels_first():
        batch, channels, height, width = x.shape
    else:
        batch, height, width, channels = x.shape
    # assert (channels % groups == 0)
    channels_per_group = channels // groups
    # Reshape channels into (groups, per_group), swap those two axes and
    # flatten back: this interleaves channels across groups.
    if is_channels_first():
        x = K.reshape(x, shape=(-1, groups, channels_per_group, height, width))
        x = K.permute_dimensions(x, pattern=(0, 2, 1, 3, 4))
        x = K.reshape(x, shape=(-1, channels, height, width))
    else:
        x = K.reshape(x, shape=(-1, height, width, groups, channels_per_group))
        x = K.permute_dimensions(x, pattern=(0, 1, 2, 4, 3))
        x = K.reshape(x, shape=(-1, height, width, channels))
    updateshape(x)
    return x
def call(self, inputs):
    """Class-conditional depthwise convolution.

    inputs: [x, cls] where x is an image batch and cls holds a per-sample
    class index used to gather per-class kernels (and biases).

    Implementation trick: the per-sample kernels are folded into the
    channel axis so a single depthwise_conv2d call convolves every sample
    with its own kernel.
    """
    # Bug fix: `data_format` was only bound when self.data_format was
    # None, so the error path below raised NameError instead of the
    # intended ValueError.
    data_format = self.data_format
    if self.data_format not in {'channels_first', 'channels_last'}:
        raise ValueError('Unknown data_format ' + str(data_format))
    strides = (1,) + self.strides + (1,)
    x = inputs[0]
    cls = K.squeeze(inputs[1], axis=-1)
    # Kernel preprocess: gather per-sample kernels and fold them into the
    # channel axis.
    kernel = K.gather(self.kernel, cls)  # (bs, w, h, c)
    kernel = tf.transpose(kernel, [1, 2, 3, 0])  # (w, h, c, bs)
    kernel = K.reshape(kernel, (self.kernel_size[0], self.kernel_size[1],
                                -1))  # (w, h, c * bs)
    kernel = K.expand_dims(kernel, axis=-1)  # (w, h, c * bs, 1)
    # Bug fix: 'channles_first' was misspelled in both comparisons, so
    # NCHW inputs were never transposed to NHWC (nor back).
    if self.data_format == 'channels_first':
        x = tf.transpose(x, [0, 2, 3, 1])
    bs, w, h, c = K.int_shape(x)  # (bs, w, h, c)
    x = tf.transpose(x, [1, 2, 3, 0])  # (w, h, c, bs)
    x = K.reshape(x, (w, h, -1))  # (w, h, c * bs)
    x = K.expand_dims(x, axis=0)  # (1, w, h, c * bs)
    padding = _preprocess_padding(self.padding)
    outputs = tf.nn.depthwise_conv2d(x, kernel, strides=strides,
                                     padding=padding,
                                     rate=self.dilation_rate)  # (1, w, h, c * bs)
    _, w, h, _ = K.int_shape(outputs)
    outputs = K.reshape(outputs, [w, h, self.filters, -1])  # (w, h, c, bs)
    outputs = tf.transpose(outputs, [3, 0, 1, 2])  # (bs, w, h, c)
    if self.bias is not None:
        # self.bias is (num_cls, out): gather per-sample biases and
        # broadcast them over the spatial dimensions.
        bias = tf.gather(self.bias, cls)  # (bs, bias)
        bias = tf.expand_dims(bias, axis=1)
        bias = tf.expand_dims(bias, axis=1)  # (bs, 1, 1, bias)
        outputs += bias
    if self.data_format == 'channels_first':
        outputs = tf.transpose(outputs, [0, 3, 1, 2])
    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def call(self, inputs, training=None):
    """Class-conditional scale/shift: gather per-class gamma/beta and
    apply them.  No statistics are computed here.

    inputs: [x, class_labels].
    """
    class_labels = K.squeeze(inputs[1], axis=1)
    inputs = inputs[0]
    input_shape = K.int_shape(inputs)
    # NOTE(review): reduction_axes is computed but never used below --
    # confirm it is dead bookkeeping.
    reduction_axes = list(range(0, len(input_shape)))
    if self.axis is not None:
        del reduction_axes[self.axis]
    del reduction_axes[0]
    normed = inputs
    # Per-sample parameter broadcast shape: (batch, 1, ..., C).
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[0] = K.shape(inputs)[0]
    if self.axis is not None:
        broadcast_shape[self.axis] = input_shape[self.axis]
    if self.scale:
        broadcast_gamma = K.reshape(K.gather(self.gamma, class_labels),
                                    broadcast_shape)
        normed = normed * broadcast_gamma
    if self.center:
        broadcast_beta = K.reshape(K.gather(self.beta, class_labels),
                                   broadcast_shape)
        normed = normed + broadcast_beta
    return normed
def make_patches_grid(x, patch_size, patch_stride):
    '''Break image `x` up into a grid of patches.

    input shape: (channels, rows, cols)
    output shape: (rows, cols, channels, patch_rows, patch_cols)
    '''
    from theano.tensor.nnet.neighbours import images2neibs  # TODO: all K, no T
    # images2neibs expects a 4-D (batch, ...) input.
    x = K.expand_dims(x, 0)
    xs = K.shape(x)
    num_rows = 1 + (xs[-2] - patch_size) // patch_stride
    num_cols = 1 + (xs[-1] - patch_size) // patch_stride
    num_channels = xs[-3]
    patches = images2neibs(x, (patch_size, patch_size),
                           (patch_stride, patch_stride), mode='valid')
    # neibs are sorted per-channel
    patches = K.reshape(patches,
                        (num_channels, K.shape(patches)[0] // num_channels,
                         patch_size, patch_size))
    patches = K.permute_dimensions(patches, (1, 0, 2, 3))
    # arrange in a 2d-grid (rows, cols, channels, px, py)
    patches = K.reshape(
        patches, (num_rows, num_cols, num_channels, patch_size, patch_size))
    # Per-patch L2 norm over (channels, px, py), kept alongside the patches.
    patches_norm = K.sqrt(
        K.sum(K.square(patches), axis=(2, 3, 4), keepdims=True))
    return patches, patches_norm
def call(self, inputs, **kwargs):
    """Crop-and-resize RoI align over a feature map.

    inputs: [boxes, fpn].  A 4-D boxes tensor is treated as
    (batch, time, num_boxes, 4): the time axis is folded into the batch
    before cropping and restored afterwards.
    """
    # Gradients never flow into box coordinates or the feature map here.
    boxes = K.stop_gradient(inputs[0])
    fpn = K.stop_gradient(inputs[1])
    time_distributed = K.ndim(boxes) == 4
    if time_distributed:
        boxes_shape = K.shape(boxes)
        fpn_shape = K.shape(fpn)
        new_boxes_shape = [-1] + [
            boxes_shape[i] for i in range(2, K.ndim(boxes))
        ]
        new_fpn_shape = [-1
                         ] + [fpn_shape[i] for i in range(2, K.ndim(fpn))]
        boxes = K.reshape(boxes, new_boxes_shape)
        fpn = K.reshape(fpn, new_fpn_shape)
    image_shape = K.cast(K.shape(fpn), K.floatx())

    def _roi_align(args):
        # Crop one sample's boxes out of its feature map.
        boxes = args[0]
        fpn = args[1]
        # process the feature map
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        fpn_shape = K.cast(K.shape(fpn), dtype=K.floatx())
        # Normalize box corners into [0, 1] feature-map coordinates as
        # required by tf.image.crop_and_resize (y1, x1, y2, x2 order).
        norm_boxes = K.stack([
            (y1 / image_shape[1] * fpn_shape[0]) / (fpn_shape[0] - 1),
            (x1 / image_shape[2] * fpn_shape[1]) / (fpn_shape[1] - 1),
            (y2 / image_shape[1] * fpn_shape[0] - 1) / (fpn_shape[0] - 1),
            (x2 / image_shape[2] * fpn_shape[1] - 1) / (fpn_shape[1] - 1)
        ], axis=1)
        rois = tf.image.crop_and_resize(
            K.expand_dims(fpn, axis=0), norm_boxes,
            tf.zeros((K.shape(norm_boxes)[0], ), dtype='int32'),
            self.crop_size)
        return rois

    roi_batch = tf.map_fn(_roi_align,
                          elems=[boxes, fpn],
                          dtype=K.floatx(),
                          parallel_iterations=self.parallel_iterations)
    if time_distributed:
        # Restore the (batch, time, ...) leading axes.
        roi_shape = tf.shape(roi_batch)
        new_roi_shape = [boxes_shape[0], boxes_shape[1]] + \
            [roi_shape[i] for i in range(1, K.ndim(roi_batch))]
        roi_batch = tf.reshape(roi_batch, new_roi_shape)
    return roi_batch
def local_conv_matmul(inputs, kernel, kernel_mask, output_shape):
    """Apply N-D convolution with un-shared weights using a single matmul call.

    This method outputs `inputs . (kernel * kernel_mask)` (with `.` standing
    for matrix-multiply and `*` for element-wise multiply) and requires a
    precomputed `kernel_mask` to zero-out weights in `kernel` and hence
    perform the same operation as a convolution with un-shared (the remaining
    entries in `kernel`) weights. It also does the necessary reshapes to make
    `inputs` and `kernel` 2-D and `output` (N+2)-D.

    Arguments:
        inputs: (N+2)-D tensor with shape `(batch_size, channels_in, d_in1,
            ..., d_inN)` or `(batch_size, d_in1, ..., d_inN, channels_in)`.
        kernel: the unshared weights for N-D convolution, an (N+2)-D tensor
            of shape: `(d_in1, ..., d_inN, channels_in, d_out2, ..., d_outN,
            channels_out)` or `(channels_in, d_in1, ..., d_inN, channels_out,
            d_out2, ..., d_outN)`, with the ordering of channels and spatial
            dimensions matching that of the input. Each entry is the weight
            between a particular input and output location, similarly to a
            fully-connected weight matrix.
        kernel_mask: a float 0/1 mask tensor of shape: `(d_in1, ..., d_inN,
            1, d_out2, ..., d_outN, 1)` or `(1, d_in1, ..., d_inN, 1, d_out2,
            ..., d_outN)`, with the ordering of singleton and spatial
            dimensions matching that of the input. Mask represents the
            connectivity pattern of the layer and is precomputed elsewhere
            based on layer parameters: stride, padding, and the receptive
            field shape.
        output_shape: a tuple of (N+2) elements representing the output
            shape: `(batch_size, channels_out, d_out1, ..., d_outN)` or
            `(batch_size, d_out1, ..., d_outN, channels_out)`, with the
            ordering of channels and spatial dimensions matching that of the
            input.

    Returns:
        Output (N+2)-D tensor with shape `output_shape`.
    """
    # Collapse everything but the batch axis so one matmul covers all
    # input locations.
    inputs_flat = K.reshape(inputs, (K.shape(inputs)[0], -1))
    # Zero out non-local connections, then collapse the kernel to 2-D.
    kernel = kernel_mask * kernel
    kernel = make_2d(kernel, split_dim=K.ndim(kernel) // 2)
    # sparse_matmul exploits the mostly-zero masked kernel.
    # NOTE(review): assumes the backend exposes `K.math_ops` -- confirm.
    output_flat = K.math_ops.sparse_matmul(inputs_flat, kernel,
                                           b_is_sparse=True)
    output = K.reshape(output_flat,
                       [K.shape(output_flat)[0],] +
                       output_shape.as_list()[1:])
    return output
def call(self, inputs, training=None, mask=None):
  """Applies the wrapped layer to every temporal slice of `inputs`.

  Two implementations are chosen between at trace time:
  - If the static batch size is known, `K.rnn` drives the wrapped layer's
    `call` one timestep at a time.
  - Otherwise, `inputs` is reshaped to fold timesteps into the batch
    dimension, the wrapped layer is called once, and the result is reshaped
    back. This is the faster path and works for any batch size.

  Arguments:
    inputs: Tensor of shape `(batch, timesteps, ...)`.
    training: Forwarded to the wrapped layer's `call` iff it accepts a
      `training` argument.
    mask: Optional mask tensor; forwarded (reshaped) to the wrapped layer's
      `call` iff it accepts a `mask` argument (reshape path only).

  Returns:
    Tensor of shape `(batch, timesteps, ...)` — the wrapped layer's output
    per timestep.
  """
  kwargs = {}
  # Only forward `training` if the inner layer's call signature accepts it.
  if generic_utils.has_arg(self.layer.call, 'training'):
    kwargs['training'] = training
  uses_learning_phase = False  # pylint: disable=redefined-outer-name
  input_shape = K.int_shape(inputs)
  if input_shape[0]:
    # batch size matters, use rnn-based implementation
    def step(x, _):
      # NOTE(review): `global` binds `uses_learning_phase` at MODULE scope,
      # not the enclosing local of the same name (that would need
      # `nonlocal`). The pylint disable suggests a module-level variable is
      # relied on here — confirm this is intentional.
      global uses_learning_phase  # pylint: disable=global-variable-undefined
      output = self.layer.call(x, **kwargs)
      # Propagate the learning-phase flag if any timestep's output sets it.
      if hasattr(output, '_uses_learning_phase'):
        uses_learning_phase = (output._uses_learning_phase or
                               uses_learning_phase)
      # No recurrent state is carried between timesteps.
      return output, []

    _, outputs, _ = K.rnn(
        step,
        inputs,
        initial_states=[],
        input_length=input_shape[1],
        unroll=False)
    y = outputs
  else:
    # No batch size specified, therefore the layer will be able
    # to process batches of any size.
    # We can go with reshape-based implementation for performance.
    input_length = input_shape[1]
    if not input_length:
      # Timesteps unknown statically; read it dynamically from the tensor.
      input_length = array_ops.shape(inputs)[1]
    inner_input_shape = self._get_shape_tuple((-1,), inputs, 2)
    # Shape: (num_samples * timesteps, ...). And track the
    # transformation in self._input_map.
    input_uid = generic_utils.object_list_uid(inputs)
    inputs = array_ops.reshape(inputs, inner_input_shape)
    self._input_map[input_uid] = inputs
    # (num_samples * timesteps, ...)
    if generic_utils.has_arg(self.layer.call, 'mask') and mask is not None:
      # Fold the mask's timestep dimension the same way as the inputs.
      inner_mask_shape = self._get_shape_tuple((-1,), mask, 2)
      kwargs['mask'] = K.reshape(mask, inner_mask_shape)
    y = self.layer.call(inputs, **kwargs)
    if hasattr(y, '_uses_learning_phase'):
      uses_learning_phase = y._uses_learning_phase
    # Shape: (num_samples, timesteps, ...)
    output_shape = self.compute_output_shape(input_shape).as_list()
    output_shape = self._get_shape_tuple(
        (-1, input_length), y, 1, output_shape[2:])
    y = array_ops.reshape(y, output_shape)

  # Apply activity regularizer if any:
  if (hasattr(self.layer, 'activity_regularizer') and
      self.layer.activity_regularizer is not None):
    regularization_loss = self.layer.activity_regularizer(y)
    self.add_loss(regularization_loss, inputs)

  if uses_learning_phase:
    y._uses_learning_phase = True
  return y
def trivial_model(num_classes, dtype='float32'): """Trivial model for ImageNet dataset.""" input_shape = (224, 224, 3) img_input = layers.Input(shape=input_shape, dtype=dtype) x = layers.Lambda(lambda x: backend.reshape(x, [-1, 224 * 224 * 3]), name='reshape')(img_input) x = layers.Dense(1, name='fc1')(x) x = layers.Dense(num_classes, name='fc1000')(x) # TODO(reedwm): Remove manual casts once mixed precision can be enabled with a # single line of code. x = backend.cast(x, 'float32') x = layers.Activation('softmax')(x) return models.Model(img_input, x, name='trivial')
def compute_mask(self, inputs, mask=None):
  """Computes an output mask tensor for Embedding layer.

  This is based on the inputs, mask, and the inner layer.
  If batch size is specified:
  Simply return the input `mask`. (An rnn-based implementation with
  more than one rnn inputs is required but not supported in tf.keras yet.)
  Otherwise we call `compute_mask` of the inner layer at each time step.
  If the output mask at each time step is not `None`:
  (E.g., inner layer is Masking or RNN)
  Concatenate all of them and return the concatenation.
  If the output mask at each time step is `None` and the input mask is not
  `None`: (E.g., inner layer is Dense)
  Reduce the input_mask to 2 dimensions and return it.
  Otherwise (both the output mask and the input mask are `None`):
  (E.g., `mask` is not used at all)
  Return `None`.

  Arguments:
    inputs: Tensor with shape [batch size, timesteps, ...] indicating the
      input to TimeDistributed. If static shape information is available for
      "batch size", `mask` is returned unmodified.
    mask: Either None (indicating no masking) or a Tensor indicating the
      input mask for TimeDistributed. The shape can be static or dynamic.

  Returns:
    Either None (no masking), or a [batch size, timesteps, ...] Tensor with
    an output mask for the TimeDistributed layer with the shape beyond the
    second dimension being the value of the input mask shape (if the computed
    output mask is none), an output mask with the shape beyond the first
    dimension being the value of the mask shape (if mask is not None) or
    output mask with the shape beyond the first dimension being the value of
    the computed output shape.
  """
  # cases need to call the layer.compute_mask when input_mask is None:
  # Masking layer and Embedding layer with mask_zero
  input_shape = K.int_shape(inputs)
  if input_shape[0]:
    # batch size matters, we currently do not handle mask explicitly
    return mask
  inner_mask = mask
  if inner_mask is not None:
    # Fold timesteps into the batch dimension, matching what `call` does to
    # the inputs on the reshape-based path.
    inner_mask_shape = self._get_shape_tuple((-1,), mask, 2)
    inner_mask = K.reshape(inner_mask, inner_mask_shape)
  # Reuse the already-reshaped inputs recorded by `call`, if available.
  input_uid = generic_utils.object_list_uid(inputs)
  inner_inputs = self._input_map.get(input_uid, inputs)
  output_mask = self.layer.compute_mask(inner_inputs, inner_mask)
  if output_mask is None:
    if mask is None:
      return None
    # input_mask is not None, and output_mask is None:
    # we should return a not-None mask
    output_mask = mask
    # Reduce the mask to rank 2 (batch, timesteps) by collapsing trailing
    # dimensions with `any`.
    for _ in range(2, len(K.int_shape(mask))):
      output_mask = K.any(output_mask, axis=-1)
  else:
    # output_mask is not None. We need to reshape it back to
    # (batch, timesteps, ...).
    input_length = input_shape[1]
    if not input_length:
      input_length = K.shape(inputs)[1]
    output_mask_int_shape = K.int_shape(output_mask)
    if output_mask_int_shape is None:
      # if the output_mask does not have a static shape,
      # its shape must be the same as mask's
      if mask is not None:
        output_mask_int_shape = K.int_shape(mask)
      else:
        # BUG FIX: this previously called `K.compute_output_shape`, but the
        # Keras backend module has no such function (AttributeError on this
        # path). `compute_output_shape` is a method of the layer itself —
        # `call` above uses `self.compute_output_shape` the same way.
        output_mask_int_shape = self.compute_output_shape(input_shape)[:-1]
    output_mask_shape = self._get_shape_tuple(
        (-1, input_length), output_mask, 1, output_mask_int_shape[1:])
    output_mask = K.reshape(output_mask, output_mask_shape)
  return output_mask