def _kernel_constraint(self, kernel):
    """Rebuild `kernel` ring by ring from the values on its main diagonal.

    A small central patch is sliced out of `kernel` (1x1 when the side length
    is odd, broadcast to 2x2 when it is even) and then padded by one cell on
    every side per loop step; each new ring is filled with the diagonal entry
    `kernel[start + i, start + i]`.

    NOTE(review): the previous docstring described the input as
    (height, width, channels), but the body indexes `kernel` with exactly two
    indices, so it presumably receives one 2D slice at a time -- confirm
    against the caller.
    """
    ring_padding = K.constant([[1, 1], [1, 1]], dtype='int32')

    side = K.shape(kernel)[0]
    start = K.cast(side / 2, 'int32')

    def _side_is_odd():
        return K.cast(math_ops.floormod(side, 2), 'bool')

    def _centre_patch():
        return kernel[start - 1:start, start - 1:start]

    def _centre_patch_2x2():
        # Adding a (2, 2) zero block broadcasts the 1x1 slice up to 2x2.
        return _centre_patch() + K.zeros((2, 2), dtype=kernel.dtype)

    seed = K.switch(_side_is_odd(), _centre_patch, _centre_patch_2x2)
    # The loop counter starts at 0 for odd sides and at 1 for even sides.
    first_ring = K.switch(
        _side_is_odd(),
        lambda: K.constant(0, dtype='int32'),
        lambda: K.constant(1, dtype='int32'))

    def _more_rings(ring, *unused):
        return K.less(ring, start)

    def _add_ring(ring, patch):
        fill = kernel[start + ring, start + ring]
        return ring + 1, array_ops.pad(
            patch, ring_padding, constant_values=fill)

    # The patch grows on every iteration, hence the [None, None] invariant.
    _, constrained = control_flow_ops.while_loop(
        _more_rings,
        _add_ring,
        [first_ring, seed],
        shape_invariants=[
            first_ring.get_shape(),
            tensor_shape.TensorShape([None, None]),
        ])
    return constrained
def generate_val_set(self):
    """
    Build the validation dataset pipeline.

    Every key of ``self.val_id_ep_dict`` is joined onto
    ``self.train_images_folder`` to form an image path and paired with its
    value as the label (declared below as 4 strings per entry). The pairs are
    shuffled, passed through ``parse_path`` -> ``process_label`` ->
    ``resize_and_norm`` -> ``crop_img_and_serve``, flattened so each crop
    becomes its own element, filtered, batched, prefetched and repeated
    indefinitely.

    :return: tf.data.Dataset
    """
    n_el = len(self.val_id_ep_dict)
    # Keys and values of a dict iterate in the same order, so ids and labels
    # stay aligned.
    ids = [
        os.path.join(self.train_images_folder, image_id)
        for image_id in self.val_id_ep_dict
    ]
    labels = list(self.val_id_ep_dict.values())

    id_tensor = K.constant(ids, dtype=tf.string, shape=[n_el])
    label_tensor = K.constant(labels, dtype=tf.string, shape=(n_el, 4))

    def _label_has_no_true_entry(x, y):
        # Keep only elements whose label tensor contains no truthy value.
        return K.equal(K.any(y), False)

    return (
        tf.data.Dataset.from_tensor_slices((id_tensor, label_tensor))
        .shuffle(buffer_size=n_el)
        .map(self.parse_path, num_parallel_calls=AUTOTUNE)
        .map(self.process_label, num_parallel_calls=AUTOTUNE)
        .map(self.resize_and_norm, num_parallel_calls=AUTOTUNE)
        # create crops of image to enlarge output
        .map(self.crop_img_and_serve, num_parallel_calls=AUTOTUNE)
        # serve every crop as its own dataset element
        .flat_map(lambda x, y: tf.data.Dataset.from_tensor_slices((x, y)))
        .filter(_label_has_no_true_entry)
        .batch(self.batch_size)
        .prefetch(AUTOTUNE)  # number of batches to prefetch
        .repeat()  # restart the dataset when it is exhausted
    )
def mask_attention_if_needed(self, dot_product):
    """
    Apply a causal (lower-triangular) mask to attention scores.

    When ``self.use_masking`` is off the input is returned untouched.
    Otherwise every entry above the main diagonal of the last two dimensions
    is replaced by a large negative number (-1e9), so that a following
    softmax turns those connections into (near) zeros and a position cannot
    attend to later positions.

    The mask is built with NumPy from ``K.int_shape``, so the last two
    dimensions of ``dot_product`` must be statically known.

    :param dot_product: scaled dot-product of Q and K after reshaping them
      to 3D tensors (batch * num_heads, rows, cols)
    """
    if not self.use_masking:
        return dot_product

    rows_cols = K.int_shape(dot_product)[-2:]
    # Leading axis of size 1 lets the mask broadcast over the batch axis.
    allowed = np.tril(np.ones(rows_cols)).reshape((1,) + rows_cols)
    blocked = 1 - allowed
    large_negative = -1e9

    kept_scores = K.constant(allowed, dtype=K.floatx()) * dot_product
    penalty = K.constant(large_negative * blocked)
    return kept_scores + penalty
def triplet_loss(y_true, y_pred):
    """Hinge-style loss over three values read from `y_pred`.

    Reads `y_pred[:, 0, 0]`, `y_pred[:, 1, 0]` and `y_pred[:, 2, 0]`, and
    returns the batch mean of
    `max(0, first^2 - 0.5 * (second^2 + third^2) + 1)`.
    `y_true` is not used.
    """
    margin = K.constant(1)
    first_sq = K.square(y_pred[:, 0, 0])
    second_sq = K.square(y_pred[:, 1, 0])
    third_sq = K.square(y_pred[:, 2, 0])
    raw = first_sq - 0.5 * (second_sq + third_sq) + margin
    return K.mean(K.maximum(K.constant(0), raw))
def call(self, inputs):
    """Apply a sigmoid rescaled into `[low_bound, sup_bound]`.

    Computes `low + (sup - low) * sigmoid(a * inputs + b)`. When
    `self.arg_array` is set, `a`, `b` and both bounds are reshaped to
    `[1, ..., 1, last_dim]` so they broadcast along the last input axis.
    With `self.with_sum == 'i'` the result is cumulatively summed along the
    last axis; with `'d'` the cumulative sum runs in reverse.
    """
    inputs = ops.convert_to_tensor(inputs)
    input_shape = K.int_shape(inputs)

    slope = self.get_a
    offset = self.get_b
    lower = K.constant(self.low_bound, dtype=self.dtype)
    span = K.constant(self.sup_bound - self.low_bound, dtype=self.dtype)

    if self.arg_array:
        per_feature = [1] * (len(input_shape) - 1) + [input_shape[-1]]
        slope, offset, lower, span = (
            K.reshape(term, per_feature)
            for term in (slope, offset, lower, span))

    y = lower + span * math_ops.sigmoid(slope * inputs + offset)

    if self.with_sum in ('i', 'd'):
        y = math_ops.cumsum(y, axis=-1, reverse=self.with_sum == 'd')
    return y
def loss_CCC(seq1, seq2):
    """
    Concordance Correlation Coefficient (CCC) loss.

    INPUT:
    ------
    -> seq1: tensor with the true output: (num_batches, seq_len, 1)
    -> seq2: tensor with the predicted output: (num_batches, seq_len, 1)

    OUTPUT:
    -------
    <- cccLoss: tensor of shape (num_batches, seq_len) holding
       1 - 2 * d1 * d2 / (var1 + var2 + (mean1 - mean2)^2 + epsilon),
       where d1, d2 are the per-timestep deviations from the sequence means.

    The cross term is deliberately left un-averaged here: the denominator is
    constant along the sequence axis, so averaging the returned tensor over
    that axis gives (1 - CCC) for each sequence.
    """
    true_seq = K.squeeze(seq1, axis=-1)
    pred_seq = K.squeeze(seq2, axis=-1)

    true_mean = K.mean(true_seq, axis=-1, keepdims=True)
    pred_mean = K.mean(pred_seq, axis=-1, keepdims=True)
    true_dev = true_seq - true_mean
    pred_dev = pred_seq - pred_mean

    cross = true_dev * pred_dev
    true_var = K.mean(K.square(true_dev), axis=-1, keepdims=True)
    pred_var = K.mean(K.square(pred_dev), axis=-1, keepdims=True)

    denominator = (
        true_var + pred_var + K.square(true_mean - pred_mean) + K.epsilon())
    ccc = K.constant(2.) * cross / denominator
    return K.constant(1.) - ccc
def shift(shape, stride, anchors):
    """Replicate a set of base anchors at every cell of a feature map.

    Args:
      shape: Shape to shift the anchors over; `shape[0]` rows and
        `shape[1]` columns are used.
      stride: Stride to shift the anchors with over the shape.
      anchors: The anchors to apply at each location, reshaped internally to
        `[1, number_of_anchors, 4]`.

    Returns:
      Tensor of shape `[rows * columns * number_of_anchors, 4]` holding the
      anchors offset by `(x, y, x, y)` of each cell centre, where a centre is
      `(index + 0.5) * stride`.
    """
    def _cell_centres(count):
        indices = K.arange(0, count, dtype=K.floatx())
        return (indices + K.constant(0.5, dtype=K.floatx())) * stride

    grid_x, grid_y = tf.meshgrid(
        _cell_centres(shape[1]), _cell_centres(shape[0]))
    flat_x = K.reshape(grid_x, [-1])
    flat_y = K.reshape(grid_y, [-1])

    # One (x, y, x, y) row per grid location.
    offsets = K.transpose(K.stack([flat_x, flat_y, flat_x, flat_y], axis=0))

    anchor_count = K.shape(anchors)[0]
    location_count = K.shape(offsets)[0]  # feat_h * feat_w

    offsets = K.cast(K.reshape(offsets, [location_count, 1, 4]), K.floatx())
    shifted = K.reshape(anchors, [1, anchor_count, 4]) + offsets
    return K.reshape(shifted, [location_count * anchor_count, 4])
def _triplet_loss(_, y_pred):
    """Mean hinge loss on squared distances with the module-level MARGIN.

    Column 0 of `y_pred` is read as the positive distance and column 1 as the
    negative distance; the result is
    `mean(max(0, positive^2 - negative^2 + MARGIN))`.
    The first argument (the target tensor) is ignored.
    """
    positive_sq = K.square(y_pred[:, 0])
    negative_sq = K.square(y_pred[:, 1])
    hinge_input = positive_sq - negative_sq + K.constant(MARGIN)
    return K.mean(K.maximum(K.constant(0), hinge_input))
def linear_unbin_layer(tnsr):
    """Return `argmax(tnsr) - 1` as a float32 tensor.

    The argmax is taken with `K.argmax` defaults and cast to float32 before
    the offset of 1 is subtracted.

    NOTE(review): the previous version also created a bin-width constant
    (2 / 14) in a local named `bin` but never used it, so the output is the
    raw bin index minus one rather than a value scaled by the bin width.
    The numeric behaviour is left unchanged here; confirm with callers
    whether `index * (2 / 14) - 1` was intended.
    """
    offset = K.constant(1, dtype='float32')
    bin_index = K.cast(K.argmax(tnsr), dtype='float32')
    return bin_index - offset
def test_mini_batch(self):
    """FilterDetections on a batch of two images keeps one box per image."""
    with self.cached_session():
        layer = layers.FilterDetections()

        # Batch of 2 images, each with two identical boxes; the lower-scored
        # duplicate in each image is expected to be suppressed.
        boxes = K.constant(np.array(
            [
                [
                    [0, 0, 10, 10],  # suppressed
                    [0, 0, 10, 10],
                ],
                [
                    [100, 100, 150, 150],
                    [100, 100, 150, 150],  # suppressed
                ],
            ],
            dtype=K.floatx()))
        classification = K.constant(np.array(
            [
                [
                    [0, 0.9],  # suppressed
                    [0, 1],
                ],
                [
                    [1, 0],
                    [0.9, 0],  # suppressed
                ],
            ],
            dtype=K.floatx()))

        outputs = layer.call([boxes, classification])
        actual_boxes, actual_scores, actual_labels = (
            K.get_value(output) for output in outputs)

        # Expected output: everything is -1 except the first slot per image.
        expected_boxes = np.full((2, 300, 4), -1, dtype=K.floatx())
        expected_boxes[0, 0, :] = [0, 0, 10, 10]
        expected_boxes[1, 0, :] = [100, 100, 150, 150]

        expected_scores = np.full((2, 300), -1, dtype=K.floatx())
        expected_scores[0, 0] = 1
        expected_scores[1, 0] = 1

        expected_labels = np.full((2, 300), -1, dtype=K.floatx())
        expected_labels[0, 0] = 1
        expected_labels[1, 0] = 0

        self.assertAllEqual(actual_boxes, expected_boxes)
        self.assertAllEqual(actual_scores, expected_scores)
        self.assertAllEqual(actual_labels, expected_labels)
def discriminative_instance_loss(y_true, y_pred, delta_v=0.5, delta_d=1.5,
                                 gamma=1e-3):
    """Discriminative loss between an output tensor and a target tensor.

    The loss is the sum of three terms computed from the per-channel mean
    embeddings `mu` (one row per channel of `y_true`):
      * variance term: squared hinge on the distance of each embedding to
        its mean beyond `delta_v`;
      * distance term: squared hinge on pairwise mean distances below
        `2 * delta_d`, with the diagonal zeroed;
      * regularization term: `gamma` times the mean norm of `mu`.

    Args:
      y_true: A tensor of the same shape as y_pred.
      y_pred: A tensor of the vector embedding.
      delta_v: Margin of the variance term.
      delta_d: Half of the margin of the distance term.
      gamma: Weight of the regularization term.

    Returns:
      tensor: Scalar loss tensor.

    NOTE(review): `mu_tensor` is produced by contracting the channel axis of
    `y_true`, which places the embedding axis last regardless of
    `image_data_format`; the 'channels_first' path therefore looks like it
    would not line up with `y_pred` -- confirm before relying on it.
    """
    def _l2_norm(tensor, axis=None):
        if axis is None:
            if K.image_data_format() == 'channels_first':
                axis = 1
            else:
                axis = K.ndim(tensor) - 1
        return K.sqrt(K.epsilon() + K.sum(K.square(tensor), axis=axis))

    rank = K.ndim(y_pred)
    channel_axis = 1 if K.image_data_format() == 'channels_first' else rank - 1
    reduce_axes = [ax for ax in range(rank) if ax != channel_axis]

    # Variance loss.
    summed = tf.tensordot(y_true, y_pred, axes=[reduce_axes, reduce_axes])
    pixel_counts = K.cast(
        tf.count_nonzero(y_true, axis=reduce_axes),
        dtype=K.floatx()) + K.epsilon()
    pixel_counts_col = K.expand_dims(pixel_counts, axis=1) + K.epsilon()
    mu = tf.divide(summed, pixel_counts_col)

    variance_margin = K.constant(delta_v, dtype=K.floatx())
    mu_per_pixel = tf.tensordot(y_true, mu, axes=[[channel_axis], [0]])
    deviation = y_pred - mu_per_pixel
    hinged = K.square(K.relu(_l2_norm(deviation) - variance_margin))
    per_instance = tf.tensordot(
        hinged, y_true, axes=[reduce_axes, reduce_axes])
    loss_var = K.mean(tf.divide(per_instance, pixel_counts))

    # Distance loss.
    pairwise_diff = tf.subtract(
        K.expand_dims(mu, axis=1), K.expand_dims(mu, axis=0))
    pairwise_dist = _l2_norm(pairwise_diff)
    pairwise_hinge = K.square(
        K.relu(K.constant(2 * delta_d, dtype=K.floatx()) - pairwise_dist))
    # Zero the diagonal so an instance is not penalised against itself.
    zero_diag = (
        K.constant(0, dtype=K.floatx()) * tf.diag_part(pairwise_hinge))
    loss_dist = K.mean(tf.matrix_set_diag(pairwise_hinge, zero_diag))

    # Regularization loss.
    loss_reg = gamma * _l2_norm(mu)

    return loss_var + loss_dist + K.mean(loss_reg)
def __init__(self, learning_rate=0.01, momentum=0.0, nesterov=False,
             model=None, zero_penalties=True, total_iterations=0,
             total_iterations_wd=None, use_cosine_annealing=False,
             lr_multipliers=None, weight_decays=None, autorestart=None,
             init_verbose=True, eta_min=0, eta_max=1, t_cur=0,
             name="SGDW", **kwargs):
    """Set up the SGDW optimizer state.

    `weight_decays` is derived from `model` via `_init_weight_decays` when
    `total_iterations > 1`. `eta_t` may be supplied through `kwargs`
    (default 1.0); the remaining `kwargs` go to the base optimizer, and
    `kwargs['lr']`, when present, overrides `learning_rate`.

    Raises:
      ValueError: if `momentum` is an int/float outside [0, 1].
    """
    if total_iterations > 1:
        weight_decays = _init_weight_decays(
            model, zero_penalties, weight_decays)
    eta_t = kwargs.pop('eta_t', 1.)

    super(SGDW, self).__init__(name, **kwargs)
    initial_lr = kwargs.get("lr", learning_rate)
    self._set_hyper("learning_rate", initial_lr)
    self._set_hyper("decay", self._initial_decay)

    # Momentum is considered active for tensors, callables and positive
    # numbers.
    self._momentum = bool(
        isinstance(momentum, ops.Tensor)
        or callable(momentum)
        or momentum > 0)
    if isinstance(momentum, (int, float)) and (momentum < 0 or momentum > 1):
        raise ValueError("`momentum` must be between [0, 1].")
    self._set_hyper("momentum", momentum)
    self.nesterov = nesterov

    self.eta_min = K.constant(eta_min, name='eta_min')
    self.eta_max = K.constant(eta_max, name='eta_max')
    self.eta_t = K.variable(eta_t, dtype='float32', name='eta_t')
    self.t_cur = K.variable(t_cur, dtype='int64', name='t_cur')

    self.total_iterations = total_iterations
    self.total_iterations_wd = total_iterations_wd or total_iterations
    self.lr_multipliers = lr_multipliers
    self.weight_decays = weight_decays or {}
    self.init_verbose = init_verbose
    self.use_cosine_annealing = use_cosine_annealing

    _set_autorestart(self, autorestart, use_cosine_annealing)
    _check_args(self, total_iterations, use_cosine_annealing, weight_decays)

    self._init_lr = initial_lr  # to print lr_mult setup
    self._updates_processed = 0  # to track num calls to '_resource_apply_...'
    self._init_notified = False
def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999,
             epsilon=None, decay=0., amsgrad=False, model=None,
             zero_penalties=True, total_iterations=0,
             total_iterations_wd=None, use_cosine_annealing=False,
             lr_multipliers=None, weight_decays=None, autorestart=None,
             init_verbose=True, eta_min=0, eta_max=1, t_cur=0,
             name="CustomOptimizer", **kwargs):
    """Set up the optimizer state.

    `weight_decays` is derived from `model` via `_init_weight_decays` when
    `total_iterations > 1`. `eta_t` may be supplied through `kwargs`
    (default 1.0); the remaining `kwargs` go to the base optimizer, and
    `kwargs['lr']`, when present, overrides `learning_rate`. `epsilon`
    falls back to the backend default when it is `None` (or 0).

    `decay` is forwarded to the base optimizer so that it ends up in
    `self._initial_decay`. Previously the argument was accepted but never
    used, so learning-rate decay silently stayed at the base-class default.
    """
    if total_iterations > 1:
        weight_decays = _init_weight_decays(
            model, zero_penalties, weight_decays)
    eta_t = kwargs.pop('eta_t', 1.)

    # `decay` is a named parameter here, so it never reaches the base class
    # through **kwargs; pass it on explicitly.
    super(CustomOptimizer, self).__init__(name, decay=decay, **kwargs)
    initial_lr = kwargs.get('lr', learning_rate)
    self._set_hyper('learning_rate', initial_lr)
    self._set_hyper('decay', self._initial_decay)
    self._set_hyper('beta_1', beta_1)
    self._set_hyper('beta_2', beta_2)

    self.eta_min = K.constant(eta_min, name='eta_min')
    self.eta_max = K.constant(eta_max, name='eta_max')
    self.eta_t = K.variable(eta_t, dtype='float32', name='eta_t')
    self.t_cur = K.variable(t_cur, dtype='int64', name='t_cur')

    self.total_iterations = total_iterations
    self.total_iterations_wd = total_iterations_wd or total_iterations
    self.lr_multipliers = lr_multipliers
    self.weight_decays = weight_decays or {}
    self.init_verbose = init_verbose
    self.use_cosine_annealing = use_cosine_annealing
    self.epsilon = epsilon or backend_config.epsilon()
    self.amsgrad = amsgrad

    _set_autorestart(self, autorestart, use_cosine_annealing)
    _check_args(self, total_iterations, use_cosine_annealing, weight_decays)

    self._init_lr = initial_lr  # to print lr_mult setup
    self._updates_processed = 0  # to track num calls to '_resource_apply_...'
    self._init_notified = False
def call(self, inputs):
    """Apply a softmax rescaled into `[low_bound, sup_bound]`.

    Computes `low + (sup - low) * softmax(a * inputs + b)` over the last
    axis, with `a` and `b` reshaped to `[1, ..., 1, last_dim]` so they
    broadcast along that axis. With `self.with_sum == 'i'` the softmax
    output is cumulatively summed along the last axis before rescaling;
    with `'d'` the cumulative sum runs in reverse.
    """
    inputs = ops.convert_to_tensor(inputs)
    input_shape = K.int_shape(inputs)

    # Shape that broadcasts a, b along the last input axis.
    per_feature = [1] * (len(input_shape) - 1) + [input_shape[-1]]
    slope = K.reshape(self.get_a, per_feature)
    offset = K.reshape(self.get_b, per_feature)
    lower = K.constant(self.low_bound, dtype=self.dtype)
    span = K.constant(self.sup_bound - self.low_bound, dtype=self.dtype)

    weights = nn_ops.softmax(slope * inputs + offset, axis=-1)
    if self.with_sum in ('i', 'd'):
        weights = math_ops.cumsum(
            weights, axis=-1, reverse=self.with_sum == 'd')
    return lower + span * weights
def _build_selection_masks(self):
    """Build `self.S` (one 0/1 mask row per node) and `self.root`.

    For a non-leaf node the mask has ones at the indices of its children;
    for a leaf node it has a single one at the node's own index. Rows are
    stored at the node's id, and the stacked result becomes the constant
    `self.S`. `self.root` is a (1, 1) int32 constant holding the id of the
    taxonomy entry indexed by the last element of `self._lineage(0)`.
    """
    masks = [None] * self.N
    for node in self.taxonomy:
        node_id = node[self.id_key]
        row = np.zeros(self.N)
        selected = (
            [node_id] if node[self.leaf_key] else node[self.children_key])
        for position in selected:
            row[position] = 1
        masks[node_id] = row
    self.S = K.constant(masks)

    lineage_of_first = list(self._lineage(0))
    root_node = self.taxonomy[lineage_of_first[-1]]
    self.root = K.constant([[root_node[self.id_key]]], dtype='int32')
def masked_vgg_loss(y_true, y_pred):
    """Feature-space MSE between masked prediction and masked target.

    Elements of `y_true` equal to the broadcast sentinel `[-1, -1, 1]` are
    zeroed (element-wise comparison) in both tensors before they are passed
    through `preprocess_vgg` and `features_extractor`. The squared feature
    difference is averaged over the last axis and scaled by 0.006.
    """
    sentinel = K.constant([[[-1.0, -1.0, 1.0]]])
    keep = K.cast(K.not_equal(y_true, sentinel), K.floatx())

    predicted_features = features_extractor(preprocess_vgg(keep * y_pred))
    target_features = features_extractor(preprocess_vgg(keep * y_true))

    feature_mse = K.mean(
        K.square(predicted_features - target_features), axis=-1)
    return 0.006 * feature_mse
def call(self, inputs, **kwargs):
    """Project the input and score it against every row of an embedding matrix.

    `inputs` is a pair `(main_input, embedding_matrix)`. The main input is
    flattened to 2D, multiplied by the 'projection' weights (plus 'biases'
    when `self.add_biases`), optionally passed through dropout in the
    training phase, and dotted with the transposed embedding matrix. With
    `self.scaled_attention` the scores are divided by the square root of the
    embedding matrix's second dimension. The activation output is reshaped
    to `(batch, main_input.shape[1], embedding_matrix.shape[0])`.
    """
    main_input, embedding_matrix = inputs
    dynamic_shape = K.shape(main_input)
    feature_dim = K.int_shape(main_input)[-1]
    emb_rows, emb_cols = K.int_shape(embedding_matrix)

    flattened = K.reshape(main_input, (-1, feature_dim))
    projected = K.dot(flattened, self.embedding_weights['projection'])
    if self.add_biases:
        projected = K.bias_add(
            projected,
            self.embedding_weights['biases'],
            data_format='channels_last')

    if 0 < self.projection_dropout < 1:
        without_dropout = projected
        projected = K.in_train_phase(
            lambda: K.dropout(without_dropout, self.projection_dropout),
            without_dropout,
            training=kwargs.get('training'))

    attention = K.dot(projected, K.transpose(embedding_matrix))
    if self.scaled_attention:
        # scaled dot-product attention, described in
        # "Attention is all you need" (https://arxiv.org/abs/1706.03762)
        attention = attention / K.constant(
            math.sqrt(emb_cols), dtype=K.floatx())

    output_shape = (dynamic_shape[0], dynamic_shape[1], emb_rows)
    return K.reshape(self.activation(attention), output_shape)
def positional_signal(hidden_size: int, length: int,
                      min_timescale: float = 1.0,
                      max_timescale: float = 1e4):
    """
    Helper function, constructing basic positional encoding.

    Returns a tensor of shape (1, length, hidden_size): the first half of
    the last axis holds sines and the second half cosines of the positions
    scaled by geometrically spaced inverse timescales between
    `min_timescale` and `max_timescale`.

    The code is partially based on implementation from Tensor2Tensor library
    https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/layers/common_attention.py

    :raises ValueError: if `hidden_size` is odd.
    """
    if hidden_size % 2 != 0:
        raise ValueError(
            "The hidden dimension of the model must be divisible by 2. "
            f"Currently it is {hidden_size}")
    position = K.arange(0, length, dtype=K.floatx())
    num_timescales = hidden_size // 2
    # With a single timescale (hidden_size == 2) `num_timescales - 1` is 0;
    # dividing by it made the increment infinite and the signal NaN. Clamp
    # the divisor to 1, as the Tensor2Tensor reference does.
    timescale_steps = max(num_timescales - 1, 1)
    log_timescale_increment = K.constant(
        np.log(float(max_timescale) / float(min_timescale)) / timescale_steps,
        dtype=K.floatx())
    inv_timescales = min_timescale * K.exp(
        K.arange(num_timescales, dtype=K.floatx()) * -log_timescale_increment)
    scaled_time = K.expand_dims(position, 1) * K.expand_dims(inv_timescales, 0)
    signal = K.concatenate([K.sin(scaled_time), K.cos(scaled_time)], axis=1)
    return K.expand_dims(signal, axis=0)
def __call__(self, x):
    """Return `beta` times the summed KL divergence penalty for `x`.

    The mean of `x` over axis 0 is compared against `self.rho` with
    `kl_divergence`; the element-wise results are summed and scaled by
    `self.beta`. The result has the dtype of `x`.
    """
    zero = backend.constant(0., dtype=x.dtype)
    mean_activation = backend.mean(x, axis=0)
    divergence = tf.math.reduce_sum(
        kl_divergence(self.rho, mean_activation))
    return zero + self.beta * divergence
def _instance_normalize(self, x):
    """Centre and scale `x` over axes 1 and 2.

    Subtracts the mean over axes [1, 2] and multiplies by the reciprocal
    square root of the mean square of the centred values plus
    `self.epsilon`, all in the dtype of `x`.
    """
    dtype = x.dtype
    centered = x - math_ops.reduce_mean(x, axis=[1, 2], keepdims=True)
    epsilon = K.constant(self.epsilon, dtype=dtype, name='epsilon')
    mean_square = math_ops.reduce_mean(
        centered**2, axis=[1, 2], keepdims=True)
    return centered * math_ops.rsqrt(mean_square + epsilon)
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    Args:
      feats: Feature tensor; axes 1 and 2 are read as grid height and width,
        and it is reshaped to
        `[-1, height, width, num_anchors, num_classes + 5]`.
      anchors: Sequence of `num_anchors` (w, h) pairs.
      num_classes: Number of classes per anchor.
      input_shape: Network input shape (height, width); reversed to scale
        the box sizes.
      calc_loss: When truthy, return the intermediate tensors used for loss
        computation instead of the decoded predictions.

    Returns:
      `(grid, feats, box_xy, box_wh)` when `calc_loss` is truthy, otherwise
      `(box_xy, box_wh, box_confidence, box_class_probs)`.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))

    if calc_loss:
        return grid, feats, box_xy, box_wh

    # Only needed for inference, so built after the loss early-return.
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])
    return box_xy, box_wh, box_confidence, box_class_probs
def __call__(self, x):
    """Return the L1/L2 penalty for `x` as a tensor of `x`'s dtype.

    Adds `l1 * sum(|x|)` when `self.l1` is truthy and `l2 * sum(x^2)` when
    `self.l2` is truthy; with neither, the result is a zero constant.
    """
    total = backend.constant(0., dtype=x.dtype)
    penalties = (
        (self.l1, math_ops.abs),
        (self.l2, math_ops.square),
    )
    for coefficient, magnitude in penalties:
        if coefficient:
            total = total + coefficient * math_ops.reduce_sum(magnitude(x))
    return total
def _horovod_average_metrics_in_place(self, logs):
    """Replace every metric in `logs` with its Horovod all-reduced value.

    In eager mode each value is wrapped in a constant and reduced directly.
    In graph mode a variable and an allreduce op are created once per metric
    name (cached in `self._m_vars` / `self._allreduce_ops`) and reused on
    later calls by assigning the new value to the variable.

    `logs` is updated in place; nothing is returned. When `logs` is falsy a
    fresh dict is used locally, so there is nothing for the caller to
    observe.
    """
    # Imported once per call rather than once per metric inside the loop.
    import horovod.tensorflow as hvd
    from tensorflow.python.eager import context

    logs = logs or {}
    reduced_logs = {}

    if self._allreduce_ranks <= 1.:
        self._allreduce_ranks = float(hvd.size())

    # Reduce every metric among workers. Sort metrics by name
    # to ensure consistent order.
    for metric, value in sorted(logs.items()):
        if context.executing_eagerly():
            reduced_logs[metric] = hvd.allreduce(
                K.constant(value, name=metric)).numpy()
            continue

        if metric not in self._m_vars:
            with K.name_scope('MetricAverageCallback'):
                var = K.variable(value, name=metric)
            K.get_session().run(var.initializer)
            self._m_vars[metric] = var
            self._allreduce_ops[metric] = hvd.allreduce(
                var, device_dense=self._device)
        else:
            K.set_value(self._m_vars[metric], value)
        reduced_logs[metric] = K.get_session().run(
            self._allreduce_ops[metric])

    # Override the reduced values back into logs dictionary
    # for other callbacks to use.
    logs.update(reduced_logs)
def non_max_suppression(scores, boxes, classes, max_boxes=10,
                        iou_threshold=0.5, score_threshold=0.3):
    """
    Applies Non-max suppression (NMS) to a set of boxes

    Arguments:
    scores -- tensor of shape (None,), output of yolo_filter_boxes()
    boxes -- tensor of shape (None, 4), output of yolo_filter_boxes() that have
        been scaled to the image size
    classes -- tensor of shape (None,), output of yolo_filter_boxes()
    max_boxes -- integer, maximum number of predicted boxes you'd like
    iou_threshold -- real value, "intersection over union" threshold used for
        NMS filtering
    score_threshold -- real value, minimum score used for NMS filtering

    Returns:
    scores -- the entries of `scores` selected by NMS
    boxes -- the rows of `boxes` selected by NMS
    classes -- the entries of `classes` selected by NMS

    All three outputs are gathered with the same indices along the first
    axis, so they stay aligned with each other. `max_boxes` is handed to
    `tf.image.non_max_suppression` as the maximum output size.
    """
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')

    # Indices of the boxes that survive suppression.
    kept = tf.image.non_max_suppression(
        boxes, scores, max_boxes_tensor, iou_threshold, score_threshold)

    scores, boxes, classes = (
        K.gather(tensor, kept) for tensor in (scores, boxes, classes))
    return scores, boxes, classes
def call(self, inputs, mask=None, training=None):
    """Multi-head attention with content and relative-position scores.

    `inputs` is a 5-tuple
    `(inputs, relatives, memories, bias_context, bias_relative)`. Keys and
    values are computed from `memories` concatenated with the current
    inputs; queries come from the current inputs only. The attention score
    is the sum of a content term (query + `bias_context` against keys) and
    a position term (query + `bias_relative` against projected `relatives`,
    passed through `self._relative_shift`), scaled by `sqrt(units_head)`.
    A causal mask, and `mask[0]` when provided, are applied to the
    exponentiated scores before normalisation.
    """
    inputs, relatives, memories, bias_context, bias_relative = inputs
    full = K.concatenate([memories, inputs], axis=1)  # (batch, prev_len + seq_len, units)

    query = K.dot(inputs, self.kernel_q)       # (batch, seq_len, units)
    key_value = K.dot(full, self.kernel_kv)    # (batch, prev_len + seq_len, units * 2)
    rel = K.dot(relatives, self.kernel_r)      # (batch, prev_len + seq_len, units)
    if self.use_bias:
        query = K.bias_add(query, self.bias_q)
        key_value = K.bias_add(key_value, self.bias_kv)
        rel = K.bias_add(rel, self.bias_r)
    if self.activation is not None:
        query, key_value, rel = (
            self.activation(tensor) for tensor in (query, key_value, rel))

    # The joint projection holds keys in the first `units` channels and
    # values in the rest.
    value = key_value[:, :, self.units:]                        # (batch, prev_len + seq_len, units)
    key = self._reshape_to_batches(key_value[:, :, :self.units])  # (batch * n_head, prev_len + seq_len, units_head)

    content_query = self._reshape_to_batches(
        K.bias_add(query, bias_context))                        # (batch * n_head, seq_len, units_head)
    a_context = K.batch_dot(content_query, key, axes=2)         # (batch * n_head, seq_len, prev_len + seq_len)

    position_query = self._reshape_to_batches(
        K.bias_add(query, bias_relative))                       # (batch * n_head, seq_len, units_head)
    rel = self._reshape_to_batches(rel)                         # (batch * n_head, prev_len + seq_len, units_head)
    a_relative = self._relative_shift(
        K.batch_dot(position_query, rel, axes=2))               # (batch * n_head, seq_len, prev_len + seq_len)

    scale = K.sqrt(K.constant(self.units_head, dtype=K.floatx()))
    att = (a_context + a_relative) / scale
    # Subtract the row maximum before exponentiating for numerical stability.
    exp = K.exp(att - K.max(att, axis=-1, keepdims=True))

    # Causal mask: query i may see key positions up to (k_len - q_len + i).
    q_len = K.shape(query)[1]
    k_len = K.shape(key)[1]
    key_positions = K.expand_dims(K.arange(0, k_len), axis=0)
    last_visible = K.expand_dims(K.arange(k_len - q_len, k_len), axis=-1)
    causal = K.cast(key_positions <= last_visible, K.floatx())
    exp = exp * K.expand_dims(causal, axis=0)

    if mask is not None and mask[0] is not None:
        input_mask = K.cast(mask[0], K.floatx())
        # Memory positions are always treated as unmasked.
        full_mask = K.concatenate(
            [K.ones_like(memories[:, :, 0]), input_mask], axis=1)
        exp = exp * K.expand_dims(self._reshape_mask(full_mask), axis=1)

    att = exp / K.sum(exp, axis=-1, keepdims=True)
    if self.att_drop_layer is not None:
        att = self.att_drop_layer(att, training=training)

    value = self._reshape_to_batches(value)       # (batch * n_head, prev_len + seq_len, units_head)
    output = K.batch_dot(att, value)              # (batch * n_head, seq_len, units_head)
    output = self._reshape_from_batches(output)   # (batch, seq_len, units)
    output = K.dot(output, self.kernel_o)         # (batch, seq_len, units)
    if self.use_bias:
        output = K.bias_add(output, self.bias_o)
    if self.activation is not None:
        output = self.activation(output)

    # Add shape information to tensor when using `tf.keras`
    static_shape = K.int_shape(inputs)
    if static_shape[1] is not None:
        output = K.reshape(output, (-1,) + static_shape[1:])
    return output
def call(self, inputs, **kwargs):
    """Normalise `inputs` over `self.axis`, then apply gain and bias.

    The inputs are centred on their mean and divided by
    `sqrt(variance + 1e-5)`, both computed over `self.axis` with
    `keepdims=True`; the result is `gain * normalized + bias`.
    """
    mean = K.mean(inputs, axis=self.axis, keepdims=True)
    centered = inputs - mean
    variance = K.mean(K.square(centered), axis=self.axis, keepdims=True)
    epsilon = K.constant(1e-5, dtype=K.floatx())
    normalized = centered / K.sqrt(variance + epsilon)
    return self.gain * normalized + self.bias
def test_simple_3d(self):
    """FilterDetections on input with an extra leading axis keeps one box."""
    # `test_session()` is deprecated on tf.test.TestCase; use
    # `cached_session()` like the sibling mini-batch test.
    with self.cached_session():
        # create simple FilterDetections layer
        layer = layers.FilterDetections()

        # create simple input; an extra leading axis makes it 4D / 3D+classes
        boxes = np.array(
            [[
                [0, 0, 10, 10],
                [0, 0, 10, 10],  # this will be suppressed
            ]],
            dtype=K.floatx())
        boxes = K.constant(np.expand_dims(boxes, 0))

        classification = np.array(
            [[
                [0, 0.9],  # this will be suppressed
                [0, 1],
            ]],
            dtype=K.floatx())
        classification = K.constant(np.expand_dims(classification, 0))

        # compute output
        actual_boxes, actual_scores, actual_labels = layer.call(
            [boxes, classification])
        actual_boxes = K.get_value(actual_boxes)
        actual_scores = K.get_value(actual_scores)
        actual_labels = K.get_value(actual_labels)

        # define expected output: -1 everywhere except the first slot
        expected_boxes = -1 * np.ones((1, 1, 300, 4), dtype=K.floatx())
        expected_boxes[0, 0, 0, :] = [0, 0, 10, 10]

        expected_scores = -1 * np.ones((1, 1, 300), dtype=K.floatx())
        expected_scores[0, 0, 0] = 1

        expected_labels = -1 * np.ones((1, 1, 300), dtype=K.floatx())
        expected_labels[0, 0, 0] = 1

        # assert actual and expected are equal
        self.assertAllEqual(actual_boxes, expected_boxes)
        self.assertAllEqual(actual_scores, expected_scores)
        self.assertAllEqual(actual_labels, expected_labels)
def __call__(self, x):
    """Return the L1/L2 penalty for `x`.

    Yields a zero constant when both `self.l1` and `self.l2` are falsy;
    otherwise sums `sum(l1 * |x|)` and/or `sum(l2 * x^2)` for the
    coefficients that are set.
    """
    if not (self.l1 or self.l2):
        return K.constant(0.)

    terms = []
    if self.l1:
        terms.append(math_ops.reduce_sum(self.l1 * math_ops.abs(x)))
    if self.l2:
        terms.append(math_ops.reduce_sum(self.l2 * math_ops.square(x)))
    # Accumulate from a float zero, in l1-then-l2 order.
    return sum(terms, 0.)
def __call__(self, x):
    """Compute the combined L1 and L2 regularization penalty of `x`.

    When neither coefficient is set a zero constant is returned. Otherwise
    the penalty starts from 0.0 and adds `sum(l1 * |x|)` if `self.l1` is
    truthy and `sum(l2 * x^2)` if `self.l2` is truthy.
    """
    if not (self.l1 or self.l2):
        return K.constant(0.)

    penalty = 0.
    for coefficient, magnitude in ((self.l1, math_ops.abs),
                                   (self.l2, math_ops.square)):
        if coefficient:
            penalty = penalty + math_ops.reduce_sum(coefficient * magnitude(x))
    return penalty
def dice(y_true, y_pred):
    """Dice coefficient over non-zero class ids.

    Class ids are taken as the argmax over the last axis of both tensors.
    The intersection counts positions where the predicted id equals the true
    id and the true id is non-zero; the denominator is the number of
    non-zero predicted ids plus the number of non-zero true ids (plus 1e-6
    to avoid division by zero).
    """
    eps = K.constant(1e-6)
    true_ids = tf.argmax(y_true, axis=-1, output_type=tf.int32)
    pred_ids = tf.argmax(y_pred, axis=-1, output_type=tf.int32)

    # cast -> type conversion; equal -> boolean match mask; minimum ->
    # element-wise min, which zeroes matches whose true id is 0.
    matches = K.cast(K.equal(pred_ids, true_ids), tf.int32)
    intersection = K.cast(K.sum(K.minimum(matches, true_ids)), tf.float32)

    union = (tf.count_nonzero(pred_ids, dtype=tf.float32)
             + tf.count_nonzero(true_ids, dtype=tf.float32))
    return 2. * intersection / (union + eps)
def _build_adjacency_matrix(self):
    """Build `self.A`, a 0/1 matrix linking each node to its lineage.

    For every node, column `idx` gets a 1 in row `idx` (the node itself)
    and in the row of every index yielded by `self._lineage(idx)`, so that
    each of those rows accumulates from the node's prediction value.
    """
    adjacency = np.zeros(self.size)
    for node in self.taxonomy:
        column = node[self.id_key]
        # The node itself first, then everything in its lineage.
        for row in [column] + list(self._lineage(column)):
            adjacency[row, column] = 1
    self.A = K.constant(adjacency)
def build(self, input_shape):
    """Validate the configuration against `input_shape` and create weights.

    Steps, in order:
      1. Normalise `self.axis` to a list of non-negative, unique indices.
      2. Validate `virtual_batch_size` and resolve `self.fused`.
      3. Derive `param_shape` from the dimensions on the normalised axes.
      4. Create `gamma`/`beta` (or constant stand-ins when fused), the
         moving statistics and, when `self.renorm` is set, the renorm
         variables.

    Side effects: mutates `self.axis`, sets `self.fused`,
    `self._data_format` (fused only), `self.input_spec` and `self.built`.

    Raises:
      ValueError: on undefined rank, invalid or duplicate axes, an undefined
        dimension on a normalised axis, an invalid virtual-batch
        configuration, or an unsupported fused configuration.
    """
    input_shape = tensor_shape.TensorShape(input_shape)
    if not input_shape.ndims:
        raise ValueError('Input has undefined rank:', input_shape)
    ndims = len(input_shape)

    # Convert axis to list and resolve negatives
    if isinstance(self.axis, int):
        self.axis = [self.axis]

    for idx, x in enumerate(self.axis):
        if x < 0:
            self.axis[idx] = ndims + x

    # Validate axes
    for x in self.axis:
        if x < 0 or x >= ndims:
            raise ValueError('Invalid axis: %d' % x)
    if len(self.axis) != len(set(self.axis)):
        raise ValueError('Duplicate axis: %s' % self.axis)

    if self.virtual_batch_size is not None:
        if self.virtual_batch_size <= 0:
            raise ValueError('virtual_batch_size must be a positive integer that '
                             'divides the true batch size of the input Tensor')
        # If using virtual batches, the first dimension must be the batch
        # dimension and cannot be the batch norm axis
        if 0 in self.axis:
            raise ValueError('When using virtual_batch_size, the batch dimension '
                             'must be 0 and thus axis cannot include 0')
        if self.adjustment is not None:
            raise ValueError('When using virtual_batch_size, adjustment cannot '
                             'be specified')

    if self.fused in (None, True):
        # TODO(yaozhang): if input is not 4D, reshape it to 4D and reshape the
        # output back to its original shape accordingly.
        if self._USE_V2_BEHAVIOR:
            # V2: `None` means "fuse when the input is 4D"; an explicit True
            # with non-4D input is an error.
            if self.fused is None:
                self.fused = (ndims == 4)
            elif self.fused and ndims != 4:
                raise ValueError('Batch normalization layers with fused=True only '
                                 'support 4D input tensors.')
        else:
            assert self.fused is not None
            self.fused = (ndims == 4 and self._fused_can_be_used())
        # TODO(chrisying): fused batch norm is currently not supported for
        # multi-axis batch norm and by extension virtual batches. In some cases,
        # it might be possible to use fused batch norm but would require reshaping
        # the Tensor to 4D with the axis in 1 or 3 (preferred 1) which is
        # particularly tricky. A compromise might be to just support the most
        # common use case (turning 5D w/ virtual batch to NCHW)

    if self.fused:
        if self.axis == [1]:
            self._data_format = 'NCHW'
        elif self.axis == [3]:
            self._data_format = 'NHWC'
        else:
            raise ValueError('Unsupported axis, fused batch norm only supports '
                             'axis == [1] or axis == [3]')

    # Map each normalised axis to its static size; all must be known.
    axis_to_dim = {x: input_shape.dims[x].value for x in self.axis}
    for x in axis_to_dim:
        if axis_to_dim[x] is None:
            raise ValueError('Input has undefined `axis` dimension. Input shape: ',
                             input_shape)
    self.input_spec = InputSpec(ndim=ndims, axes=axis_to_dim)

    if len(axis_to_dim) == 1 and self.virtual_batch_size is None:
        # Single axis batch norm (most common/default use-case)
        param_shape = (list(axis_to_dim.values())[0],)
    else:
        # Parameter shape is the original shape but with 1 in all non-axis dims
        param_shape = [axis_to_dim[i] if i in axis_to_dim
                       else 1 for i in range(ndims)]
        if self.virtual_batch_size is not None:
            # When using virtual batches, add an extra dim at index 1
            param_shape.insert(1, 1)
            for idx, x in enumerate(self.axis):
                self.axis[idx] = x + 1      # Account for added dimension

    if self.scale:
        self.gamma = self.add_weight(
            name='gamma',
            shape=param_shape,
            dtype=self._param_dtype,
            initializer=self.gamma_initializer,
            regularizer=self.gamma_regularizer,
            constraint=self.gamma_constraint,
            trainable=True,
            experimental_autocast=False)
    else:
        self.gamma = None
        if self.fused:
            # Constant of ones kept in place of the missing gamma weight.
            self._gamma_const = K.constant(
                1.0, dtype=self._param_dtype, shape=param_shape)

    if self.center:
        self.beta = self.add_weight(
            name='beta',
            shape=param_shape,
            dtype=self._param_dtype,
            initializer=self.beta_initializer,
            regularizer=self.beta_regularizer,
            constraint=self.beta_constraint,
            trainable=True,
            experimental_autocast=False)
    else:
        self.beta = None
        if self.fused:
            # Constant of zeros kept in place of the missing beta weight.
            self._beta_const = K.constant(
                0.0, dtype=self._param_dtype, shape=param_shape)

    try:
        # Disable variable partitioning when creating the moving mean and variance
        if hasattr(self, '_scope') and self._scope:
            partitioner = self._scope.partitioner
            self._scope.set_partitioner(None)
        else:
            partitioner = None
        self.moving_mean = self.add_weight(
            name='moving_mean',
            shape=param_shape,
            dtype=self._param_dtype,
            initializer=self.moving_mean_initializer,
            synchronization=tf_variables.VariableSynchronization.ON_READ,
            trainable=False,
            aggregation=tf_variables.VariableAggregation.MEAN,
            experimental_autocast=False)

        self.moving_variance = self.add_weight(
            name='moving_variance',
            shape=param_shape,
            dtype=self._param_dtype,
            initializer=self.moving_variance_initializer,
            synchronization=tf_variables.VariableSynchronization.ON_READ,
            trainable=False,
            aggregation=tf_variables.VariableAggregation.MEAN,
            experimental_autocast=False)

        if self.renorm:
            # Create variables to maintain the moving mean and standard deviation.
            # These are used in training and thus are different from the moving
            # averages above. The renorm variables are colocated with moving_mean
            # and moving_variance.
            # NOTE: below, the outer `with device` block causes the current device
            # stack to be cleared. The nested ones use a `lambda` to set the desired
            # device and ignore any devices that may be set by the custom getter.
            def _renorm_variable(name, shape):
                """Create a renorm variable."""
                var = self.add_weight(
                    name=name,
                    shape=shape,
                    dtype=self._param_dtype,
                    initializer=init_ops.zeros_initializer(),
                    synchronization=tf_variables.VariableSynchronization.ON_READ,
                    trainable=False,
                    aggregation=tf_variables.VariableAggregation.MEAN,
                    experimental_autocast=False)
                return var

            with distribution_strategy_context.get_strategy(
            ).extended.colocate_vars_with(self.moving_mean):
                self.renorm_mean = _renorm_variable('renorm_mean', param_shape)
                self.renorm_mean_weight = _renorm_variable('renorm_mean_weight', ())
            # We initialize renorm_stddev to 0, and maintain the (0-initialized)
            # renorm_stddev_weight. This allows us to (1) mix the average
            # stddev with the minibatch stddev early in training, and (2) compute
            # the unbiased average stddev by dividing renorm_stddev by the weight.
            with distribution_strategy_context.get_strategy(
            ).extended.colocate_vars_with(self.moving_variance):
                self.renorm_stddev = _renorm_variable('renorm_stddev', param_shape)
                self.renorm_stddev_weight = _renorm_variable('renorm_stddev_weight',
                                                             ())
    finally:
        # Restore the partitioner that was disabled above, if there was one.
        if partitioner:
            self._scope.set_partitioner(partitioner)
    self.built = True