def call(self, inputs, mask=None, **kwargs):
    """Core implementation of soft attention.

    Args:
        inputs (object): input tensor.
        mask (object): optional mask tensor marking valid time steps.

    Returns:
        object: weighted sum of input tensors.
    """
    attention = K.tanh(K.dot(inputs, self.W) + self.b)
    attention = K.dot(attention, self.q)
    attention = K.squeeze(attention, axis=2)
    if mask is None:
        attention = K.exp(attention)
    else:
        attention = K.exp(attention) * K.cast(mask, dtype="float32")
    attention_weight = attention / (
        K.sum(attention, axis=-1, keepdims=True) + K.epsilon())
    attention_weight = K.expand_dims(attention_weight)
    weighted_input = inputs * attention_weight
    return K.sum(weighted_input, axis=1)
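# Illustrative sketch (not part of the layer above): the same masked-softmax
# weighting computed on dummy tensors, assuming hypothetical sizes batch=2,
# steps=4, features=8 and an attention dimension of 8. _W, _b, _q stand in for
# the layer's learned self.W, self.b, self.q.
import numpy as np
import tensorflow.keras.backend as K

_inputs = K.constant(np.random.rand(2, 4, 8).astype("float32"))
_W = K.constant(np.random.rand(8, 8).astype("float32"))   # plays the role of self.W
_b = K.constant(np.zeros(8, dtype="float32"))              # plays the role of self.b
_q = K.constant(np.random.rand(8, 1).astype("float32"))   # plays the role of self.q
_mask = K.constant([[1., 1., 1., 0.], [1., 1., 0., 0.]])   # 0 marks padded steps

_scores = K.squeeze(K.dot(K.tanh(K.dot(_inputs, _W) + _b), _q), axis=2)      # (2, 4)
_scores = K.exp(_scores) * _mask                                             # drop padded steps
_weights = _scores / (K.sum(_scores, axis=-1, keepdims=True) + K.epsilon())  # rows sum to 1
_context = K.sum(_inputs * K.expand_dims(_weights), axis=1)                  # (2, 8)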
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters."""
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])
    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(
        tf.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1],
                   name='yolo_head/tile/reshape/grid_y'),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(
        tf.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1],
                   name='yolo_head/tile/reshape/grid_x'),
        [grid_shape[0], 1, 1, 1])
    grid = tf.concat([grid_x, grid_y], axis=-1,
                     name='yolo_head/concatenate/grid')
    grid = K.cast(grid, K.dtype(feats))

    feats = tf.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors,
         num_classes + 5 + NUM_ANGLES3],
        name='yolo_head/reshape/feats')

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[..., ::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[..., ::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:5 + num_classes])

    polygons_confidence = K.sigmoid(
        feats[..., 5 + num_classes + 2:5 + num_classes + NUM_ANGLES3:3])
    polygons_x = K.exp(
        feats[..., 5 + num_classes:num_classes + 5 + NUM_ANGLES3:3])
    dx = K.square(anchors_tensor[..., 0:1] / 2)
    dy = K.square(anchors_tensor[..., 1:2] / 2)
    d = K.cast(K.sqrt(dx + dy), K.dtype(polygons_x))
    a = K.pow(input_shape[..., ::-1], 2)
    a = K.cast(a, K.dtype(feats))
    b = K.sum(a)
    diagonal = K.cast(K.sqrt(b), K.dtype(feats))
    polygons_x = polygons_x * d / diagonal
    polygons_y = feats[..., 5 + num_classes + 1:num_classes + 5 + NUM_ANGLES3:3]
    polygons_y = K.sigmoid(polygons_y)

    if calc_loss:
        return grid, feats, box_xy, box_wh, polygons_confidence
    return (box_xy, box_wh, box_confidence, box_class_probs,
            polygons_x, polygons_y, polygons_confidence)
def yolo_head(feats, anchors, num_classes, input_shape):
    """Convert final layer features to bounding box parameters."""
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (box_xy + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
    box_wh = box_wh * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))

    return box_xy, box_wh, box_confidence, box_class_probs
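# Hedged usage sketch for the yolo_head above: decode a dummy 13x13 feature map
# with 3 anchors and 80 classes. The anchor values and the 416x416 input size
# are illustrative assumptions, not taken from any particular config.
import numpy as np
import tensorflow.keras.backend as K

_anchors = np.array([[116, 90], [156, 198], [373, 326]], dtype="float32")
_feats = K.constant(np.random.rand(1, 13, 13, 3 * (80 + 5)).astype("float32"))
_input_shape = K.constant([416., 416.])  # height, width of the network input

_xy, _wh, _conf, _probs = yolo_head(_feats, _anchors, 80, _input_shape)
# _xy and _wh are normalized to the input image; both have shape (1, 13, 13, 3, 2).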
def yolo_head(feats, anchors, num_classes):
    # Convert anchors to shape (1, 1, 1, num_anchors, 2).
    num_anchors = len(anchors)
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # conv_dims holds the width and height of the grid.
    _, conv_height, conv_width, _ = K.int_shape(feats)
    conv_dims = K.variable([conv_width, conv_height])

    # Reshape the YOLO network output to
    # (None, grid_height, grid_width, num_anchors, num_classes + 5).
    feats = K.reshape(
        feats, [-1, conv_height, conv_width, num_anchors, num_classes + 5])

    # Cast conv_dims to the feats dtype and reshape it to (1, 1, 1, 1, 2).
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    # Create a grid of cell indices from (0, 0) to (width, height).
    conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)])
    conv_index = conv_index[:, [1, 0]]  # Swap columns for YOLO ordering.
    conv_index = K.variable(
        conv_index.reshape(1, conv_height, conv_width, 1, 2))

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_class_probs = K.softmax(feats[..., 5:])

    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_confidence, box_xy, box_wh, box_class_probs
def vae_loss(self, x, z_decoded):
    x = K.flatten(x)
    z_decoded = K.flatten(z_decoded)

    # Reconstruction loss (since we used a sigmoid activation, we can use
    # binary crossentropy).
    recon_loss = keras.metrics.binary_crossentropy(x, z_decoded)

    # KL divergence; z_mu and z_sigma are taken from the enclosing scope.
    kl_loss = -5e-4 * K.mean(
        1 + z_sigma - K.square(z_mu) - K.exp(z_sigma), axis=-1)

    return K.mean(recon_loss + kl_loss)
def get_log_probability_density(pred, y):
    # The network predicts means and sigmas side by side: the first two
    # columns are mu, the last two are sigma.
    mu_and_sigma = pred
    mu = mu_and_sigma[:, :2]
    sigma = mu_and_sigma[:, 2:]
    variance = K.square(sigma)

    # Gaussian density of y under N(mu, variance), with a small epsilon to
    # keep the logarithm finite.
    pdf = 1. / K.sqrt(2. * np.pi * variance) * K.exp(
        -K.square(y - mu) / (2. * variance))
    log_pdf = K.log(pdf + K.epsilon())
    return log_pdf
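# Quick numeric sanity check with made-up values: for mu = 0, sigma = 1, y = 0,
# the Gaussian log-density should be log(1 / sqrt(2*pi)) ≈ -0.9189 per component.
import tensorflow.keras.backend as K

_pred = K.constant([[0., 0., 1., 1.]])   # [mu_x, mu_y, sigma_x, sigma_y]
_y = K.constant([[0., 0.]])
print(K.eval(get_log_probability_density(_pred, _y)))  # ≈ [[-0.9189, -0.9189]]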
def full_affinity(input_x, scale):
    """Calculates the symmetrized full Gaussian affinity matrix, scaled by
    a provided scale.

    Args:
        input_x: input dataset of size n x d
        scale: provided scale

    Returns:
        n x n affinity matrix
    """
    sigma = K.variable(scale)
    dist_x = squared_distance(input_x)
    sigma_squared = K.expand_dims(K.pow(sigma, 2), -1)
    weight_mat = K.exp(-dist_x / (2 * sigma_squared))
    return weight_mat
def full_affinity(X, scale):
    '''
    Calculates the symmetrized full Gaussian affinity matrix, scaled by a
    provided scale

    X: input dataset of size n
    scale: provided scale

    returns: n x n affinity matrix
    '''
    sigma = K.variable(scale)
    Dx = squared_distance(X)
    sigma_squared = K.pow(sigma, 2)
    sigma_squared = K.expand_dims(sigma_squared, -1)
    Dx_scaled = Dx / (2 * sigma_squared)
    W = K.exp(-Dx_scaled)
    return W
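# Hedged usage sketch for full_affinity: squared_distance is not shown here, so a
# minimal pairwise squared-Euclidean helper is assumed below purely for illustration.
import numpy as np
import tensorflow.keras.backend as K

def _squared_distance(X):
    # ||x_i - x_j||^2 = ||x_i||^2 + ||x_j||^2 - 2 * x_i . x_j
    sq_norms = K.sum(K.square(X), axis=1, keepdims=True)
    return sq_norms + K.transpose(sq_norms) - 2.0 * K.dot(X, K.transpose(X))

squared_distance = _squared_distance  # stand-in for the project's own helper
_X = K.constant(np.random.rand(5, 3).astype("float32"))
_W = full_affinity(_X, scale=1.0)  # 5 x 5 symmetric matrix with ~1.0 on the diagonal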
def _yolo_head(self, feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters."""
    num_anchors = len(anchors)
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    # height, width
    grid_shape = K.shape(feats)[1:3]
    grid_y = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1],
    )
    grid_x = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1],
    )
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    box_wh = (K.exp(feats[..., 2:4]) * anchors_tensor /
              K.cast(input_shape[::-1], K.dtype(feats)))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
def loss(y_true, y_pred):
    PPO_LOSS_CLIPPING = 0.2
    PPO_ENTROPY_LOSS = 5 * 1e-3  # Does not converge without entropy penalty

    log_pdf_new = get_log_probability_density(y_pred, y_true)
    log_pdf_old = get_log_probability_density(old_prediction, y_true)

    ratio = K.exp(log_pdf_new - log_pdf_old)
    surrogate1 = ratio * advantage
    clip_ratio = K.clip(ratio,
                        min_value=(1 - PPO_LOSS_CLIPPING),
                        max_value=(1 + PPO_LOSS_CLIPPING))
    surrogate2 = clip_ratio * advantage
    loss_actor = -K.mean(K.minimum(surrogate1, surrogate2))

    sigma = y_pred[:, 2:]
    variance = K.square(sigma)
    loss_entropy = PPO_ENTROPY_LOSS * K.mean(
        -(K.log(2 * np.pi * variance) + 1) / 2)

    return loss_actor + loss_entropy
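# Numeric illustration of the clipped surrogate above (values are made up): once
# the probability ratio leaves [1 - 0.2, 1 + 0.2], the clipped term caps the objective.
import tensorflow.keras.backend as K

_ratio = K.constant([0.5, 1.0, 1.5])
_advantage = K.constant([1.0, 1.0, 1.0])
_clipped = K.clip(_ratio, 0.8, 1.2) * _advantage
print(K.eval(K.minimum(_ratio * _advantage, _clipped)))  # [0.5, 1.0, 1.2]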
def __softmax_cosine_loss(y_true, y_pred, centers):
    y_true_value = K.argmax(y_true)

    base_loss = K.variable(0.0)
    for label in range(CONFIG["num_classes"]):
        center = centers[label]
        indices = tf.where(tf.equal(y_true_value, label))
        pred_per_class = tf.gather_nd(y_pred, indices=indices)
        distances_per_class = __cosine(pred_per_class, center)
        class_sum = K.sum(distances_per_class, axis=-1)
        base_loss = tf.add(base_loss, class_sum)

    base_distances = K.zeros_like(y_true_value, dtype='float32')
    for label in range(CONFIG["num_classes"]):
        center = centers[label]
        distances_with_all_classes = __cosine(y_pred, center)
        exp_distances = K.exp(-distances_with_all_classes)
        base_distances = tf.add(base_distances, exp_distances)
    log_distances = K.log(base_distances)
    sum_on_batch = K.sum(log_distances)

    return base_loss + sum_on_batch
def call(self, x, mask=None):
    features_dim = self.features_dim
    step_dim = self.step_dim

    e = K.reshape(
        K.dot(K.reshape(x, (-1, features_dim)),
              K.reshape(self.W, (features_dim, 1))),
        (-1, step_dim))  # e = K.dot(x, self.W)
    if self.bias:
        e += self.b
    e = K.tanh(e)

    a = K.exp(e)
    # Apply the mask after the exp; the weights will be re-normalized next.
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in Theano.
        a *= K.cast(mask, K.floatx())
    # In some cases, especially in the early stages of training, the sum may be
    # almost zero and this results in NaNs. A workaround is to add a very small
    # positive number ε to the sum.
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    a = K.expand_dims(a)
    c = K.sum(a * x, axis=1)
    return c
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features.
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_conf : tensor
        Probability estimate for whether each box contains any object.
    box_class_pred : tensor
        Probability distribution estimate for each box over class labels.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Static implementation for fixed models.
    # TODO: Remove or add option for static implementation.
    # _, conv_height, conv_width, _ = K.int_shape(feats)
    # conv_dims = K.variable([conv_width, conv_height])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the inner most iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)
    conv_width_index = K.tile(
        K.expand_dims(conv_width_index, 0), [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    # Static generation of conv_index:
    # conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)])
    # conv_index = conv_index[:, [1, 0]]  # swap columns for YOLO ordering.
    # conv_index = K.variable(
    #     conv_index.reshape(1, conv_height, conv_width, 1, 2))
    # feats = Reshape(
    #     (conv_dims[0], conv_dims[1], num_anchors, num_classes + 5))(feats)

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_xy, box_wh, box_confidence, box_class_probs
def sample_z(args):
    z_mu, z_sigma = args
    eps = K.random_normal(shape=(K.shape(z_mu)[0], K.int_shape(z_mu)[1]))
    return z_mu + K.exp(z_sigma / 2) * eps
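# Hedged wiring sketch: using sample_z as a Lambda layer for the reparameterization
# trick. The encoder layers, sizes, and names below are illustrative assumptions.
from tensorflow.keras.layers import Input, Dense, Lambda

latent_dim = 2
encoder_inputs = Input(shape=(784,))
h = Dense(256, activation="relu")(encoder_inputs)
z_mu = Dense(latent_dim)(h)      # mean of q(z|x)
z_sigma = Dense(latent_dim)(h)   # log-variance of q(z|x); sample_z halves it inside exp
z = Lambda(sample_z, output_shape=(latent_dim,))([z_mu, z_sigma])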