def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Decode raw head features into box centres, sizes and scores.

    ``feats`` is reshaped to
    ``[-1, grid_h, grid_w, len(anchors), num_classes + 5]`` where
    ``grid_h, grid_w = K.shape(feats)[1:3]``.

    Returns ``(grid, feats, box_xy, box_wh)`` (with the reshaped ``feats``)
    when ``calc_loss == True``; otherwise
    ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    anchor_count = len(anchors)
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, anchor_count, 2])

    # Dynamic spatial size of the feature map (axes 1 and 2 of feats).
    grid_shape = K.shape(feats)[1:3]
    grid_h = grid_shape[0]
    grid_w = grid_shape[1]

    # Per-cell (x, y) offsets: x varies along axis 1, y along axis 0.
    row_ids = K.reshape(K.arange(0, stop=grid_h), [-1, 1, 1, 1])
    col_ids = K.reshape(K.arange(0, stop=grid_w), [1, -1, 1, 1])
    grid_y = K.tile(row_ids, [1, grid_w, 1, 1])
    grid_x = K.tile(col_ids, [grid_h, 1, 1, 1])
    grid = K.cast(K.concatenate([grid_x, grid_y]), K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_h, grid_w, anchor_count, num_classes + 5])
    out_dtype = K.dtype(feats)

    # Centres: sigmoid offset plus cell index, divided by reversed grid shape.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], out_dtype)
    # Sizes: exp-scaled anchors divided by reversed input shape.
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], out_dtype)
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Strict comparison kept on purpose: only values equal to True select
    # the loss outputs.
    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
def call(self, x):
    """Split a flat prediction vector into per-cell maps and re-join them.

    With ``s = self.grid_size``, ``b = self.num_boxes`` and
    ``c = self.num_classes``, axis 1 of ``x`` is consumed in order as
    ``s*s*c`` class scores, ``s*s*b`` box confidences and the remainder as
    box coordinates (reshaped to ``b*4`` values per cell). Each part is
    optionally activated and the three are concatenated on the last axis.
    """
    batch = K.shape(x)[0]
    side = self.grid_size
    boxes = self.num_boxes
    classes = self.num_classes

    class_end = side * side * classes
    conf_end = class_end + side * side * boxes

    # Class scores, optionally softmax-normalised.
    class_probs = K.reshape(x[:, :class_end], (batch, side, side, classes))
    if self.softmax_class_probs:
        class_probs = K.softmax(class_probs)

    # Box confidence scores, optionally squashed with a sigmoid.
    box_confs = K.reshape(
        x[:, class_end:conf_end], (batch, side, side, boxes))
    if self.sigmoid_box_confs:
        box_confs = K.sigmoid(box_confs)

    # Box coordinates, optionally squashed with a sigmoid.
    box_coords = K.reshape(x[:, conf_end:], (batch, side, side, boxes * 4))
    if self.sigmoid_box_coords:
        box_coords = K.sigmoid(box_coords)

    return K.concatenate([class_probs, box_confs, box_coords])
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert the raw detection output into box parameters.

    Example reshape: (batch_size, 13, 13, 255) -> (batch_size, 13, 13, 3, 85).

    Returns ``(grid, feats, box_xy, box_wh)`` when ``calc_loss == True``;
    otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    n_anchors = len(anchors)
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, n_anchors, 2])

    # Height and width of the feature layer.
    grid_shape = K.shape(feats)[1:3]
    height = grid_shape[0]
    width = grid_shape[1]

    # Grid-point coordinates of the feature layer, stored as (x, y) pairs.
    y_column = K.reshape(K.arange(0, stop=height), [-1, 1, 1, 1])
    x_row = K.reshape(K.arange(0, stop=width), [1, -1, 1, 1])
    grid_y = K.tile(y_column, [1, width, 1, 1])
    grid_x = K.tile(x_row, [height, 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, height, width, n_anchors, num_classes + 5])

    # Grid-point coordinate plus the predicted (sigmoid) offset.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))

    if calc_loss == True:
        return grid, feats, box_xy, box_wh

    # TODO: document what role the activation functions play here.
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])
    return box_xy, box_wh, box_confidence, box_class_probs
def _calculate_features(self, xy, wh, objectiveness, classes, anchors):
    """Turn raw head outputs into box centres, sizes and sigmoid scores.

    Returns ``(box_xy, box_wh, obj_sig, class_sig)``.
    """
    shape = K.shape(xy)[1:3]  # labelled "width, height" by the original author
    dim0 = shape[0]
    dim1 = shape[1]
    anchor_count = len(anchors)

    xy_sig = K.sigmoid(xy)

    # TODO rethink logic here, grid needs to be calculated just once after
    # model initialization.
    col = K.reshape(K.tile(K.arange(0, dim0), shape[0:1]), (-1, dim0))
    row = K.reshape(K.tile(K.arange(0, dim1), shape[1:2]), (-1, dim1))
    row = K.transpose(row)

    # NOTE(review): `col` holds dim0 * dim0 values and `row` dim1 * dim1, yet
    # both are reshaped to (dim0, dim1, 1, 1); that only fits when the two
    # spatial dims are equal -- confirm square inputs are guaranteed.
    col = K.repeat_elements(
        K.reshape(col, (dim0, dim1, 1, 1)), rep=anchor_count, axis=-2)
    row = K.repeat_elements(
        K.reshape(row, (dim0, dim1, 1, 1)), rep=anchor_count, axis=-2)
    grid = K.concatenate((col, row), axis=-1)

    # TODO same thing for the anchors.
    anchors_tensor = K.reshape(
        K.constant(anchors), [1, 1, 1, anchor_count, 2])

    box_xy = (xy_sig + K.cast(grid, K.dtype(xy_sig))) / (dim0, dim1)
    box_wh = K.exp(wh) * anchors_tensor / K.cast(
        self.input_image_dims, K.dtype(wh))

    obj_sig = K.sigmoid(objectiveness)
    class_sig = K.sigmoid(classes)
    return box_xy, box_wh, obj_sig, class_sig
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Decode head features into box centres, sizes and sigmoid scores.

    Returns ``(grid, feats, box_xy, box_wh)`` when ``calc_loss == True``;
    otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)
    # Anchors broadcast as [1, 1, 1, num_anchors, 2].
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    # Build the x/y grid, e.g. (13, 13, 1, 2).
    grid_shape = K.shape(feats)[1:3]  # height, width
    rows, cols = grid_shape[0], grid_shape[1]
    grid_y = K.tile(
        K.reshape(K.arange(0, stop=rows), [-1, 1, 1, 1]), [1, cols, 1, 1])
    grid_x = K.tile(
        K.reshape(K.arange(0, stop=cols), [1, -1, 1, 1]), [rows, 1, 1, 1])
    grid = K.cast(K.concatenate([grid_x, grid_y]), K.dtype(feats))

    # e.g. (batch_size, 13, 13, 3, 85)
    feats = K.reshape(feats, [-1, rows, cols, num_anchors, num_classes + 5])

    # Convert raw predictions into real values:
    # box_xy is the box centre, box_wh is the box width and height.
    xy_scale = K.cast(grid_shape[..., ::-1], K.dtype(feats))
    wh_scale = K.cast(input_shape[..., ::-1], K.dtype(feats))
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / xy_scale
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / wh_scale
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # When computing the loss, return the grid and the reshaped features.
    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
def masked():
    """Replace a random band of positions in ``inputs`` with a fill value.

    Positions ``p`` in ``arange(0, 1, 1 / freq)`` with
    ``offset <= p <= offset + size`` are overwritten by ``cval``; all other
    positions keep the original input.
    """
    # Fill value: sigmoid(beta)-weighted blend of min_value and max_value.
    blend = K.sigmoid(self.beta)
    cval = self.min_value * blend + self.max_value * (1 - blend)

    # NOTE(review): `ratio` is computed but never used below. A disabled
    # alternative (softmax over the noisy ratio concatenated with 0) was
    # left in the original as an inert string block.
    ratio = K.sigmoid(self.ratio)

    # Random band: width in [0, 0.2), start in [0, 1 - width).
    size = K.random_uniform([], maxval=0.2, dtype='float32')
    offset = K.random_uniform([], maxval=1 - size, dtype='float32')

    positions = K.arange(0., 1., 1 / freq, dtype='float32')
    at_or_after_start = K.cast(
        K.greater_equal(positions, offset), dtype='float32')
    at_or_before_end = K.cast(
        K.less_equal(positions, size + offset), dtype='float32')

    # 1 outside the band, 0 inside it.
    keep = 1 - at_or_after_start * at_or_before_end
    keep = K.reshape(keep, broadcast_shape)

    return inputs * keep + cval * (1 - keep)
def yolo_head(feats, anchors, input_shape, calc_loss=False, att_map=None):
    """Convert final layer features to bounding box parameters.

    ``feats`` is reshaped to ``[-1, height, width, len(anchors), 5]``
    (no class channels).

    Returns ``(grid, feats, box_xy, box_wh)`` when ``calc_loss == True``;
    otherwise ``(box_xy, box_wh, box_confidence)``.
    """
    anchor_count = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, anchor_count, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    height, width = grid_shape[0], grid_shape[1]
    y_ids = K.reshape(K.arange(0, stop=height), [-1, 1, 1, 1])
    x_ids = K.reshape(K.arange(0, stop=width), [1, -1, 1, 1])
    grid_y = K.tile(y_ids, [1, width, 1, 1])
    grid_x = K.tile(x_ids, [height, 1, 1, 1])
    grid = K.cast(K.concatenate([grid_x, grid_y]), K.dtype(feats))

    feats = K.reshape(feats, [-1, height, width, anchor_count, 5])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[..., ::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[..., ::-1], K.dtype(feats))

    if att_map is not None:
        # NOTE(review): seg_map is built (tiled 3x on the last axis, then
        # expanded) but not used; the blend "*.8 + seg_map*.2" with the
        # confidence score is disabled, so both paths give the same result.
        seg_map = K.tile(att_map, [1, 1, 1, 3])
        seg_map = K.expand_dims(seg_map, axis=-1)
    box_confidence = K.sigmoid(feats[..., 4:5])

    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence
def _transform_netout(self, y_pred_raw):
    """Decode the raw network output along its last axis.

    Channels ``0:2`` become ``sigmoid + self.c_grid``, channels ``2:4``
    become ``exp * self.anchors``, channel ``4`` is passed through a sigmoid
    and channels ``5:`` are forwarded unchanged. The parts are concatenated
    back together on the last axis.
    """
    centre = K.sigmoid(y_pred_raw[..., :2]) + self.c_grid
    size = K.exp(y_pred_raw[..., 2:4]) * self.anchors
    confidence = K.sigmoid(y_pred_raw[..., 4:5])
    class_scores = y_pred_raw[..., 5:]  # left as raw values
    decoded = [centre, size, confidence, class_scores]
    return K.concatenate(decoded, axis=-1)
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final predictions into bounding boxes.

    ``feats`` is reshaped to
    ``[-1, height, width, len(anchors), num_classes + 5]`` using the dynamic
    spatial size ``K.shape(feats)[1:3]``.

    Returns ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy;
    otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)
    # (batch, height, width, num_anchors, box_params)
    anchor_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # (height, width)
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    # Use the dynamic grid_shape tensor here. The static `grid.shape` of the
    # grid tensor is unknown (None) when the spatial size is only known at
    # run time, which makes the reshape target invalid.
    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchor_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
def call(self, inputs, training=None):
    """Apply concrete dropout to ``inputs`` and forward them to the parent layer.

    A relaxed drop mask is sampled with shape
    ``(batch, 1, 1, inputs.shape[3])``, so one value is shared across axes 1
    and 2. The inputs are multiplied by ``1 - mask`` and rescaled by
    ``1 / (1 - sigmoid(self.p_logit))`` before calling the parent ``call``.

    Parameters:
        inputs: Input tensor; axes 0 and 3 are read for the noise shape.
        training (bool): Accepted for API compatibility; this method does
            not read it, so behaviour is the same in both modes.

    Returns:
        The result of the parent layer's ``call`` on the dropped-out inputs.
    """
    # Small epsilon parameter needed for stable optimization.
    eps = K.cast_to_floatx(K.epsilon())

    # Uniform noise for the relaxation. This formulation allows for a
    # derivative with respect to p.
    dyn_shape = K.shape(inputs)
    noise_shape = (dyn_shape[0], 1, 1, dyn_shape[3])
    unif_noise = K.random_uniform(shape=noise_shape, seed=self.random_seed)

    p = K.sigmoid(self.p_logit)
    relaxed_logit = (
        K.log(p + eps)
        - K.log(1.0 - p + eps)
        + K.log(unif_noise + eps)
        - K.log(1.0 - unif_noise + eps)
    )
    drop_prob = K.sigmoid(relaxed_logit / self.temp)

    # Drop, then rescale by the retain probability.
    dropped = inputs * (1.0 - drop_prob)
    dropped = dropped / (1.0 - p)

    # Hand over to the wrapped parent layer.
    return super(SpatialConcreteDropout, self).call(dropped)
def _loss(y_true, y_pred):
    """Combined embedding-distance and classification loss for a pair of inputs.

    ``y_pred`` columns are laid out as
    ``[embed_1 (e_len) | out_1 (n_cls) | embed_2 (e_len) | out_2 (n_cls)]``.
    ``y_true`` columns are ``[true_1 (n_cls) | true_2 (up to the last column)
    | pair flag (last column)]``.
    """
    # Column boundaries inside y_pred.
    out1_start = e_len
    out1_end = e_len + n_cls
    embed2_end = e_len + n_cls + e_len
    out2_end = e_len + n_cls + e_len + n_cls

    embed_01 = y_pred[:, :out1_start]
    out_01 = y_pred[:, out1_start:out1_end]
    embed_02 = y_pred[:, out1_end:embed2_end]
    out_02 = y_pred[:, embed2_end:out2_end]

    tru_01 = y_true[:, :n_cls]
    tru_02 = y_true[:, n_cls:-1]
    true_emb = y_true[:, -1]

    # Symmetrised KL divergence between the two embeddings. Euclidean
    # distance and cosine similarity were tried as alternatives and are
    # currently disabled.
    forward_kl = Metrics.kullback_leibler(embed_01, embed_02)
    backward_kl = Metrics.kullback_leibler(embed_02, embed_01)
    embed_dist = forward_kl + backward_kl

    pos_embed_dist = true_emb * embed_dist
    neg_embed_dist = (1 - true_emb) * embed_dist

    loss = Metrics.entropy(K.sigmoid(pos_embed_dist))
    loss = loss + Metrics.entropy(K.sigmoid(neg_embed_dist))
    loss = loss + Metrics.cross_entropy(tru_01, out_01)
    loss = loss + Metrics.cross_entropy(tru_02, out_02)
    return loss
def step(cell_inputs, cell_states):
    """Step function that will be used by Keras RNN backend.

    Attention features computed from ``img`` and the previous hidden state
    are appended to the step input before the gated update.
    Returns ``(h, [h])``.
    """
    prev_h = cell_states[0]

    attended = self.attention(img, prev_h)
    step_input = K.concatenate([cell_inputs, attended], axis=-1)

    # Inputs projected by all gate matrices at once.
    in_proj = K.bias_add(K.dot(step_input, self.kernel), self.input_bias)
    x_z, x_r, x_h = array_ops.split(in_proj, 3, axis=1)

    # Hidden state projected by all gate matrices at once.
    rec_proj = K.bias_add(
        K.dot(prev_h, self.recurrent_kernel), self.recurrent_bias)
    recurrent_z, recurrent_r, recurrent_h = array_ops.split(
        rec_proj, 3, axis=1)

    update_gate = K.sigmoid(x_z + recurrent_z)
    reset_gate = K.sigmoid(x_r + recurrent_r)
    candidate = K.tanh(x_h + reset_gate * recurrent_h)

    # Previous and candidate state mixed by the update gate.
    h = update_gate * prev_h + (1 - update_gate) * candidate
    return h, [h]
def call(self, inputs, **kwargs):
    """Gate between an additive and a log-space multiplicative path.

    Both paths share the weight ``tanh(w_hat) * sigmoid(m_hat)``. The gate
    is ``sigmoid(inputs . big_g)``.
    """
    shared_w = K.tanh(self.w_hat) * K.sigmoid(self.m_hat)

    # Additive path: plain linear map.
    additive = K.dot(inputs, shared_w)

    # Multiplicative path: linear map in log space; epsilon keeps log finite.
    log_inputs = K.log(K.abs(inputs) + K.epsilon())
    multiplicative = K.exp(K.dot(log_inputs, shared_w))

    gate = K.sigmoid(K.dot(inputs, self.big_g))
    return gate * additive + (1 - gate) * multiplicative
def yolo3_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    Returns ``(grid, feats, box_xy, box_wh)`` when ``calc_loss == True``
    (``feats`` reshaped to
    ``[-1, height, width, len(anchors), num_classes + 5]``); otherwise
    ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    n_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, n_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    n_rows = grid_shape[0]
    n_cols = grid_shape[1]

    grid_y = K.arange(0, stop=n_rows)
    grid_y = K.reshape(grid_y, [-1, 1, 1, 1])
    grid_y = K.tile(grid_y, [1, n_cols, 1, 1])

    grid_x = K.arange(0, stop=n_cols)
    grid_x = K.reshape(grid_x, [1, -1, 1, 1])
    grid_x = K.tile(grid_x, [n_rows, 1, 1, 1])

    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, n_rows, n_cols, n_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    raw_xy = feats[..., :2]
    raw_wh = feats[..., 2:4]
    box_xy = (K.sigmoid(raw_xy) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(raw_wh) * anchors_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Decode head features into box parameters using a ``meshgrid`` grid.

    Returns ``(grid, feats, box_xy, box_wh)`` when ``calc_loss == True``;
    otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    anchor_count = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, anchor_count, 2])

    conv_dims = K.shape(feats)[1:3]
    height_ids = K.arange(0, stop=conv_dims[0])
    width_ids = K.arange(0, stop=conv_dims[1])

    # NOTE(review): `meshgrid` is defined outside this block; presumably it
    # returns per-cell x and y index tensors whose last-axis concatenation
    # yields (x, y) pairs -- verify against its definition.
    x_axis, y_axis = meshgrid(width_ids, height_ids)
    grid = K.cast(K.concatenate([x_axis, y_axis]), K.dtype(feats))

    feats = K.reshape(
        feats,
        [-1, conv_dims[0], conv_dims[1], anchor_count, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    xy_divisor = K.cast(conv_dims[::-1], K.dtype(feats))
    wh_divisor = K.cast(input_shape[::-1], K.dtype(feats))
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / xy_divisor
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / wh_divisor
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
def rpn_loss_regr_fixed_num(y_true, y_pred):
    """Sum of class, objectness and coordinate terms over a fixed 7x7x5 grid.

    Both tensors are reshaped to ``(C.BATCH_SIZE, 7, 7, 5, 25)``; the last
    axis is read as 4 coordinates, 1 objectness value and 20 class scores.
    """
    target_shape = (C.BATCH_SIZE, 7, 7, 5, 25)

    # NOTE(review): `shape` and `mask` are computed but never used; the
    # class/coord/object masking that would consume `mask` is disabled.
    shape = K.shape(y_true)
    true_reshaped = K.reshape(y_true, target_shape)
    pred_reshaped = K.reshape(y_pred, target_shape)
    mask = true_reshaped[:, :, :, :, 4]

    true_classes = true_reshaped[:, :, :, :, 5:]
    pred_classes = pred_reshaped[:, :, :, :, 5:]
    pred_objectness = pred_reshaped[:, :, :, :, 4]

    # NOTE(review): this term is not reduced with K.sum, so the returned
    # value keeps the per-box shape of the cross-entropy.
    class_loss = 10 * (
        1 - K.categorical_crossentropy(true_classes, K.softmax(pred_classes)))

    # NOTE(review): without the mask, both the "object" and "no object"
    # penalties are applied to every box.
    object_square = K.square(1 - K.sigmoid(pred_objectness))
    object_loss = object_lambda * K.sum(object_square)

    no_object_square = K.square(0 - K.sigmoid(pred_objectness))
    no_object_loss = object_lambda * K.sum(no_object_square)

    coord_square = K.square(
        true_reshaped[:, :, :, :, :4] - pred_reshaped[:, :, :, :, :4])
    coord_loss = coord_lambda * K.sum(coord_square)

    return (class_loss + object_loss + no_object_loss + coord_loss)
def call(self, x, h2_prev, timestep, rand_seed=None):
    """Run one recurrent timestep and return the new hidden state ``h2_t``.

    Args:
        x: Feed-forward input. The original author notes the expected layout
            is (batch, height, width, channel).
        h2_prev: Previous hidden state. It is replaced by a fresh random
            initial state when ``timestep == 0``.
        timestep: Integer step index; selects ``self.bn[timestep*4 + k]`` and,
            in the non-batchnorm path, ``self.eta[timestep]``.
        rand_seed: Seed forwarded to ``glorot_normal`` for the initial state
            when the batch dimension is statically known.

    Returns:
        The hidden state ``h2_t`` for this timestep.
    """
    # Initialise h2 on the first step.
    if timestep == 0:
        # glorot_normal cannot take None as the batch dimension, so fall back
        # to a dynamically-shaped normal sample in that case. K.int_shape
        # yields plain ints/None on every backend version, whereas comparing
        # `x.shape[0] == None` does not reliably evaluate to True when the
        # entry is a Dimension object.
        if K.int_shape(x)[0] is None:
            h2_prev = K.random_normal(K.shape(x))
        else:
            h2_prev = keras.initializers.glorot_normal(seed=rand_seed)(x.shape)

    if self.batchnorm:
        # ReLU with recurrent batchnorm.
        # Gain gate G(1)[t].
        g1 = K.sigmoid(
            self.bn[timestep*4](K.conv2d(h2_prev, self.u1) + self.b1))

        # Horizontal inhibition C(1)[t].
        if self.channel_sym:
            conv_inh = channel_sym_conv2d((g1 * h2_prev), self.w_inh)
        else:
            conv_inh = K.conv2d((g1 * h2_prev), self.w_inh, padding='same')
        c1 = self.bn[timestep*4+1](conv_inh)

        # Apply gain gate and inhibition to get H(1)[t].
        h1 = K.relu(x - K.relu(c1 * (self.alpha * h2_prev + self.mu)))

        # Mix gate G(2)[t].
        g2 = K.sigmoid(
            self.bn[timestep*4+2](K.conv2d(h1, self.u2) + self.b2))

        # Horizontal excitation C(2)[t].
        if self.channel_sym:
            conv_exc = channel_sym_conv2d(h1, self.w_exc)
        else:
            conv_exc = K.conv2d(h1, self.w_exc, padding='same')
        c2 = self.bn[timestep*4+3](conv_exc)

        # Output candidate H_tilda(2)[t] via excitation.
        h2_tilda = K.relu(
            self.kappa * h1 + self.beta * c2 + self.omega * h1 * c2)

        # Apply mix gate to get H(2)[t].
        h2_t = g2 * h2_tilda + (1 - g2) * h2_prev
    else:
        # tanh with timestep weights, no batchnorm except at g2.
        g1 = K.sigmoid(K.conv2d(h2_prev, self.u1) + self.b1)

        if self.channel_sym:
            c1 = channel_sym_conv2d((g1 * h2_prev), self.w_inh)
        else:
            c1 = K.conv2d((g1 * h2_prev), self.w_inh, padding='same')

        h1 = K.tanh(x - c1 * (self.alpha * h2_prev + self.mu))

        g2 = K.sigmoid(
            self.bn[timestep*4+2](K.conv2d(h1, self.u2) + self.b2))

        if self.channel_sym:
            c2 = channel_sym_conv2d(h1, self.w_exc)
        else:
            c2 = K.conv2d(h1, self.w_exc, padding='same')

        h2_tilda = K.tanh(
            self.kappa * h1 + self.beta * c2 + self.omega * h1 * c2)
        h2_t = self.eta[timestep] * (g2 * h2_tilda + (1 - g2) * h2_prev)

    return h2_t
def loss(levels, logits):
    """Importance-weighted binary log-loss summed over axis 1, averaged over the batch.

    For each element the term is
    ``log(sigmoid(z)) * level + log(1 - sigmoid(z)) * (1 - level)``,
    using the identity ``log(1 - sigmoid(z)) = log(sigmoid(z)) - z``.

    Args:
        levels: Binary targets with the same shape as ``logits``.
        logits: Raw (pre-sigmoid) predictions.

    Returns:
        Scalar mean of the per-sample weighted negative log-likelihood.
    """
    # log(sigmoid(z)) == -softplus(-z). Computing it this way avoids the
    # sigmoid underflowing to 0 for large negative logits, which would give
    # log(0) = -inf and then NaN/inf in the products below.
    log_sigmoid = -K.softplus(-logits)

    weights = tf.convert_to_tensor(imp, dtype=tf.float32)
    val = -K.sum(
        (log_sigmoid * levels + (log_sigmoid - logits) * (1 - levels))
        * weights,
        axis=1,
    )
    return K.mean(val)
def loss_fn(true_logits, pred_logits):
    """Importance-weighted binary log-loss summed over axis 1, averaged over the batch.

    For each element the term is
    ``log(sigmoid(z)) * t + log(1 - sigmoid(z)) * (1 - t)``,
    using the identity ``log(1 - sigmoid(z)) = log(sigmoid(z)) - z``.
    Weights default to ones of length ``n_classes - 1`` when ``imp_w`` is
    ``None``.

    Args:
        true_logits: Target values with the same shape as ``pred_logits``.
        pred_logits: Raw (pre-sigmoid) predictions.

    Returns:
        Scalar mean of the per-sample weighted negative log-likelihood.
    """
    imp = imp_w if imp_w is not None else tf.ones(n_classes - 1, dtype=float)

    # log(sigmoid(z)) == -softplus(-z). Computing it this way avoids the
    # sigmoid underflowing to 0 for large negative logits, which would give
    # log(0) = -inf and then NaN/inf in the products below.
    log_sigmoid = -K.softplus(-pred_logits)

    val = (-K.sum(
        (log_sigmoid * true_logits
         + (log_sigmoid - pred_logits) * (1 - true_logits)) * imp,
        axis=1))
    return K.mean(val)
def yolo4_decode(feats, anchors, num_classes, input_shape, scale_x_y=None,
                 calc_loss=False):
    """Decode final layer features to bounding box parameters.

    Returns ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy
    (used for the loss); otherwise
    ``(box_xy, box_wh, box_confidence, box_class_probs)`` (used for
    prediction).
    """
    anchor_count = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, anchor_count, 2])

    # Build the reference grid, e.g. (13, 13, 1, 2).
    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_h, grid_w = grid_shape[0], grid_shape[1]
    grid_y = K.tile(
        K.reshape(K.arange(0, stop=grid_h), [-1, 1, 1, 1]),
        [1, grid_w, 1, 1])
    grid_x = K.tile(
        K.reshape(K.arange(0, stop=grid_w), [1, -1, 1, 1]),
        [grid_h, 1, 1, 1])
    grid = K.cast(K.concatenate([grid_x, grid_y]), K.dtype(feats))

    # Reshape to [batch_size, height, width, num_anchors, num_classes + 5].
    feats = K.reshape(
        feats, [-1, grid_h, grid_w, anchor_count, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    # box_xy is expressed relative to the grid (divided by the grid size);
    # box_wh is expressed relative to input_shape. Per the original note, the
    # input size is the smallest backbone feature-map size times its stride.
    xy_offset = K.sigmoid(feats[..., :2])
    if scale_x_y:
        # Eliminate grid sensitivity trick involved in YOLOv4
        #
        # Reference Paper & code:
        #     "YOLOv4: Optimal Speed and Accuracy of Object Detection"
        #     https://arxiv.org/abs/2004.10934
        #     https://github.com/opencv/opencv/issues/17148
        #     https://zhuanlan.zhihu.com/p/139724869
        xy_offset = xy_offset * scale_x_y - (scale_x_y - 1) / 2
    box_xy = (xy_offset + grid) / K.cast(
        grid_shape[..., ::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[..., ::-1], K.dtype(feats))

    # Objectness score decoding.
    box_confidence = K.sigmoid(feats[..., 4:5])
    # Class score decoding.
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
def one_step(self, inputs, states):
    """Run one recurrent step and return ``(out, [c_out, h_out])``.

    ``states`` is unpacked as ``(c_last, h_last)``. The projected input is
    split into two "master" gates (via ``cumsoftmax``) and four chunked
    parts (forget, input, output gates and the cell candidate), each of
    ``self.levels`` rows. ``out`` concatenates the new hidden state with the
    first slice of each master gate along axis 1.

    NOTE(review): the ``print`` calls look like leftover debugging output;
    two of them also build extra ``K.dot`` ops only for printing. Consider
    removing them once they are no longer needed.
    """
    x_in, (c_last, h_last) = inputs, states
    print('x_in: ' + str(x_in))
    print('c_last: ' + str(c_last))
    print('h_last: ' + str(h_last))
    # Joint projection of the input and the previous hidden state.
    x_out = K.dot(x_in, self._kernel) + K.dot(h_last, self._recurrent_kernel)
    print('K.dot(x_in, self._kernel): ' + str(K.dot(x_in, self._kernel)))
    print('K.dot(h_last, self._recurrent_kernel): ' + str(K.dot(h_last, self._recurrent_kernel)))
    x_out = K.bias_add(x_out, self.bias)
    print('x_out: ' + str(x_out))
    # Master forget gate from the first `levels` columns.
    f_master_gate = cumsoftmax(x_out[:, :self.levels], 'l2r')
    print('x_out[:, :self.levels]: ' + str(x_out[:, :self.levels]))
    print('f_master_gate: ' + str(f_master_gate))
    f_master_gate = K.expand_dims(f_master_gate, 2)
    print('f_master_gate: ' + str(f_master_gate))
    # Master input gate from the next `levels` columns.
    i_master_gate = cumsoftmax(x_out[:, self.levels:self.levels * 2], 'r2l')
    print('x_out[:, self.levels: self.levels * 2]: ' + str(x_out[:, self.levels:self.levels * 2]))
    print('i_master_gate: ' + str(i_master_gate))
    i_master_gate = K.expand_dims(i_master_gate, 2)
    print('i_master_gate: ' + str(i_master_gate))
    # The remaining columns are viewed as (levels * 4, chunk_size) per sample.
    x_out = x_out[:, self.levels * 2:]
    print('x_out: ' + str(x_out))
    x_out = K.reshape(x_out, (-1, self.levels * 4, self.chunk_size))
    print('x_out: ' + str(x_out))
    f_gate = K.sigmoid(x_out[:, :self.levels])
    print('x_out[:, :self.levels] ' + str(x_out[:, :self.levels]))
    print(f_gate)
    i_gate = K.sigmoid(x_out[:, self.levels:self.levels * 2])
    print('x_out[:, self.levels: self.levels * 2] ' + str(x_out[:, self.levels:self.levels * 2]))
    print(i_gate)
    o_gate = K.sigmoid(x_out[:, self.levels * 2:self.levels * 3])
    print('x_out[:, self.levels * 2: self.levels * 3] ' + str(x_out[:, self.levels * 2:self.levels * 3]))
    print(o_gate)
    c_in = K.tanh(x_out[:, self.levels * 3:])
    c_last = K.reshape(c_last, (-1, self.levels, self.chunk_size))
    # Where both master gates are active, use the regular gated update;
    # elsewhere copy the old cell (forget-only) or the candidate (input-only).
    overlap = f_master_gate * i_master_gate
    print('overlap: ' + str(overlap))
    c_out = overlap * (f_gate * c_last + i_gate * c_in) + \
        (f_master_gate - overlap) * c_last + \
        (i_master_gate - overlap) * c_in
    print('c_out: ' + str(c_out))
    h_out = o_gate * K.tanh(c_out)
    print('h_out: ' + str(h_out))
    # Flatten the (levels, chunk_size) layout back to `units` columns.
    c_out = K.reshape(c_out, (-1, self.units))
    print('c_out: ' + str(c_out))
    h_out = K.reshape(h_out, (-1, self.units))
    print('h_out: ' + str(h_out))
    out = K.concatenate(
        [h_out, f_master_gate[..., 0], i_master_gate[..., 0]], 1)
    return out, [c_out, h_out]
def dsilu(x):
    """Derivative of the sigmoid-weighted linear unit (SiLU) activation.

    As described in Elfwing et al., Neural Networks 107 (2018) 3-11.
    Computes ``sigmoid(x) * (1 + x * (1 - sigmoid(x)))``.

    :param x: input
    :return: activations
    """
    gate = sigmoid(x)
    complement = 1 - gate
    return gate * (1 + x * complement)
def yolo3_decode(feats, anchors, num_classes, input_shape, scale_x_y=None,
                 calc_loss=False):
    """Decode final layer features to bounding box parameters.

    The raw centre logits are clamped to [-10, 10] and the raw size logits to
    [-8, 8] before the sigmoid/exp, which bounds ``exp`` on the size branch.

    Args:
        feats: Raw head output; reshaped here to
            ``[-1, height, width, len(anchors), num_classes + 5]``.
        anchors: Sequence of (w, h) anchor pairs.
        num_classes: Number of class channels.
        input_shape: Model input size, indexed with ``[..., ::-1]`` and cast
            to the dtype of ``feats``.
        scale_x_y: Optional YOLOv4 grid-sensitivity scale; skipped when falsy.
        calc_loss: When ``True`` return the loss-side outputs.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss == True`` (``feats``
        is the reshaped, unclamped tensor); otherwise
        ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Clamp the raw logits. Previously the clamped tensors were computed and
    # then discarded, because the decoding below re-read feats directly; the
    # clamped values are now the ones that get decoded.
    raw_xy = K.clip(feats[..., :2], -10.0, 10.0)
    raw_wh = K.clip(feats[..., 2:4], -8.0, 8.0)

    # Adjust predictions to each spatial grid point and anchor size.
    if scale_x_y:
        # Eliminate grid sensitivity trick involved in YOLOv4
        #
        # Reference Paper & code:
        #     "YOLOv4: Optimal Speed and Accuracy of Object Detection"
        #     https://arxiv.org/abs/2004.10934
        #     https://github.com/opencv/opencv/issues/17148
        #
        box_xy_tmp = K.sigmoid(raw_xy) * scale_x_y - (scale_x_y - 1) / 2
        box_xy = (box_xy_tmp + grid) / K.cast(
            grid_shape[..., ::-1], K.dtype(feats))
    else:
        box_xy = (K.sigmoid(raw_xy) + grid) / K.cast(
            grid_shape[..., ::-1], K.dtype(feats))
    box_wh = K.exp(raw_wh) * anchors_tensor / K.cast(
        input_shape[..., ::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features.
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_confidence : tensor
        Sigmoid of channel 4: estimate of whether each box contains an object.
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_class_probs : tensor
        Softmax over the class channels for each box.
    """
    anchor_count = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, anchor_count, 2])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    conv_h = conv_dims[0]
    conv_w = conv_dims[1]

    # In YOLO the height index is the inner most iteration.
    height_idx = K.tile(K.arange(0, stop=conv_h), [conv_w])
    width_idx = K.arange(0, stop=conv_w)
    width_idx = K.tile(K.expand_dims(width_idx, 0), [conv_h, 1])
    width_idx = K.flatten(K.transpose(width_idx))

    conv_index = K.transpose(K.stack([height_idx, width_idx]))
    conv_index = K.reshape(conv_index, [1, conv_h, conv_w, 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, conv_h, conv_w, anchor_count, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    box_confidence = K.sigmoid(feats[..., 4:5])
    xy_offset = K.sigmoid(feats[..., :2])
    wh_scale = K.exp(feats[..., 2:4])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (xy_offset + conv_index) / conv_dims
    box_wh = wh_scale * anchors_tensor / conv_dims

    return box_confidence, box_xy, box_wh, box_class_probs
def call(self, inputs, **kwargs):
    """Combine an additive and a log-space multiplicative transform.

    Both paths share the weight ``tanh(W_hat) * sigmoid(M_hat)``. With
    ``self.use_gating`` the two are mixed by ``sigmoid(inputs . G)``;
    otherwise they are simply added.
    """
    weights = K.tanh(self.W_hat) * K.sigmoid(self.M_hat)

    # Multiplicative path: linear map in log space; self.epsilon keeps the
    # log finite.
    multiplicative = K.exp(K.dot(K.log(K.abs(inputs) + self.epsilon), weights))
    # Additive path: plain linear map.
    additive = K.dot(inputs, weights)

    if not self.use_gating:
        return additive + multiplicative

    gate = K.sigmoid(K.dot(inputs, self.G))
    return gate * additive + (1. - gate) * multiplicative
def custom_loss(y_pred_pos, y_pred_neg, model_params):
    """Blend of a pairwise ranking loss and a pointwise log-loss.

    ``model_params['alpha']`` weights the pairwise term against the pointwise
    term. ``model_params['loss'] == 'TOP'`` selects the sigmoid-based pairwise
    form; any other value selects ``-log(sigmoid(pos - neg))``.
    Returns the mean of the blended loss.
    """
    alpha = K.constant(model_params['alpha'])

    # Pointwise: positives pushed towards 1, negatives towards 0.
    pointwise_loss = (
        -K.log(y_pred_pos + 1e-07) - K.log(1 - y_pred_neg + 1e-07))

    use_top = model_params['loss'] == 'TOP'
    if use_top:
        margin_term = K.sigmoid(y_pred_neg - y_pred_pos)
        magnitude_term = K.sigmoid(y_pred_neg * y_pred_neg)
        pairwise_loss = margin_term + magnitude_term
    else:
        pairwise_loss = -K.log(K.sigmoid(y_pred_pos - y_pred_neg) + 1e-07)

    blended = alpha * pairwise_loss + (1 - alpha) * pointwise_loss
    return tf.reduce_mean(blended)
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Decode head features into box centres, sizes and sigmoid scores.

    ``feats`` is first passed through ``tf.convert_to_tensor``.

    Returns ``(grid, feats, box_xy, box_wh)`` when ``calc_loss == True``
    (loss computation); otherwise
    ``(box_xy, box_wh, box_confidence, box_class_probs)`` (prediction).
    """
    anchor_count = len(anchors)

    # Anchors broadcast as [1, 1, 1, num_anchors, 2].
    feats = tf.convert_to_tensor(feats)
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, anchor_count, 2])

    # Build the x/y grid, e.g. (13, 13, 1, 2).
    grid_shape = K.shape(feats)[1:3]  # height, width
    height = grid_shape[0]
    width = grid_shape[1]
    y_ids = K.reshape(K.arange(0, stop=height), [-1, 1, 1, 1])
    x_ids = K.reshape(K.arange(0, stop=width), [1, -1, 1, 1])
    grid_y = K.tile(y_ids, [1, width, 1, 1])
    grid_x = K.tile(x_ids, [height, 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    # Reshape predictions to e.g. (batch_size, 13, 13, 3, 85), where
    # 85 = 4 + 1 + 80:
    #   4  centre/size adjustment parameters
    #   1  box confidence
    #   80 per-class confidences
    feats = K.reshape(
        feats, [-1, height, width, anchor_count, num_classes + 5])

    # Convert raw predictions into real values:
    # box_xy is the box centre, box_wh is the box width and height.
    box_xy = K.sigmoid(feats[..., :2]) + grid
    box_xy = box_xy / K.cast(grid_shape[..., ::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor
    box_wh = box_wh / K.cast(input_shape[..., ::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
def call(self, inputs, **kwargs):
    """Return the additive, multiplicative or gated output per ``self.cell``.

    ``'a'`` selects the additive path, ``'m'`` the multiplicative path; any
    other value mixes the two with the gate ``sigmoid(inputs . G)``.
    """
    weights = backend.tanh(self.W_hat) * backend.sigmoid(self.M_hat)

    # Additive path: plain linear map.
    additive = backend.dot(inputs, weights)
    # Multiplicative path: linear map in log space; self.e keeps log finite.
    log_inputs = backend.log(backend.abs(inputs) + self.e)
    multiplicative = backend.exp(backend.dot(log_inputs, weights))

    if self.cell == 'a':
        return additive
    if self.cell == 'm':
        return multiplicative

    gate = backend.sigmoid(backend.dot(inputs, self.G))
    return (gate * additive) + ((1 - gate) * multiplicative)
def gumbel_sigmoid(x, tau, from_logits=False, straight_through=False):
    """Sample a relaxed binary value by adding logistic noise to the logits.

    ref: https://arxiv.org/abs/1611.01144
    ref: https://arxiv.org/abs/1611.00712

    ``x`` is treated as probabilities unless ``from_logits`` is set. With
    ``tau > 0`` the result is ``sigmoid(y / tau)`` (combined with ``step(y)``
    through ``combine_value_gradient`` when ``straight_through`` is set);
    otherwise the result is ``step(y)``.
    """
    eps = 1e-20
    noise = K.random_uniform(K.shape(x), eps, 1 - eps)

    logits = x
    if not from_logits:
        # prob -> logit, clamping both probabilities away from zero.
        logits = K.log(K.maximum(eps, x)) - K.log(K.maximum(eps, 1 - x))

    y = logits + K.log(noise) - K.log(1 - noise)

    if not (tau > 0):
        return step(y)
    if straight_through:
        return combine_value_gradient(step(y), K.sigmoid(y / tau))
    return K.sigmoid(y / tau)
def call(self, x):
    """Decode the raw head output, one third of the channels per mask entry.

    The last axis of ``x`` is split into three equal parts. For part ``n``
    the layout is ``x, y, w, h, o, c0, c1, ...``: xy and the trailing scores
    come from the sigmoid of ``x``, wh from the raw values.

    NOTE(review): the original docstring gave the input and output as
    "Dim(height, width, height, channels)"; presumably the first axis is the
    batch -- confirm.
    """
    layer = self.metalayer

    activated_parts = tf.split(K.sigmoid(x), 3, axis=-1)
    raw_parts = tf.split(x, 3, axis=-1)

    decoded = []
    for n, mask in enumerate(layer.mask):
        # x, y, w, h, o, c0, c1, ...
        # Operation not supported on Edge TPU
        xy, _, oc = tf.split(activated_parts[n], [2, 2, -1], axis=-1)
        _, wh, _ = tf.split(raw_parts[n], [2, 2, -1], axis=-1)

        # Can be Mapped to Edge TPU
        # x, y: optional rescale around 0.5, add cell offsets, then divide
        # by the layer size.
        if layer.scale_x_y != 1.0:
            xy = (xy - 0.5) * layer.scale_x_y + 0.5
        xy = xy + self.cx_cy
        xy = xy / (layer.width, layer.height)

        # w, h: exp-scaled anchor, divided by the network size.
        raw_anchor = layer.anchors[mask]
        anchor = (
            raw_anchor[0] / self.metanet.width,
            raw_anchor[1] / self.metanet.height,
        )
        wh = K.exp(wh) * anchor

        decoded.append(K.concatenate([xy, wh, oc], axis=-1))

    return K.concatenate(decoded, axis=-1)