def yolo_head(feats, anchors, num_classes, input_shape):
    """Decode raw final-layer features into box centres, sizes and scores.

    Returns ``(box_xy, box_wh, box_confidence, box_class_probs)``. Centres are
    divided by the grid size and sizes by ``input_shape`` (both reversed, i.e.
    taken in width, height order).
    """
    anchor_count = len(anchors)
    feat_dtype = K.dtype(feats)

    # Anchors broadcast over batch, height, width: [1, 1, 1, anchors, 2].
    anchors_tensor = K.reshape(
        K.constant(anchors), [1, 1, 1, anchor_count, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_h, grid_w = grid_shape[0], grid_shape[1]

    # Per-cell row / column indices, tiled to cover the full grid.
    row_ids = K.reshape(K.arange(0, stop=grid_h), [-1, 1, 1, 1])
    col_ids = K.reshape(K.arange(0, stop=grid_w), [1, -1, 1, 1])
    grid_y = K.tile(row_ids, [1, grid_w, 1, 1])
    grid_x = K.tile(col_ids, [grid_h, 1, 1, 1])
    grid = K.cast(K.concatenate([grid_x, grid_y]), feat_dtype)

    feats = K.reshape(
        feats, [-1, grid_h, grid_w, anchor_count, num_classes + 5])

    xy_offsets = K.sigmoid(feats[..., :2])
    wh_scales = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Shift offsets by the cell index and scale sizes by the anchors.
    grid_size = K.cast(grid_shape[::-1], feat_dtype)
    input_size = K.cast(input_shape[::-1], feat_dtype)
    box_xy = (xy_offsets + grid) / grid_size
    box_wh = wh_scales * anchors_tensor / input_size

    return box_xy, box_wh, box_confidence, box_class_probs
def get_split_averages(input_tensor, input_mask, indices):
    """Average the timesteps before and after a per-sample index.

    input_tensor: (batch_size, input_length, input_dim)
    input_mask: (batch_size, input_length) or None
    indices: (batch_size, 1)

    Returns ``(pre_average, values_at_indices, post_average)``, each
    (batch_size, input_dim). The two averages are cast to float32.

    NOTE(review): ``K.lesser`` and the ``dim=`` keyword look like legacy
    Keras backend names; confirm against the targeted Keras version.
    """
    num_steps = K.shape(input_tensor)[1]
    num_samples = K.shape(input_tensor)[0]
    zero_input = K.zeros_like(input_tensor)

    # (1, input_length) -> (batch_size, input_length)
    step_ids = K.expand_dims(K.arange(num_steps), dim=0)
    step_ids = K.repeat_elements(step_ids, num_samples, 0)
    # (batch_size, input_length)
    pivot_ids = K.repeat_elements(indices, num_steps, 1)

    # All three masks are (batch_size, input_length).
    greater_mask = K.greater(step_ids, pivot_ids)
    lesser_mask = K.lesser(step_ids, pivot_ids)
    equal_mask = K.equal(step_ids, pivot_ids)

    # Only the before/after masks are combined with the input mask.
    if input_mask is not None:
        greater_mask = switch(input_mask, greater_mask,
                              K.zeros_like(greater_mask))
        lesser_mask = switch(input_mask, lesser_mask,
                             K.zeros_like(lesser_mask))

    def _masked_sum(step_mask):
        # Zero the unselected timesteps, then sum over time:
        # (batch_size, input_dim).
        selected = switch(K.expand_dims(step_mask), input_tensor, zero_input)
        return K.sum(selected, axis=1)

    post_sum = _masked_sum(greater_mask)
    pre_sum = _masked_sum(lesser_mask)
    values_at_indices = _masked_sum(equal_mask)

    # Epsilon keeps the division finite when a side is empty; (batch_size, 1).
    post_normalizer = K.expand_dims(
        K.sum(greater_mask, axis=1) + K.epsilon(), dim=1)
    pre_normalizer = K.expand_dims(
        K.sum(lesser_mask, axis=1) + K.epsilon(), dim=1)

    pre_average = K.cast(pre_sum / pre_normalizer, 'float32')
    post_average = K.cast(post_sum / post_normalizer, 'float32')
    return pre_average, values_at_indices, post_average
def call(self, x):
    """Return ``self.num`` evenly spaced values from start to stop per sample.

    The ramp has shape (1, num, 1) and is tiled along the first axis to the
    batch size of ``x``; only the batch size of ``x`` is used.

    With ``self.num == 1`` the single value is ``self.start``. The previous
    code divided by ``num - 1`` and raised ZeroDivisionError in that case.
    """
    # Guard the single-point case so the step denominator is never zero.
    intervals = max(self.num - 1, 1)
    fractions = K.cast(K.arange(self.num), K.floatx()) / float(intervals)
    ramp = self.start + (self.stop - self.start) * fractions
    ramp = K.expand_dims(K.expand_dims(ramp), axis=0)  # (1, num, 1)
    ramp = K.cast(ramp, dtype=K.floatx())
    return K.tile(ramp, (K.shape(x)[0], 1, 1))
def accumulate(attend_function, inputs, input_length, mask=None,
               return_probabilities=False):
    '''Compute a running attention over a sequence.

    The first two axes of ``inputs`` are swapped, then for every step ``i``
    the entries at positions ``> i`` along the new leading axis are zeroed
    and ``attend_function`` is applied to what remains. Step 0 attends over
    the first item only, step 1 over the first two, and so on. This is a
    modification of Keras' rnn implementation (author: bcm).

    attend_function: called as ``f(inputs_i, mask_i)`` when a mask is given,
        else ``f(inputs_i)``. It must return a 2-d tensor, or a pair
        ``(output, p_vectors)`` when ``return_probabilities`` is true.
    inputs: tensor with at least 3 dimensions.
    input_length: number of steps to iterate over (a Python int).
    mask: optional tensor with ``ndim`` or ``ndim - 1`` dimensions.
    return_probabilities: also collect and return the per-step p-vectors.

    Returns the stacked outputs with the first two axes swapped back, or
    ``[outputs, out_pvecs]`` when ``return_probabilities`` is true.

    Fix: the mask subset is now built only when a mask is supplied. It used
    to be built unconditionally, which passed ``None`` into ``K.switch`` for
    the default ``mask=None``.
    '''
    ndim = inputs.ndim
    assert ndim >= 3, 'inputs should be at least 3d'

    axes = [1, 0] + list(range(2, ndim))
    inputs = inputs.dimshuffle(axes)

    if mask is not None:
        if mask.ndim == ndim - 1:
            mask = K.expand_dims(mask)
        assert mask.ndim == ndim
        mask = mask.dimshuffle(axes)

    successive_outputs = []
    successive_pvecs = []

    # Step indices, expanded to broadcast against the shuffled inputs.
    uncover_mask = K.zeros_like(inputs)
    uncover_indices = K.arange(input_length)
    for _ in range(ndim - 1):
        uncover_indices = K.expand_dims(uncover_indices)

    def make_subset(step, tensor):
        # Keep positions <= step; everything later is replaced by zeros.
        return K.switch(uncover_indices <= step, tensor, uncover_mask)

    for i in range(input_length):
        inputs_i = make_subset(i, inputs)
        # attend_function should marginalize over the time dimension.
        if mask is not None:
            output = attend_function(inputs_i, make_subset(i, mask))
        else:
            output = attend_function(inputs_i)
        if return_probabilities:
            output, p_vectors = output
            successive_pvecs.append(p_vectors)
        assert output.ndim == 2, "Your attention function is malfunctioning; the attention accumulator should return 2 dimensional tensors"
        successive_outputs.append(output)

    outputs = K.pack(successive_outputs)
    axes = [1, 0] + list(range(2, outputs.ndim))
    outputs = outputs.dimshuffle(axes)

    if return_probabilities:
        out_pvecs = K.pack(successive_pvecs)
        # NOTE(review): reuses the axes computed for `outputs`; this assumes
        # the p-vectors have the same rank as the outputs. Confirm.
        out_pvecs = out_pvecs.dimshuffle(axes)
        outputs = [outputs, out_pvecs]

    return outputs
def call(self, x, mask=None):
    """Compose head, preposition and child encodings into label probabilities.

    x[0]: (batch_size, input_length, input_dim) encoded sentence
    x[1]: (batch_size, 1) indices of prepositions
    x[2] (optional): (batch_size, input_length - 2), read only when
        ``self.with_attachment_probs`` is set.

    Returns the softmax over class scores, (batch_size, num_classes).
    ``mask`` is accepted but not used by the active code path.
    """
    assert isinstance(x, (list, tuple))
    encoded_sentence = x[0]
    prep_indices = K.squeeze(x[1], axis=-1)  # (batch_size,)
    batch_indices = K.arange(K.shape(encoded_sentence)[0])  # (batch_size,)

    def _rows_at(positions):
        # One timestep per sample: (batch_size, input_dim).
        return encoded_sentence[batch_indices, positions, :]

    if self.with_attachment_probs:
        # Essentially K.argmax(x[2]), but argmax is not differentiable, so
        # the maximum is selected with an equality mask instead.
        head_probs = x[2]
        head_probs_padding = K.zeros_like(x[2])[:, :2]  # (batch_size, 2)
        # (batch_size, input_length)
        padded_head_probs = K.concatenate([head_probs, head_probs_padding])
        # (batch_size, 1)
        max_head_probs = K.expand_dims(K.max(padded_head_probs, axis=1))
        # (batch_size, input_length, 1)
        max_head_prob_indices = K.expand_dims(
            K.equal(padded_head_probs, max_head_probs))
        # (batch_size, input_length, input_dim)
        masked_head_encoding = K.switch(max_head_prob_indices,
                                        encoded_sentence,
                                        K.zeros_like(encoded_sentence))
        head_encoding = K.sum(masked_head_encoding, axis=1)
    else:
        # The head is taken to be the token just before the preposition.
        head_encoding = _rows_at(prep_indices - 1)
    prep_encoding = _rows_at(prep_indices)
    child_encoding = _rows_at(prep_indices + 1)

    # NOTE: a disabled alternative used to sit here as a string literal. It
    # took the sentence mask from mask[0], collapsed it with K.any over the
    # last two axes when it had more than two dimensions (a workaround for
    # Bidirectional concatenating input masks), and derived the three
    # encodings from self.get_split_averages(encoded_sentence,
    # sentence_mask, x[1]).

    # Each projection is (batch_size, proj_dim).
    head_projection = K.dot(head_encoding, self.proj_head)
    prep_projection = K.dot(prep_encoding, self.proj_prep)
    child_projection = K.dot(child_encoding, self.proj_child)

    if self.composition_type == 'HPCT':
        pre_activation = head_projection + prep_projection + child_projection
    elif self.composition_type == 'HPC':
        pre_activation = head_projection + K.tanh(
            prep_projection + child_projection)
    else:
        # Composition type is HC: the preposition projection is left out.
        pre_activation = head_projection + child_projection
    composed_projection = K.tanh(pre_activation)

    for hidden_layer in self.hidden_layers:
        composed_projection = K.tanh(K.dot(composed_projection, hidden_layer))

    # (batch_size, num_classes)
    class_scores = K.dot(composed_projection, self.scorer)
    return K.softmax(class_scores)
def call(self, x, mask=None):
    """Look up embeddings from ``self.W``, with optional row dropout.

    When ``x`` (or ``mask``) is a two-element list, only the first element
    is used. ``self.mode`` selects a plain gather ('matrix') or a per-batch
    gather ('tensor'); any other mode raises.
    """
    if isinstance(x, list):
        x, _ = x
    if mask is not None and isinstance(mask, list):
        mask, _ = mask

    weights = self.W
    if 0. < self.dropout < 1.:
        keep_prob = 1. - self.dropout
        # One keep/drop draw per leading index of W, rescaled by 1/keep_prob
        # and broadcast over the last axis.
        draw_shape = self.W._keras_shape[:-1]
        keep_scale = K.random_binomial(draw_shape, p=keep_prob) * (1. / keep_prob)
        keep_scale = K.expand_dims(keep_scale)
        weights = K.in_train_phase(self.W * keep_scale, self.W)

    if self.mode == 'matrix':
        return K.gather(weights, x)
    if self.mode != 'tensor':
        raise Exception('sanity check. should not be here.')

    # Quick and dirty: tensor mode only supports 3-d inputs.
    assert K.ndim(x) == 3
    # W is (B, S, F) and x is (B, S, A). Put the sequence axis first, gather,
    # then take the diagonal across the shared batch dimension:
    #   abc -> bac, indexed with xyz -> xyzac; x == a, so reorder to xayzc
    #   and take the diagonal over the first two axes -> xyzc.
    # A .diagonal(axis1=0, axis2=3) formulation was dropped because it did
    # not propagate gradients.
    batch_ids = K.arange(self.W._keras_shape[0])
    gathered = K.gather(K.permute_dimensions(weights, (1, 0, 2)), x)
    gathered = K.permute_dimensions(gathered, (0, 3, 1, 2, 4))
    return K.gather(gathered, (batch_ids, batch_ids))
    # Reference (Theano):
    #   A = S(np.arange(60).reshape(3,4,5))
    #   x = S(np.random.randint(0, 4, (3,4,10)))
    #   x_emb = A.dimshuffle(1,0,2)[x].dimshuffle(0,3,1,2,4)[
    #       T.arange(A.shape[0]), T.arange(A.shape[0])]
def idx2pos(self, pid):
    """Map position ids to interleaved sin/cos features of width ``v_dim``.

    ``pid`` is cast to float32 and multiplied by ``v_dim // 2`` inverse
    frequencies ``1 / 10000 ** (2 * j / v_dim)``. The sine and cosine of
    each product are paired on a new last axis and flattened to ``v_dim``.
    """
    half_dim = self.v_dim // 2
    positions = K.expand_dims(K.cast(pid, 'float32'), 2)

    exponents = 2. / self.v_dim * K.arange(half_dim, dtype='float32')
    inv_freq = K.expand_dims(1. / K.pow(10000., exponents), 0)

    angles = K.dot(positions, inv_freq)
    sin_part = K.expand_dims(K.sin(angles), 3)
    cos_part = K.expand_dims(K.cos(angles), 3)
    paired = K.concatenate([sin_part, cos_part], 3)

    flat_shape = (K.shape(paired)[0], K.shape(paired)[1], self.v_dim)
    return K.reshape(paired, flat_shape)
def __init__(self, landmarks, **kwargs):
    '''Store the fixed landmarks and derive the layer dimensions.

    landmarks: 2-d array of shape (num_landmark, num_feature); a float32
        copy is kept on the instance.
    '''
    super(GaussianKernel2, self).__init__(**kwargs)
    landmark_count, feature_count = landmarks.shape
    self.landmarks = landmarks.astype(np.float32)
    self.num_landmark = landmark_count
    self.num_feature = feature_count
    self.output_dim = self.num_landmark
    # One index per output unit, kept for looping over the landmarks.
    self.indx = K.arange(self.output_dim)
def call(self, inputs, mask=None, **kwargs):
    """Apply self-attention over the second axis of ``inputs``.

    Emissions come from the additive or multiplicative helper, are
    exponentiated after subtracting the row maximum, optionally limited to
    a window of ``attention_width`` positions, optionally masked, and then
    normalized into attention weights. Returns the attended values, or
    ``[values, attention]`` when ``self.return_attention`` is set.
    """
    seq_len = K.shape(inputs)[1]

    if self.attention_type == SeqSelfAttention.ATTENTION_TYPE_ADD:
        scores = self._call_additive_emission(inputs)
    elif self.attention_type == SeqSelfAttention.ATTENTION_TYPE_MUL:
        scores = self._call_multiplicative_emission(inputs)

    if self.attention_activation is not None:
        scores = self.attention_activation(scores)
    # Subtract the row maximum before exponentiating.
    scores = K.exp(scores - K.max(scores, axis=-1, keepdims=True))

    width = self.attention_width
    if width is not None:
        # history_only: window ends at the current step; else it is centred.
        if self.history_only:
            window_start = K.arange(seq_len) - (width - 1)
        else:
            window_start = K.arange(seq_len) - width // 2
        window_start = K.expand_dims(window_start, axis=-1)
        window_stop = window_start + width
        key_ids = K.tile(
            K.expand_dims(K.arange(seq_len), axis=0), [seq_len, 1])
        after_start = K.cast(window_start <= key_ids, K.floatx())
        before_stop = K.cast(key_ids < window_stop, K.floatx())
        scores = scores * after_start * before_stop

    if mask is not None:
        mask = K.expand_dims(K.cast(mask, K.floatx()))
        # Zero masked rows, transpose, zero masked columns, transpose back.
        rows_masked = K.permute_dimensions(scores * mask, (0, 2, 1))
        scores = K.permute_dimensions(rows_masked * mask, (0, 2, 1))

    # a_t = softmax(e_t); epsilon guards rows that are entirely zero.
    row_total = K.sum(scores, axis=-1, keepdims=True)
    attention = scores / (row_total + K.epsilon())

    # l_t = sum over t' of a_{t, t'} * x_{t'}
    attended = K.batch_dot(attention, inputs)
    if self.attention_regularizer_weight > 0.0:
        self.add_loss(self._attention_regularizer(attention))

    if self.return_attention:
        return [attended, attention]
    return attended
def _call_cat(self, inputs, mask=None):
    """Gather features at sampled grid positions from every output map.

    A coordinate list is built over ``inputs.shape[1]`` x ``inputs.shape[2]``,
    optionally filtered by ``mask``, and cut to at most ``self.n_samples``
    entries. For each tensor returned by ``self._call_normal`` the
    coordinates are halved when that map is smaller than the previous one,
    clipped to the map, and used to gather feature vectors. The gathered
    features are concatenated on the last axis.

    NOTE(review): ported from PyTorch code; the tensor layout assumptions
    (``b, w, h, c``) should be confirmed against the original.
    """
    outputs = self._call_normal(inputs)

    grid_a, grid_b = tf.meshgrid(K.arange(inputs.shape[1]),
                                 K.arange(inputs.shape[2]))
    coord_a = K.expand_dims(K.flatten(grid_a), 1)
    coord_b = K.expand_dims(K.flatten(grid_b), 1)
    coords = K.concatenate([coord_a, coord_b], axis=1)
    if mask is not None:
        coords = coords[mask, :]

    n_samples = min(self.n_samples, coords.shape[0])
    xx = coords[:n_samples, 0]
    yy = coords[:n_samples, 1]

    gathered = []
    for level, feature_map in enumerate(outputs):
        b, w, h, c = feature_map.shape
        shrunk = (level > 0
                  and feature_map.shape[1] < outputs[level - 1].shape[1])
        if shrunk:
            # The map got smaller than the previous one: halve coordinates.
            xx = K.cast(xx, 'float32') / 2.
            yy = K.cast(yy, 'float32') / 2.
        xx = K.cast(K.clip(xx, 0, feature_map.shape[1] - 1), 'int32')
        yy = K.cast(K.clip(yy, 0, feature_map.shape[2] - 1), 'int32')

        flat_index = xx * h + yy
        flat_map = K.reshape(feature_map, (-1, w * h, c))
        picked = tf.gather(flat_map, flat_index, axis=1)
        # NOTICE: the original code does clone() and detach() here.
        gathered.append(K.expand_dims(picked, axis=2))

    # NOTICE: the original code does contiguous() on the result.
    return K.concatenate(gathered, axis=-1)
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Decode a YOLO output feature map into box attributes.

    The last axis of the reshaped features holds ``num_classes + 5`` values:
    0-1 centre (xy), 2-3 size (wh), 4 confidence, 5+ per-class scores.

    Returns ``(grid, feats, box_xy, box_wh)`` when ``calc_loss == True`` and
    ``(box_xy, box_wh, box_confidence, box_class_probs)`` otherwise.
    """
    num_anchors = len(anchors)  # number of anchor boxes
    dtype = K.dtype(feats)

    # Anchors as a tensor of shape [1, 1, 1, num_anchors, 2], i.e.
    # [batch, height, width, num_anchors, box_params].
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    # Grid of cell indices; also returned for the loss computation.
    grid_shape = K.shape(feats)[1:3]
    n_rows, n_cols = grid_shape[0], grid_shape[1]
    grid_y = K.tile(
        K.reshape(K.arange(0, stop=n_rows), [-1, 1, 1, 1]),
        [1, n_cols, 1, 1])
    grid_x = K.tile(
        K.reshape(K.arange(0, stop=n_cols), [1, -1, 1, 1]),
        [n_rows, 1, 1, 1])
    grid = K.cast(K.concatenate([grid_x, grid_y]), dtype)

    # Reshape to [-1, height, width, num_anchors, num_classes + 5].
    feats = K.reshape(
        feats, [-1, n_rows, n_cols, num_anchors, num_classes + 5])

    cell_offset = K.sigmoid(feats[..., :2])
    box_xy = (cell_offset + grid) / K.cast(grid_shape[::-1], dtype)
    size_scale = K.exp(feats[..., 2:4])
    box_wh = size_scale * anchors_tensor / K.cast(input_shape[::-1], dtype)
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Loss computation needs the grid and the raw reshaped features.
    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    # Prediction needs the decoded boxes and scores.
    return box_xy, box_wh, box_confidence, box_class_probs
def call(self, x, mask=None):
    """Attend over a causal window of ``self.attention_width`` steps.

    Scores are ``x_t^T W_a x_{t'}`` (no bias term is added here). They are
    exponentiated after subtracting the row maximum, restricted to the
    positions ``t - attention_width + 1 .. t``, normalized, and used to
    form a weighted sum of ``x``.

    ``mask`` is accepted for interface compatibility but is not applied.

    Fixes relative to the previous body:
      * ``inputs`` and ``input_len`` were undefined names; the layer now
        uses ``x`` and ``K.shape(x)[1]``.
      * the scores are exponentiated, so the normalization is the softmax
        the comments describe.
      * the statements after ``return v`` were unreachable, referenced an
        undefined ``attention``, and have been removed.
    """
    input_len = K.shape(x)[1]

    # e_{t, t'} = x_t^T W_a x_{t'}
    e = K.batch_dot(K.dot(x, self.Wa), K.permute_dimensions(x, (0, 2, 1)))
    # Subtract the row maximum before exponentiating.
    e = K.exp(e - K.max(e, axis=-1, keepdims=True))

    # Keep only positions inside [t - width + 1, t].
    lower = K.arange(0, input_len) - (self.attention_width - 1)
    lower = K.expand_dims(lower, axis=-1)
    upper = lower + self.attention_width
    indices = K.expand_dims(K.arange(0, input_len), axis=0)
    e = e * K.cast(lower <= indices, K.floatx()) * K.cast(indices < upper, K.floatx())

    # a_t = softmax(e_t); epsilon guards against an all-zero row.
    s = K.sum(e, axis=-1, keepdims=True)
    a = e / (s + K.epsilon())

    # l_t = sum over t' of a_{t, t'} * x_{t'}
    v = K.batch_dot(a, x)
    return v
def kappa_keras(y_true, y_pred):
    """Return a kappa score with squared-difference disagreement weights.

    Both inputs are reduced with argmax over the last axis to integer
    ratings. The result is ``1 - sum(w * observed) / sum(w * expected)``,
    where ``w[i, j] = (i - j) ** 2``, ``observed`` is the normalized
    confusion matrix and ``expected`` is the outer product of the two
    normalized rating histograms.

    Change: ratings are shifted with a broadcast subtraction rather than an
    element-wise ``K.map_fn``; the result is the same without the
    per-element loop.
    """
    y_true = K.cast(K.argmax(y_true, axis=-1), dtype='int32')
    y_pred = K.cast(K.argmax(y_pred, axis=-1), dtype='int32')

    # Range of ratings that actually occur in either tensor.
    min_rating = K.minimum(K.min(y_true), K.min(y_pred))
    max_rating = K.maximum(K.max(y_true), K.max(y_pred))

    # Shift so the lowest rating is 0 (supports scales with negatives).
    y_true = y_true - min_rating
    y_pred = y_pred - min_rating

    # Observed (confusion) matrix.
    num_ratings = max_rating - min_rating + 1
    observed = tf.math.confusion_matrix(y_true, y_pred,
                                        num_classes=num_ratings)
    num_scored_items = K.shape(y_true)[0]

    # Squared distance between every pair of ratings.
    rating_ids = K.arange(num_ratings)
    weights = (K.expand_dims(rating_ids, axis=-1)
               - K.expand_dims(K.arange(num_ratings), axis=0))
    weights = K.cast(K.pow(weights, 2), dtype='float64')

    hist_true = tf.math.bincount(y_true, minlength=num_ratings)
    hist_true = hist_true[:num_ratings] / num_scored_items
    hist_pred = tf.math.bincount(y_pred, minlength=num_ratings)
    hist_pred = hist_pred[:num_ratings] / num_scored_items
    expected = K.dot(K.expand_dims(hist_true, axis=-1),
                     K.expand_dims(hist_pred, axis=0))

    # Normalize the observed array.
    observed = observed / num_scored_items

    # If all weights are zero, no disagreement matters and the score is 0.
    score = tf.where(K.any(K.not_equal(weights, 0)),
                     K.sum(weights * observed) / K.sum(weights * expected),
                     0)
    return 1. - score
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Turn final-layer features into bounding-box parameters.

    ``feats`` is a 4-d tensor whose channel axis holds, for every anchor,
    4 box values + 1 confidence + the class scores, e.g. 3 * (4 + 1 +
    classes) = 21.
    NOTE(review): the original notes describe axes 1 and 2 both as
    [width, height] and as height, width; confirm the layout.

    Returns ``(grid, feats, box_xy, box_wh)`` when ``calc_loss == True`` and
    ``(box_xy, box_wh, box_confidence, box_class_probs)`` otherwise.
    """
    num_anchors = len(anchors)
    out_dtype = K.dtype(feats)

    # Reshape to batch, height, width, num_anchors, box_params. Every anchor
    # is a 2-vector, hence the trailing dimension of 2.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    # Axes 1 and 2 of the feature map: a 1-d tensor of two numbers,
    # e.g. [10, 10].
    grid_shape = K.shape(feats)[1:3]
    dim0, dim1 = grid_shape[0], grid_shape[1]

    # K.tile(x, n) repeats x along each axis n[i] times; the -1 in each
    # reshape marks the axis whose length is inferred.
    index0 = K.reshape(K.arange(0, stop=dim0), [-1, 1, 1, 1])
    index1 = K.reshape(K.arange(0, stop=dim1), [1, -1, 1, 1])
    grid_y = K.tile(index0, [1, dim1, 1, 1])
    grid_x = K.tile(index1, [dim0, 1, 1, 1])

    # Coordinates of every cell of the feature map, i.e. the points that
    # the predicted offsets adjust.
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, out_dtype)

    feats = K.reshape(feats, [-1, dim0, dim1, num_anchors, num_classes + 5])

    # Sigmoid gives the centre offset inside a cell; adding the cell index
    # and dividing by the grid size gives a relative position.
    grid_extent = K.cast(grid_shape[::-1], out_dtype)
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / grid_extent
    # Width and height go through exp and are scaled by the anchors.
    input_extent = K.cast(input_shape[::-1], out_dtype)
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / input_extent
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Training has labels and does not need objectness or class scores
    # here; inference needs the confidence outputs.
    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    Returns ``(grid, feats, box_xy, box_wh)`` when ``calc_loss == True`` and
    ``(box_xy, box_wh, box_confidence, box_class_probs)`` otherwise.

    Debug notes kept from the original: the decoded tensors were observed
    with shapes (?, ?, ?, 3, 2) for box_xy / box_wh, (?, ?, ?, 3, 1) for the
    confidence, (?, ?, ?, 3, 80) for the class scores, and (?, ?, 1, 2) for
    the grid.
    """
    # This only builds graph operations. Presumably the whole data flow is
    # assembled first and values pass through it afterwards.
    num_anchors = len(anchors)
    float_type = K.dtype(feats)

    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    height, width = grid_shape[0], grid_shape[1]

    y_index = K.arange(0, stop=height)
    x_index = K.arange(0, stop=width)
    grid_y = K.tile(K.reshape(y_index, [-1, 1, 1, 1]), [1, width, 1, 1])
    grid_x = K.tile(K.reshape(x_index, [1, -1, 1, 1]), [height, 1, 1, 1])
    # grid holds the position of every grid cell.
    grid = K.cast(K.concatenate([grid_x, grid_y]), float_type)

    feats = K.reshape(
        feats, [-1, height, width, num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    raw_xy, raw_wh = feats[..., :2], feats[..., 2:4]
    box_xy = (K.sigmoid(raw_xy) + grid) / K.cast(grid_shape[::-1], float_type)
    box_wh = (K.exp(raw_wh) * anchors_tensor
              / K.cast(input_shape[::-1], float_type))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    Variant with ``num_classes + 9`` values per anchor (paired with
    "Gaussian version 2"): 0-1 centre offsets, 2-3 log sizes, 4-7 sigmas,
    8 confidence, 9+ class scores.

    Called for the loss as ``yolo_head(yolo_outputs[l],
    anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True)``.

    Returns, when ``calc_loss == True``:
        grid, raw delta xy, raw log wh, sigma, box_xy, box_wh, confidence,
        class probabilities;
    otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    rows, cols = grid_shape[0], grid_shape[1]
    grid_y = K.tile(
        K.reshape(K.arange(0, stop=rows), [-1, 1, 1, 1]), [1, cols, 1, 1])
    grid_x = K.tile(
        K.reshape(K.arange(0, stop=cols), [1, -1, 1, 1]), [rows, 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y], axis=-1)

    feats = K.reshape(
        feats, [-1, rows, cols, num_anchors, num_classes + 9])
    # The cast of the grid was moved below the reshape.
    value_dtype = K.dtype(feats)
    grid = K.cast(grid, value_dtype)

    box_delta_xy = K.sigmoid(feats[..., :2])
    box_log_wh = feats[..., 2:4]

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (box_delta_xy + grid) / K.cast(grid_shape[::-1], value_dtype)
    box_wh = (K.exp(box_log_wh) * anchors_tensor
              / K.cast(input_shape[::-1], value_dtype))

    box_sigma = K.sigmoid(feats[..., 4:8])
    box_confidence = K.sigmoid(feats[..., 8:9])
    box_class_probs = K.sigmoid(feats[..., 9:])

    if calc_loss == True:
        return (grid, box_delta_xy, box_log_wh, box_sigma,
                box_xy, box_wh, box_confidence, box_class_probs)
    return box_xy, box_wh, box_confidence, box_class_probs
def call(self, x, **kwargs):
    """Build a cos/sin position encoding and add or concatenate it to x.

    When ``self.d_pos_enc`` is unset, or the mode is 'sum', the encoding
    width is taken from the last dimension of ``x``. Mode 'sum' returns
    ``encoding + x``, mode 'concat' returns the encoding concatenated in
    front of ``x`` on the last axis. Any other mode falls through and
    returns None.
    """
    if not self.d_pos_enc or self.mode == 'sum':
        self.d_pos_enc = int(x.shape[-1])
    width = self.d_pos_enc

    # Inverse frequencies 1 / 10000 ** (2j / width).
    exponents = 2 * K.arange(width / 2, dtype='float32') / width
    inv_freq = K.expand_dims(1. / K.pow(10000., exponents), 0)

    # K.arange does not support variable lengths, so the positions
    # 0, 1, 2, ... are produced with a cumulative sum over ones instead.
    step_ones = K.ones_like(x[:, :, 0])
    positions = K.expand_dims(K.cumsum(step_ones, 1) - 1, 2)

    angles = K.dot(positions, inv_freq)
    encoding = K.concatenate([K.cos(angles), K.sin(angles)], 2)

    if self.mode == 'sum':
        return encoding + x
    elif self.mode == 'concat':
        return K.concatenate([encoding, x], -1)
def RBF_Soft_Loss(y_true, y_pred):
    """Soft-margin loss over per-class outputs, summed over the batch.

    For each row, ``d`` is the prediction at the index that ``softargmax``
    selects from ``y_true``. The loss is
    ``sum(d + sum_j softplus(lam - y_pred_j) - softplus(lam - d))`` with
    ``lam = RBF_LAMBDA``, i.e. the softplus terms cover every class except
    the selected one.

    Change: ``log(1 + exp(z))`` is now computed with ``K.softplus(z)``. The
    two are mathematically identical, but the explicit ``exp`` overflowed to
    infinity for large ``z``.
    """
    lam = RBF_LAMBDA
    indices = tf.dtypes.cast(softargmax(y_true), tf.int32)
    y_pred = tf.dtypes.cast(y_pred, tf.float32)
    y_true = tf.dtypes.cast(y_true, tf.float32)

    # Pair every row number with its selected column for gather_nd.
    row_ind = K.arange(K.shape(y_true)[0])
    full_indices = tf.stack([row_ind, indices], axis=1)
    d = tf.gather_nd(y_pred, full_indices)

    margins = K.softplus(lam - y_pred)
    # Remove the selected class's own term from the row sum.
    S = K.sum(margins, axis=1) - K.softplus(lam - d)
    return K.sum(d + S)
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    '''Decode a feature map into predicted boxes.

    feats: e.g. [?, ?, ?, 255]; anchors: e.g. shape [3, 2].

    Returns ``(grid, feats, box_xy, box_wh)`` when ``calc_loss == True`` and
    ``(box_xy, box_wh, box_confidence, box_class_probs)`` otherwise.
    '''
    num_anchors = len(anchors)
    # [1, 1, 1, num_anchors, 2]: shaped to broadcast against the feature
    # layer in the computations below.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    # Build the x, y grid, e.g. (13, 13, 1, 2).
    grid_shape = K.shape(feats)[1:3]  # height, width
    gh = grid_shape[0]
    gw = grid_shape[1]
    y_column = K.reshape(K.arange(0, stop=gh), [-1, 1, 1, 1])
    x_row = K.reshape(K.arange(0, stop=gw), [1, -1, 1, 1])
    grid_y = K.tile(y_column, [1, gw, 1, 1])
    grid_x = K.tile(x_row, [gh, 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    # e.g. (batch_size, 13, 13, 3, 85)
    feats = K.reshape(feats, [-1, gh, gw, num_anchors, num_classes + 5])
    work_dtype = K.dtype(feats)

    # Turn raw predictions into real values: box_xy is the box centre and
    # box_wh the box width and height.
    # centre = (sigmoid(raw_xy) + cell index) / grid size
    grid_dims = K.cast(grid_shape[::-1], work_dtype)
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / grid_dims
    # size = exp(raw_wh) * anchor / input size (e.g. 416)
    input_dims = K.cast(input_shape[::-1], work_dtype)
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / input_dims
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # The loss computation gets the grid and raw features instead.
    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
def _create_offset_map(self, output_shape):
    """Build the per-cell (cx, cy) offset grid described in the YOLO9000 paper.

    ``output_shape[1]`` and ``output_shape[2]`` are used as the grid height
    and width of the output feature map. The result stacks the column index
    and the row index of every cell on a new last axis, as float32.
    """
    grid_h = tf.cast(output_shape[1], tf.int32)
    grid_w = tf.cast(output_shape[2], tf.int32)
    cell_layout = [-1, grid_h, grid_w, 1]

    # Column index: 0..W-1 repeated once for each row.
    col_ids = tf.cast(K.arange(0, stop=grid_w), dtype=tf.float32)
    cx = K.reshape(K.tile(col_ids, [grid_h]), cell_layout)

    # Row index: every row id repeated across its W columns.
    row_ids = K.cast(K.arange(0, stop=grid_h), dtype=tf.float32)
    row_ids = K.tile(K.reshape(row_ids, [-1, 1]), [1, grid_w])
    row_ids = K.reshape(row_ids, [-1])
    cy = K.reshape(row_ids, cell_layout)

    return K.cast(tf.stack([cx, cy], -1), tf.float32)
def set_subtensor(tensor, value, *indices):
    """Return ``tensor`` with the rows addressed by ``indices`` set to ``value``.

    Each entry of ``indices`` supplies one leading coordinate per addressed
    row; every addressed row spans the whole last axis of ``tensor``. The
    assignment is expressed as a dense mask plus ``where``, so the input
    tensor itself is not modified.
    """
    row_count = K.shape(indices[-1])[0]
    last_dim = K.int_shape(tensor)[-1]
    full_shape = K.shape(tensor)

    # Repeat every leading coordinate once per element of the last axis.
    repeated = [repeat_elements(idx, last_dim) for idx in indices]
    leading = K.tf.transpose(K.tf.stack(repeated))

    # Matching last-axis coordinate: 0..last_dim-1 for each addressed row.
    last_axis = K.tf.tile(K.arange(0, last_dim), [row_count])
    last_axis = K.expand_dims(K.flatten(last_axis), -1)
    coords = K.concatenate([leading, last_axis], axis=-1)

    hit_mask = K.tf.cast(
        K.tf.sparse_to_dense(coords, full_shape, 1), K.tf.bool)
    scattered = K.tf.sparse_to_dense(coords, full_shape, K.flatten(value))
    return K.tf.where(hit_mask, scattered, tensor)
def yolo_head(feats, anchors, num_classes):
    '''Decode the raw model output into box confidence, position, size, classes.

    :param feats: model output, one tensor of shape
        (n_sample, n_row_cell, n_column_cell, n_anchors * (5 + classes))
    :param anchors: anchor boxes
    :param num_classes: the number of classes
    :return: (box_confidence, box_xy, box_wh, box_class_probs), the four
        tensors used for filtering
    '''
    num_anchors = len(anchors)
    feat_dtype = K.dtype(feats)

    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Dynamic conv dims, so the model can stay fully convolutional.
    conv_dims = K.shape(feats)[1:3]
    n_rows, n_cols = conv_dims[0], conv_dims[1]

    # In YOLO the height index is the innermost iteration.
    height_ids = K.arange(0, stop=n_rows)
    width_ids = K.arange(0, stop=n_cols)
    height_ids = K.tile(height_ids, [n_cols])
    width_ids = K.tile(K.expand_dims(width_ids, 0), [n_rows, 1])
    width_ids = K.flatten(K.transpose(width_ids))

    conv_index = K.transpose(K.stack([height_ids, width_ids]))
    conv_index = K.reshape(conv_index, [1, n_rows, n_cols, 1, 2])
    conv_index = K.cast(conv_index, feat_dtype)

    feats = K.reshape(
        feats, [-1, n_rows, n_cols, num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    box_confidence = K.sigmoid(feats[..., 4:5])
    xy_in_cell = K.sigmoid(feats[..., :2])
    wh_factor = K.exp(feats[..., 2:4])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over the height index before the width index.
    box_xy = (xy_in_cell + conv_index) / conv_dims
    box_wh = wh_factor * anchors_tensor / conv_dims

    return box_confidence, box_xy, box_wh, box_class_probs
def _compute_valid_seed_region(self, height, width, depth):
    """Return a 0/1 mask of the cells that are far enough from every border.

    A cell is valid when each of its three coordinates is at least
    ``self.block_size // 2`` away from both ends of its axis. The mask has
    shape (1, height, width, depth, 1).

    Fix: the coordinate grids are now laid out as (height, width, depth),
    the same layout as the ones/zeros tensors they select between. They
    were previously built as (depth, width, height), which only lines up
    when height equals depth. For cubic volumes the result is unchanged.
    """
    # Coordinate of every cell along each axis, all (height, width, depth).
    h_pos = K.tile(
        K.expand_dims(K.expand_dims(K.arange(height), axis=-1), axis=-1),
        [1, width, depth])
    w_pos = K.tile(
        K.expand_dims(K.expand_dims(K.arange(width), axis=0), axis=-1),
        [height, 1, depth])
    d_pos = K.tile(
        K.expand_dims(K.expand_dims(K.arange(depth), axis=0), axis=0),
        [height, width, 1])

    half_block_size = self.block_size // 2
    inside = K.all(
        K.stack(
            [
                h_pos >= half_block_size,
                w_pos >= half_block_size,
                d_pos >= half_block_size,
                h_pos < height - half_block_size,
                w_pos < width - half_block_size,
                d_pos < depth - half_block_size,
            ],
            axis=-1,
        ),
        axis=-1,
    )
    valid_seed_region = K.switch(
        inside,
        K.ones((height, width, depth)),
        K.zeros((height, width, depth)),
    )
    # Add leading and trailing singleton axes: (1, H, W, D, 1).
    return K.expand_dims(K.expand_dims(valid_seed_region, axis=0), axis=-1)
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Lay out the prediction grid and decode the feature layer into boxes.

    Returns ``(grid, feats, box_xy, box_wh)`` when ``calc_loss == True`` and
    ``(box_xy, box_wh, box_confidence, box_class_probs)`` otherwise.
    """
    num_anchors = len(anchors)
    target_dtype = K.dtype(feats)

    # Prior boxes reshaped to [1, 1, 1, num_anchors, 2] so they broadcast
    # against the feature layer.
    anchors_tensor = K.reshape(
        K.constant(anchors), [1, 1, 1, num_anchors, 2])

    # x, y grid, e.g. a 13 x 13 grid of shape (13, 13, 1, 2).
    grid_shape = K.shape(feats)[1:3]  # height, width
    cells_down, cells_across = grid_shape[0], grid_shape[1]
    grid_y = K.tile(
        K.reshape(K.arange(0, stop=cells_down), [-1, 1, 1, 1]),
        [1, cells_across, 1, 1])
    grid_x = K.tile(
        K.reshape(K.arange(0, stop=cells_across), [1, -1, 1, 1]),
        [cells_down, 1, 1, 1])
    grid = K.cast(K.concatenate([grid_x, grid_y]), target_dtype)

    # Feature layer reshaped to e.g. (batch_size, 13, 13, 3, 85).
    feats = K.reshape(
        feats,
        [-1, cells_down, cells_across, num_anchors, num_classes + 5])

    # Decode: box_xy is the box centre, box_wh its width and height.
    # (first two values + grid) / grid size -> normalized centre
    box_xy = ((K.sigmoid(feats[..., :2]) + grid)
              / K.cast(grid_shape[::-1], target_dtype))
    # exp(size values) * prior box / input size (e.g. 416)
    box_wh = (K.exp(feats[..., 2:4]) * anchors_tensor
              / K.cast(input_shape[::-1], target_dtype))
    box_confidence = K.sigmoid(feats[..., 4:5])  # confidence squashed to (0, 1)
    box_class_probs = K.sigmoid(feats[..., 5:])  # class scores squashed to (0, 1)

    # The loss computation receives these values instead.
    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
def interpret_prediction(prediction, anchors, num_classes):
    """Decode a raw YOLO prediction into boxes, confidence and class scores.

    ``prediction`` is reshaped to
    [-1, grid_h, grid_w, n_anchors, num_classes + 5]. Centres are the
    sigmoid offsets plus the cell index, and sizes are ``exp`` of the raw
    values times the anchors; both are divided by the grid size
    (width, height).

    Returns ``(box_xy, box_wh, box_confidence, box_class_probs)``.

    Change: ``tf.to_float`` is deprecated and was removed in TensorFlow
    2.x; ``tf.cast(..., tf.float32)`` is the equivalent call and was
    already used in this function.
    """
    n_anchors = len(anchors)
    pred_shape = tf.shape(prediction)
    grid_h, grid_w = pred_shape[1], pred_shape[2]

    prediction = K.reshape(
        prediction, [-1, grid_h, grid_w, n_anchors, num_classes + 5])

    # Offset map: the (column, row) index of every grid cell.
    cx = tf.cast(K.arange(0, stop=grid_w), dtype=tf.float32)
    cx = K.tile(cx, [grid_h])
    cx = K.reshape(cx, [-1, grid_h, grid_w, 1])

    cy = K.cast(K.arange(0, stop=grid_h), dtype=tf.float32)
    cy = K.reshape(cy, [-1, 1])
    cy = K.tile(cy, [1, grid_w])
    cy = K.reshape(cy, [-1])
    cy = K.reshape(cy, [-1, grid_h, grid_w, 1])

    c_xy = tf.cast(tf.stack([cx, cy], -1), tf.float32)

    anchors_tensor = tf.cast(
        K.reshape(anchors, [1, 1, 1, n_anchors, 2]), tf.float32)
    netout_size = tf.cast(
        K.reshape([grid_w, grid_h], [1, 1, 1, 1, 2]), tf.float32)

    box_xy = K.sigmoid(prediction[..., :2])
    box_wh = K.exp(prediction[..., 2:4])
    box_confidence = K.sigmoid(prediction[..., 4:5])
    box_class_probs = K.softmax(prediction[..., 5:])

    # Shift centre points to their grid cell (ref: YOLO-9000 loss function).
    box_xy = (box_xy + c_xy) / netout_size
    box_wh = (box_wh * anchors_tensor) / netout_size

    return box_xy, box_wh, box_confidence, box_class_probs
def _pave_embedding(self, _embedding):
    """Blend each embedding entry with its neighbours inside a segment.

    Every entry is assigned a position ``index % seg_num`` where ``seg_num``
    is ``self.seg_num + self.mask_zero``. By position:
      * below ``int(self.mask_zero)``: the entry is kept as is;
      * equal to it: replaced by the following entry;
      * equal to ``seg_num - 1``: replaced by the preceding entry;
      * strictly in between: replaced by the mean of both neighbours.

    NOTE(review): this assumes ``_embedding`` is flat with
    ``input_dim * _target_dim * seg_num`` entries. Confirm with the caller.
    """
    dtype = _embedding.dtype
    first_pos = int(self.mask_zero)
    seg_num = self.seg_num + self.mask_zero
    last_pos = seg_num - 1
    total = self.input_dim * self._target_dim * seg_num
    seg_pos = bk.arange(0, total) % seg_num

    # Neighbours, with the end entries duplicated to keep the length.
    following = bk.concatenate([_embedding[1:], _embedding[-1:]])
    preceding = bk.concatenate([_embedding[0:1], _embedding[:-1]])

    keep_w = bk.cast(seg_pos < first_pos, dtype)
    first_w = bk.cast(seg_pos == first_pos, dtype)
    last_w = bk.cast(seg_pos == last_pos, dtype)
    inner_w = bk.cast((seg_pos > first_pos) & (seg_pos < last_pos), dtype)

    return (keep_w * _embedding
            + first_w * following
            + last_w * preceding
            + inner_w * (following + preceding) / 2)
def init_left(self):
    """Build the left mask and store it in ``self.k_weights_3d_left``.

    Side effect: sets the global Keras float type to float32.
    """
    K.set_floatx('float32')

    # Index along the first kernel axis, as a column vector.
    positions = tf.expand_dims(K.arange(self.kernel.shape[0]), 1)
    # Repeat across the third kernel axis, then subtract the first slice
    # of the learned weights.
    repeated = K.repeat_elements(positions, self.kernel.shape[2], axis=1)
    distance = K.cast(repeated, dtype='float32') - self.k_weights[0, :, :]

    self.k_weights_3d_left = tf.expand_dims(distance, 1)
def predict_log_proba(self, X, y):
    """
    Predicts the log probability of y given X.

    ``X`` is reshaped to (-1, input_dim) and ``y`` to (-1, output_dim).
    Returns a 1-D array with one value per row of ``y``. If the model
    output contains any NaN, every entry is ``-inf``.
    """
    features = X.reshape((-1, self.input_dim))
    parameters = self.model.predict(features)
    targets = y.reshape((-1, self.output_dim))
    n_rows = targets.shape[0]

    # check for nan
    if np.isnan(parameters).any():
        return np.full((n_rows), -np.inf)

    parameters = K.variable(parameters)
    targets_var = K.variable(targets)

    def _row_log_likelihood(i):
        # One target row at a time, scored against the full parameter tensor.
        return self.log_Gaussian_likelihood(targets_var[i:i + 1, :], parameters)

    per_row = K.map_fn(_row_log_likelihood, K.arange(0, n_rows), dtype='float32')
    return K.eval(per_row).reshape((n_rows))
def seq_gather(x):
    """Pick one vector out of every sequence in a batch.

    ``x`` is the pair ``(seq, idxs)``: ``seq`` has shape
    [None, seq_len, s_size] and ``idxs`` has shape [None, 1]. From the i-th
    sequence the ``idxs[i]``-th vector is selected, giving an output of
    shape [None, s_size].
    """
    seq, idxs = x
    positions = K.cast(idxs, 'int32')
    # Pair each position with its batch row so gather_nd can address it.
    rows = K.expand_dims(K.arange(0, K.shape(seq)[0]), 1)
    row_and_position = K.concatenate([rows, positions], 1)
    return K.tf.gather_nd(seq, row_and_position)
def call(self, inputs, mask=None):
    """Compute sine/cosine position embeddings and combine them with inputs.

    In ``MODE_ADD`` and ``MODE_CONCAT`` the positions are ``0..seq_len-1``
    taken from the input shape; in any other mode ``inputs`` itself is used
    as the positions. Even feature slots hold ``sin`` and odd slots hold
    ``cos`` of ``pos / 10000 ** (2i / output_dim)``. The embedding is added
    to ``inputs`` (add mode), appended on the last axis (concat mode) or
    returned alone. ``mask`` is not used.
    """
    input_shape = K.shape(inputs)
    if self.mode == self.MODE_ADD or self.mode == self.MODE_CONCAT:
        batch_size, seq_len = input_shape[0], input_shape[1]
        if self.mode == self.MODE_ADD:
            output_dim = input_shape[2]
        else:
            output_dim = self.output_dim
        pos_input = K.tile(K.expand_dims(K.arange(seq_len), axis=0),
                           [batch_size, 1])
    else:
        output_dim = self.output_dim
        pos_input = inputs

    if K.dtype(pos_input) != K.floatx():
        pos_input = K.cast(pos_input, K.floatx())

    half = output_dim // 2
    evens = K.arange(half) * 2
    odds = K.arange(half) * 2 + 1

    positions = K.expand_dims(pos_input, -1)
    dim_as_float = K.cast(output_dim, K.floatx())

    even_rates = 1.0 / K.pow(10000.0, K.cast(evens, K.floatx()) / dim_as_float)
    even_embd = K.sin(K.dot(positions, K.expand_dims(even_rates, 0)))

    # Odd slots reuse the exponent of the even slot just before them.
    odd_rates = 1.0 / K.pow(10000.0, K.cast((odds - 1), K.floatx()) / dim_as_float)
    odd_embd = K.cos(K.dot(positions, K.expand_dims(odd_rates, 0)))

    # Stacking on a new last axis and flattening interleaves sin and cos.
    embd = K.stack([even_embd, odd_embd], axis=-1)
    output = K.reshape(embd, [-1, K.shape(inputs)[1], output_dim])

    if self.mode == self.MODE_CONCAT:
        output = K.concatenate([inputs, output], axis=-1)
    if self.mode == self.MODE_ADD:
        output = output + inputs
    return output
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final layer output; reshaped here to (batch, grid_h, grid_w,
        num_anchors, num_classes + 5 + NUM_ANGLES3).
    anchors : array-like
        Anchor sizes, reshaped to (1, 1, 1, num_anchors, 2).
    num_classes : int
        Number of class channels that follow the 5 box channels.
    input_shape : tensor
        Reversed before dividing ``box_wh``, so presumably (height, width)
        of the network input -- confirm with the caller.
    calc_loss : bool
        When truthy, return the intermediates the loss needs instead of the
        decoded predictions.

    Returns
    -------
    ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy, otherwise
    ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    global _var

    # The anchor count comes from the module-level `anchors_per_level`,
    # not from len(anchors).
    num_anchors = anchors_per_level
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(
        tf.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1],
                   name='yolo_head/tile/reshape/grid_y'),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(
        tf.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1],
                   name='yolo_head/tile/reshape/grid_x'),
        [grid_shape[0], 1, 1, 1])
    grid = tf.concat([grid_x, grid_y], axis=-1,
                     name='yolo_head/concatenate/grid')
    grid = K.cast(grid, K.dtype(feats))

    # Module-level snapshot of the intermediates (taken before the reshape
    # of `feats`); kept because other code may read `_var`.
    _var = [grid_shape, feats, anchors_tensor]

    feats = tf.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors,
         num_classes + 5 + NUM_ANGLES3],
        name='yolo_head/reshape/feats')

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[..., ::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[..., ::-1], K.dtype(feats))

    if calc_loss:
        return grid, feats, box_xy, box_wh

    # Only the inference path needs these, so they are built after the
    # early return.
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:5 + num_classes])
    return box_xy, box_wh, box_confidence, box_class_probs
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional features, assumed channels-last; reshaped here to
        (batch, conv_h, conv_w, num_anchors, num_classes + 5).
    anchors : array-like
        Anchor sizes, reshaped to (1, 1, 1, num_anchors, 2).
    num_classes : int
        Number of class channels.

    Returns
    -------
    box_confidence, box_xy, box_wh, box_class_probs : tensors
        Note the order: confidence comes first in this variant.
    """
    num_anchors = len(anchors)
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    conv_dims = K.shape(feats)[1:3]  # (height, width), assuming channels last
    conv_height, conv_width = conv_dims[0], conv_dims[1]

    # Per-cell (x, y) = (column, row) offsets. The previous tile/transpose
    # construction produced this layout only when height == width.
    col_index = K.tile(
        K.reshape(K.arange(0, stop=conv_width), [1, -1, 1, 1]),
        [conv_height, 1, 1, 1])
    row_index = K.tile(
        K.reshape(K.arange(0, stop=conv_height), [-1, 1, 1, 1]),
        [1, conv_width, 1, 1])
    conv_index = K.concatenate([col_index, row_index])
    conv_index = K.reshape(conv_index, [1, conv_height, conv_width, 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, conv_height, conv_width, num_anchors, num_classes + 5])

    # (x, y) and (w, h) are normalised by (width, height), hence the reversal.
    grid_wh = K.cast(
        K.reshape(conv_dims[::-1], [1, 1, 1, 1, 2]), K.dtype(feats))

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_class_probs = K.softmax(feats[..., 5:])

    box_xy = (box_xy + conv_index) / grid_wh
    box_wh = box_wh * anchors_tensor / grid_wh

    return box_confidence, box_xy, box_wh, box_class_probs
def _head(
    feats: Tensor, anchors: np.ndarray, num_classes: int, input_shape: Tensor
) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
    """
    Decode final layer features into box parameters.

    :param feats: layer output; reshaped here to
        (batch, grid_h, grid_w, num_anchors, num_classes + 5)
    :param anchors: anchor sizes, reshaped to (1, 1, 1, num_anchors, 2)
    :param num_classes: number of class channels
    :param input_shape: reversed before dividing ``box_wh``; presumably the
        (height, width) of the network input -- confirm with the caller
    :return: ``(box_xy, box_wh, box_confidence, box_class_probability)``
    """
    feats_dtype = K.dtype(feats)
    num_anchors = len(anchors)
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_height, grid_width = grid_shape[0], grid_shape[1]

    row_ids = K.reshape(K.arange(0, stop=grid_height), [-1, 1, 1, 1])
    col_ids = K.reshape(K.arange(0, stop=grid_width), [1, -1, 1, 1])
    grid_y = K.tile(row_ids, [1, grid_width, 1, 1])
    grid_x = K.tile(col_ids, [grid_height, 1, 1, 1])
    # Last axis holds (x, y) for every cell.
    grid = K.cast(K.concatenate([grid_x, grid_y]), feats_dtype)

    feats = K.reshape(
        feats, [-1, grid_height, grid_width, num_anchors, num_classes + 5]
    )

    # Centres: cell offset in (0, 1) plus cell index, over (width, height).
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], feats_dtype
    )
    # Sizes: anchor scaled by exp(raw), over the reversed input shape.
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], feats_dtype
    )
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probability = K.sigmoid(feats[..., 5:])

    return box_xy, box_wh, box_confidence, box_class_probability
def sparse_amsoftmax_loss(y_true, y_pred, scale=24, margin=0.2):
    """Additive-margin softmax loss for sparse (integer) labels.

    ``margin`` is subtracted from the logit of the labelled class only, all
    logits are multiplied by ``scale``, and sparse categorical cross-entropy
    is taken on the result. Only column 0 of ``y_true`` is read.
    """
    labels = K.cast(K.expand_dims(y_true[:, 0], 1), 'int32')  # (None, 1), int32
    rows = K.expand_dims(K.arange(0, K.shape(labels)[0]), 1)
    target_idx = K.concatenate([rows, labels], 1)

    target_logits = K.tf.gather_nd(y_pred, target_idx)
    # Tensor shaped like y_pred: -margin at the labelled class, zero elsewhere.
    penalty = K.tf.scatter_nd(
        target_idx, target_logits * 0 + (-margin), K.tf.shape(y_pred))
    shifted_logits = K.tf.add(y_pred, penalty)

    return K.sparse_categorical_crossentropy(
        labels, scale * shifted_logits, from_logits=True)
def call(self, inputs, mask=None, **kwargs):
    """Scaled dot-product attention.

    ``inputs`` is either a ``[query, key, value]`` list or a single tensor
    used for all three. When ``mask`` is a list its second entry is used.
    With ``self.history_only`` each query position only attends to key
    positions at or before it. Returns the attended values, or
    ``[values, attention]`` when ``self.return_attention`` is set.
    """
    if isinstance(inputs, list):
        query, key, value = inputs
    else:
        query = key = value = inputs
    if isinstance(mask, list):
        mask = mask[1]

    depth = K.cast(K.shape(query)[-1], dtype=K.floatx())
    scores = K.batch_dot(query, key, axes=2) / K.sqrt(depth)
    # Subtract the row maximum before exponentiating for numerical stability.
    weights = K.exp(scores - K.max(scores, axis=-1, keepdims=True))

    if self.history_only:
        query_len, key_len = K.shape(query)[1], K.shape(key)[1]
        key_pos = K.expand_dims(K.arange(0, key_len), axis=0)
        query_pos = K.expand_dims(K.arange(0, query_len), axis=-1)
        causal = K.cast(key_pos <= query_pos, K.floatx())
        weights = weights * K.expand_dims(causal, axis=0)

    if mask is not None:
        weights = weights * K.cast(K.expand_dims(mask, axis=-2), K.floatx())

    # Epsilon guards against rows that were masked out completely.
    attention = weights / (K.sum(weights, axis=-1, keepdims=True) + K.epsilon())
    attended = K.batch_dot(attention, value)

    if self.return_attention:
        return [attended, attention]
    return attended
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        The network output; reshaped here to
        (batch, grid_h, grid_w, num_anchors, num_classes + 5).
    anchors : array-like
        Anchor sizes, reshaped to (1, 1, 1, num_anchors, 2).
    num_classes : int
        Number of class channels.
    input_shape : tensor
        Reversed before dividing ``box_wh``, so presumably (height, width)
        of the network input -- confirm with the caller.
    calc_loss : bool
        When truthy, return the intermediates the loss needs.

    Returns
    -------
    ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy, otherwise
    ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    # anchors is a 2-D array, so len() counts anchors rather than individual
    # numbers; that is why the reshape below is valid.
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    # concatenate joins on the last axis by default, giving (x, y) per cell.
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    # The sigmoid keeps the offset inside (0, 1), so the predicted centre
    # stays within its own cell. Adding the cell index gives a fractional
    # column/row, and dividing by the (width, height) cell counts on the last
    # axis turns that into a fraction of the whole grid.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    # exp(raw) scales the anchor to the predicted size; the result is
    # normalised by the reversed input shape.
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))

    if calc_loss:
        return grid, feats, box_xy, box_wh

    # Only the inference path needs these, so they are built after the
    # early return. Both are squashed into (0, 1) by a sigmoid.
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])
    return box_xy, box_wh, box_confidence, box_class_probs
def sparse_amsoftmax_loss(y_true, y_pred, scale=30, margin=0.35):
    """Additive-margin softmax loss for sparse labels, via logsumexp.

    The partition function is built from every scaled prediction with the
    labelled class replaced by its margin-reduced value. Only column 0 of
    ``y_true`` is read. Returns a tensor of shape (None, 1).
    """
    # Force y_true to shape (None, 1) and dtype int32.
    labels = K.cast(K.expand_dims(y_true[:, 0], 1), 'int32')
    rows = K.expand_dims(K.arange(0, K.shape(labels)[0]), 1)
    target_idx = K.concatenate([rows, labels], 1)

    # Prediction for the labelled class, extracted with gather_nd.
    target_pred = K.expand_dims(K.tf.gather_nd(y_pred, target_idx), 1)
    target_with_margin = target_pred - margin

    # Terms of the partition function; scaled because the predictions are
    # cosine values in [-1, 1].
    scaled_terms = K.concatenate([y_pred, target_with_margin], 1) * scale
    # logsumexp keeps the gradient from vanishing.
    log_z = K.logsumexp(scaled_terms, 1, keepdims=True)
    # Remove exp(scale * target_pred) from Z, since the margin-reduced term
    # takes its place.
    log_z = log_z + K.log(1 - K.exp(scale * target_pred - log_z))

    return - target_with_margin * scale + log_z
def call(self, x, mask=None):
    """Add or concatenate sinusoidal position encodings to ``x``.

    ``self.size`` is taken from the last dimension of ``x`` when it is unset
    or when the mode is ``'sum'``. In ``'sum'`` mode the encodings are added
    to ``x``; in ``'concat'`` mode they are placed before ``x`` on axis 2.
    Any other mode falls through and returns ``None``. ``mask`` is not used.
    """
    if self.size is None or self.mode == 'sum':
        self.size = int(x.shape[-1])

    position_j = 1. / K.pow(
        10000., 2 * K.arange(self.size / 2, dtype='float32') / self.size)
    position_j = K.expand_dims(position_j, 0)

    # K.arange does not support a variable length, so the position index is
    # generated as a running sum of ones along the sequence axis.
    position_i = K.cumsum(K.ones_like(x[:, :, 0]), 1) - 1
    position_i = K.expand_dims(position_i, 2)

    position_ij = K.dot(position_i, position_j)
    position_ij = K.concatenate([K.cos(position_ij), K.sin(position_ij)], 2)

    if self.mode == 'sum':
        return position_ij + x
    elif self.mode == 'concat':
        return K.concatenate([position_ij, x], 2)
def sparse_simpler_asoftmax_loss(y_true, y_pred, scale=30):
    """Simplified angular-softmax loss for sparse labels.

    The labelled-class cosine is replaced by ``min(cos(t), cos(4t))``, with
    ``cos(4t)`` expanded through the quadruple-angle formula. The loss is
    then formed with logsumexp over the scaled predictions. Only column 0
    of ``y_true`` is read.
    """
    # Force y_true to shape (None, 1) and dtype int32.
    labels = K.cast(K.expand_dims(y_true[:, 0], 1), 'int32')
    rows = K.expand_dims(K.arange(0, K.shape(labels)[0]), 1)
    target_idx = K.concatenate([rows, labels], 1)

    # Prediction for the labelled class, extracted with gather_nd.
    target_pred = K.expand_dims(K.tf.gather_nd(y_pred, target_idx), 1)

    # Quadruple-angle expansion: cos(4t) = 1 - 8 cos^2(t) + 8 cos^4(t).
    squared = K.square(target_pred)
    quad_angle = 1 - 8 * squared + 8 * K.square(squared)
    # Equivalent to min(target_pred, quad_angle).
    target_with_margin = quad_angle - K.relu(quad_angle - target_pred)

    # Terms of the partition function; scaled because the predictions are
    # cosine values in [-1, 1].
    scaled_terms = K.concatenate([y_pred, target_with_margin], 1) * scale
    # logsumexp keeps the gradient from vanishing.
    log_z = K.logsumexp(scaled_terms, 1, keepdims=True)
    # Remove exp(scale * target_pred) from Z.
    log_z = log_z + K.log(1 - K.exp(scale * target_pred - log_z))

    return - target_with_margin * scale + log_z
def test_map(self):
    """``map_fn`` must give row sums on both backends.

    Checked twice per backend: mapping directly over the rows of the
    variable, and mapping over integer row indices.
    """
    data = np.random.rand(10, 3).astype(np.float32)
    expected = data.sum(axis=1)

    for backend in [KTF, KTH]:
        var = backend.variable(data)
        by_row = backend.eval(backend.map_fn(backend.sum, var))

        # make sure we can also walk the indexes in tensorflow, which we
        # can't without specifying dtype
        by_index = backend.eval(backend.map_fn(
            lambda i: backend.sum(var[i]),
            backend.arange(10),
            dtype=backend.floatx()
        ))

        assert (10,) == by_row.shape
        assert (10,) == by_index.shape
        assert_allclose(expected, by_row, atol=1e-05)
        assert_allclose(by_row, by_index, atol=1e-05)
def _interpolate(self, image, sampled_grids, output_size):
    """Bilinearly sample ``image`` at the locations in ``sampled_grids``.

    ``sampled_grids[:, 0]`` holds x and ``sampled_grids[:, 1]`` holds y
    coordinates; both are mapped from [-1, 1] to pixel units through
    ``0.5 * (v + 1) * size``. ``image`` is indexed as
    (batch, height, width, channels) and its height and width must be
    statically known. Returns the interpolated values as a
    (num_points, channels) tensor, where num_points is presumably
    ``batch * output_size[0] * output_size[1]`` -- confirm with the caller.
    """
    batch_size = K.shape(image)[0]
    height = K.shape(image)[1]
    width = K.shape(image)[2]
    num_channels = K.shape(image)[3]

    x = K.cast(K.flatten(sampled_grids[:, 0:1, :]), dtype='float32')
    y = K.cast(K.flatten(sampled_grids[:, 1:2, :]), dtype='float32')

    # Map normalised coordinates to pixel coordinates.
    x = .5 * (x + 1.0) * K.cast(width, dtype='float32')
    y = .5 * (y + 1.0) * K.cast(height, dtype='float32')

    # Integer corners around every sample point.
    left = K.cast(x, 'int32')
    right = left + 1
    top = K.cast(y, 'int32')
    bottom = top + 1

    # The clip limits come from the static shape.
    max_x = int(K.int_shape(image)[2] - 1)
    max_y = int(K.int_shape(image)[1] - 1)

    left = K.clip(left, 0, max_x)
    right = K.clip(right, 0, max_x)
    top = K.clip(top, 0, max_y)
    bottom = K.clip(bottom, 0, max_y)

    # Offset of every image inside the flattened (batch*h*w, c) pixel table.
    image_offset = K.arange(0, batch_size) * (height * width)
    image_offset = K.expand_dims(image_offset, axis=-1)
    points_per_image = output_size[0] * output_size[1]
    base = K.flatten(
        K.repeat_elements(image_offset, points_per_image, axis=1))

    top_row = base + top * width
    bottom_row = bottom * width + base

    idx_top_left = top_row + left
    idx_bottom_left = bottom_row + left
    idx_top_right = top_row + right
    idx_bottom_right = bottom_row + right

    flat_image = K.cast(
        K.reshape(image, shape=(-1, num_channels)), dtype='float32')
    px_top_left = K.gather(flat_image, idx_top_left)
    px_bottom_left = K.gather(flat_image, idx_bottom_left)
    px_top_right = K.gather(flat_image, idx_top_right)
    px_bottom_right = K.gather(flat_image, idx_bottom_right)

    left = K.cast(left, 'float32')
    right = K.cast(right, 'float32')
    top = K.cast(top, 'float32')
    bottom = K.cast(bottom, 'float32')

    # Each corner is weighted by the area of the opposite sub-rectangle.
    w_top_left = K.expand_dims(((right - x) * (bottom - y)), 1)
    w_bottom_left = K.expand_dims(((right - x) * (y - top)), 1)
    w_top_right = K.expand_dims(((x - left) * (bottom - y)), 1)
    w_bottom_right = K.expand_dims(((x - left) * (y - top)), 1)

    return (w_top_left * px_top_left
            + w_bottom_left * px_bottom_left
            + w_top_right * px_top_right
            + w_bottom_right * px_bottom_right)
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features (channels last).
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_conf : tensor
        Probability estimate for whether each box contains any object.
    box_class_pred : tensor
        Probability distribution estimate for each box over class labels.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # (height, width), assuming channels last
    conv_height, conv_width = conv_dims[0], conv_dims[1]

    # Per-cell (x, y) = (column, row) offsets. The previous tile/transpose
    # construction produced this layout only when height == width.
    col_index = K.tile(
        K.reshape(K.arange(0, stop=conv_width), [1, -1, 1, 1]),
        [conv_height, 1, 1, 1])
    row_index = K.tile(
        K.reshape(K.arange(0, stop=conv_height), [-1, 1, 1, 1]),
        [1, conv_width, 1, 1])
    conv_index = K.concatenate([col_index, row_index])
    conv_index = K.reshape(conv_index, [1, conv_height, conv_width, 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, conv_height, conv_width, num_anchors, num_classes + 5])

    # (x, y) and (w, h) are normalised by (width, height), hence the reversal.
    grid_wh = K.cast(
        K.reshape(conv_dims[::-1], [1, 1, 1, 1, 2]), K.dtype(feats))

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (box_xy + conv_index) / grid_wh
    box_wh = box_wh * anchors_tensor / grid_wh

    return box_xy, box_wh, box_confidence, box_class_probs
def call(self, z, mask=None):
    """Combine the last column of ``x`` with the entry selected by ``a``.

    ``z`` is the pair ``(x, a)``. For every row the result is
    ``x[row, -1] - mean(x[row, :-1]) + x[row, a[row]]``, returned with an
    extra trailing axis. ``mask`` is not used.
    """
    x, a = z
    last_column = x[:, -1]
    mean_of_rest = K.mean(x[:, :-1], axis=1)
    # Row-wise pick: one column per row, addressed by the flattened `a`.
    selected = x[K.arange(x.shape[0]), K.flatten(a)]
    return K.expand_dims(last_column - mean_of_rest + selected)
def call(self, inputs, training=None, mask=None):
    """Append normalised coordinate channels to ``inputs``.

    For every spatial axis a channel holding that axis' index, rescaled from
    ``0..size-1`` to ``[-1, 1]``, is concatenated to the input. Rank 1 adds
    one channel, rank 2 adds two (plus a radius channel when
    ``self.use_radius`` is set) and rank 3 adds three. ``channels_first``
    inputs of rank 2 and 3 are moved to channels-last for the computation
    and moved back before returning. ``training`` and ``mask`` are not used.

    NOTE(review): an axis of size 1 divides by zero in the rescaling.
    """

    def _batched_range(size, batch):
        # (batch, size) tensor whose rows are 0..size-1.
        return K.tile(K.expand_dims(K.arange(0, size), axis=0),
                      K.stack([batch, 1]))

    def _rescale(channel, size):
        # Map integer coordinates 0..size-1 onto [-1, 1].
        channel = K.cast(channel, K.floatx())
        channel = channel / K.cast(size - 1, K.floatx())
        return (channel * 2) - 1.

    input_shape = K.shape(inputs)

    if self.rank == 1:
        batch_shape, dim = input_shape[0], input_shape[1]

        xx_range = K.expand_dims(_batched_range(dim, batch_shape), axis=-1)
        xx_channels = _rescale(xx_range, dim)

        outputs = K.concatenate([inputs, xx_channels], axis=self.axis)

    if self.rank == 2:
        channels_first = self.data_format == 'channels_first'
        if channels_first:
            inputs = K.permute_dimensions(inputs, [0, 2, 3, 1])
            # The shape must be re-read: the one taken above still has the
            # channels-first layout.
            input_shape = K.shape(inputs)
        # After the permute the tensors are channels-last, so the new
        # channels go on the last axis.
        concat_axis = -1 if channels_first else self.axis

        batch_shape, dim1, dim2 = (
            input_shape[0], input_shape[1], input_shape[2])

        # The outer product of ones and a range spreads the dim1 index over
        # a (dim1, dim2) plane.
        xx_ones = K.ones(K.stack([batch_shape, dim2]), dtype='int32')
        xx_ones = K.expand_dims(xx_ones, axis=-1)
        xx_range = K.expand_dims(_batched_range(dim1, batch_shape), axis=1)
        xx_channels = K.batch_dot(xx_ones, xx_range, axes=[2, 1])
        xx_channels = K.expand_dims(xx_channels, axis=-1)
        xx_channels = K.permute_dimensions(xx_channels, [0, 2, 1, 3])

        yy_ones = K.ones(K.stack([batch_shape, dim1]), dtype='int32')
        yy_ones = K.expand_dims(yy_ones, axis=1)
        yy_range = K.expand_dims(_batched_range(dim2, batch_shape), axis=-1)
        yy_channels = K.batch_dot(yy_range, yy_ones, axes=[2, 1])
        yy_channels = K.expand_dims(yy_channels, axis=-1)
        yy_channels = K.permute_dimensions(yy_channels, [0, 2, 1, 3])

        xx_channels = _rescale(xx_channels, dim1)
        yy_channels = _rescale(yy_channels, dim2)

        outputs = K.concatenate([inputs, xx_channels, yy_channels],
                                axis=concat_axis)

        if self.use_radius:
            rr = K.sqrt(K.square(xx_channels - 0.5) +
                        K.square(yy_channels - 0.5))
            outputs = K.concatenate([outputs, rr], axis=-1)

        if channels_first:
            outputs = K.permute_dimensions(outputs, [0, 3, 1, 2])

    if self.rank == 3:
        channels_first = self.data_format == 'channels_first'
        if channels_first:
            inputs = K.permute_dimensions(inputs, [0, 2, 3, 4, 1])
            # Re-read the shape for the channels-last layout (see rank 2).
            input_shape = K.shape(inputs)
        concat_axis = -1 if channels_first else self.axis

        batch_shape, dim1, dim2, dim3 = (
            input_shape[0], input_shape[1], input_shape[2], input_shape[3])

        xx_ones = K.ones(K.stack([batch_shape, dim3]), dtype='int32')
        xx_ones = K.expand_dims(xx_ones, axis=-1)
        xx_range = K.expand_dims(_batched_range(dim2, batch_shape), axis=1)
        xx_channels = K.batch_dot(xx_ones, xx_range, axes=[2, 1])
        xx_channels = K.expand_dims(xx_channels, axis=-1)
        xx_channels = K.permute_dimensions(xx_channels, [0, 2, 1, 3])
        # Repeat the (dim2, dim3) plane along dim1.
        xx_channels = K.expand_dims(xx_channels, axis=1)
        xx_channels = K.tile(xx_channels, [1, dim1, 1, 1, 1])

        yy_ones = K.ones(K.stack([batch_shape, dim2]), dtype='int32')
        yy_ones = K.expand_dims(yy_ones, axis=1)
        yy_range = K.expand_dims(_batched_range(dim3, batch_shape), axis=-1)
        yy_channels = K.batch_dot(yy_range, yy_ones, axes=[2, 1])
        yy_channels = K.expand_dims(yy_channels, axis=-1)
        yy_channels = K.permute_dimensions(yy_channels, [0, 2, 1, 3])
        yy_channels = K.expand_dims(yy_channels, axis=1)
        yy_channels = K.tile(yy_channels, [1, dim1, 1, 1, 1])

        zz_range = _batched_range(dim1, batch_shape)
        zz_range = K.expand_dims(zz_range, axis=-1)
        zz_range = K.expand_dims(zz_range, axis=-1)
        zz_channels = K.tile(zz_range, [1, 1, dim2, dim3])
        zz_channels = K.expand_dims(zz_channels, axis=-1)

        xx_channels = _rescale(xx_channels, dim2)
        yy_channels = _rescale(yy_channels, dim3)
        zz_channels = _rescale(zz_channels, dim1)

        outputs = K.concatenate(
            [inputs, zz_channels, xx_channels, yy_channels],
            axis=concat_axis)

        if channels_first:
            outputs = K.permute_dimensions(outputs, [0, 4, 1, 2, 3])

    return outputs