def build(self, inputs, targets):
    """Build the batch-hard triplet ranking loss for one batch.

    A pairwise distance matrix is computed over the rows of ``inputs``;
    ``self.distance`` selects the ``'euclidean'`` or ``'cosine'`` branch.
    For every row, the largest distance to a row with the same target and
    the smallest distance to a row with a different target are collected
    and handed to ``self.ranking_loss`` together with ``self.margin``.

    :param inputs: batch of features, one row per sample
        (assumed 2-D ``(n, feat_dim)`` -- TODO confirm against caller).
    :param targets: ``n`` labels, one per row of ``inputs``.
    :return: the value returned by ``self.ranking_loss``.
    """
    # NOTE(review): this routine mixes numpy calls with flow ops on the same
    # values; confirm the value types it is really called with.
    n = inputs.shape[0]

    if self.distance == 'euclidean':
        dist = flow.math.pow(inputs, 2)
        dist = flow.math.reduce_sum(dist, axis=1, keepdims=True)
        # (n, 1) -> (n, n): repeat only along the columns.  The previous
        # reps of (n, n) produced an (n*n, n) array that cannot be added to
        # its own transpose.
        dist = np.tile(dist, (1, n))
        dist_t = flow.transpose(dist)
        dist = dist + dist_t
        inputs_t = flow.transpose(inputs)
        dist = addmm(dist, inputs, inputs_t, beta=1, alpha=-2)
        # The tensor to clamp was missing from this call.
        dist = flow.clamp(dist, min_value=1e-12)
        dist = flow.math.sqrt(dist)
    elif self.distance == 'cosine':
        # Per-row L2 norm, kept as an (n, 1) column so the division below
        # broadcasts over the feature axis.  The previous code computed this
        # norm but then divided ``inputs`` by a tiled copy of ``inputs``.
        fnorm = np.linalg.norm(inputs, ord=2, axis=1, keepdims=True)
        l2norm = inputs / fnorm
        l2norm_t = flow.transpose(l2norm)
        dist = -np.matmul(l2norm, l2norm_t)

    # Row i of ``mask`` marks the samples sharing sample i's target.
    # Flatten the labels to a single row first so that both (n,) and (n, 1)
    # label layouts yield an (n, n) matrix.
    target_expand = np.tile(np.reshape(targets, (1, n)), (n, 1))
    target_expand_t = flow.transpose(target_expand)
    mask = flow.math.equal(target_expand, target_expand_t)

    dist_ap, dist_an = [], []
    for i in range(n):
        # Hardest positive: farthest sample with the same target.
        temp = np.ndarray.max(dist[i][mask[i]])
        temp = flow.expand_dims(temp, axis=0)
        dist_ap.append(temp)
        # Hardest negative: closest sample with a different target.
        temp = np.ndarray.min(dist[i][mask[i] == 0])
        temp = flow.expand_dims(temp, axis=0)
        dist_an.append(temp)

    dist_ap = flow.concat(dist_ap)
    dist_an = flow.concat(dist_an)
    y = flow.ones_like(dist_an)
    loss = self.ranking_loss(dist_an, dist_ap, y, margin=self.margin)
    return loss
def SeModule(name, x, channel, reduction=4):
    """Gate ``x`` channel-wise with weights computed from its pooled features.

    ``x`` is average-pooled with a kernel covering its last two dimensions,
    flattened, passed through two bias-free dense layers (ReLU in between,
    ``hsigmoid`` after), and the result is broadcast back to the shape of
    ``x`` and multiplied with it.

    :param name: prefix for the dense layers' names
        (``name + "dense1a"`` and ``name + "dense2"``).
    :param x: 4-D input blob; its shape is unpacked as ``N, C, H, W``.
    :param channel: number of units of the second dense layer.
    :param reduction: the first dense layer has ``channel // reduction`` units.
    :return: ``x`` multiplied element-wise by the broadcast gate.
    """
    _, _, height, width = x.shape

    def _dense(blob, units, layer_name):
        # Both dense layers share every setting except size and name.
        return flow.layers.dense(
            blob,
            units=units,
            use_bias=False,
            kernel_initializer=_get_initializer("dense_weight"),
            bias_initializer=_get_initializer("bias"),
            kernel_regularizer=_get_regularizer("dense_weight"),
            bias_regularizer=_get_regularizer("bias"),
            name=layer_name,
        )

    gate = flow.nn.avg_pool2d(
        x, ksize=[height, width], strides=None, padding="SAME"
    )
    gate = flow.flatten(gate, start_dim=1, end_dim=-1)

    gate = _dense(gate, channel // reduction, name + "dense1a")
    gate = flow.math.relu(gate)
    gate = _dense(gate, channel, name + "dense2")
    gate = hsigmoid(gate)

    # Append two trailing axes, then stretch them to match x.
    for trailing_axis in (2, 3):
        gate = flow.expand_dims(input=gate, axis=trailing_axis)
    gate_like_x = flow.broadcast_like(gate, x, broadcast_axes=(2, 3))

    return x * gate_like_x
def positional_encoding(position, d_model, name="positional_encoding"):
    """
    Build a positional-encoding blob.

    Angles are obtained from ``get_angles``; ``sin`` is applied to the
    even-indexed columns and ``cos`` to the odd-indexed ones, and the two
    halves are merged back into one table.

    :param position: number of positions
    :param d_model: the hidden dimension of the model
    :param name: namespace under which the ops are created
    :return: blob shaped like (1, position, d_model)
    """
    with flow.scope.namespace(name):
        # Column vector of positions, shape (position, 1).
        pos_column = flow.expand_dims(
            flow.range(position, dtype=flow.float32, name="pos"), axis=1
        )
        # Row vector of feature indices, shape (1, d_model).
        dim_row = flow.expand_dims(
            flow.range(d_model, dtype=flow.float32, name="d_model"), axis=0
        )
        angles = get_angles(pos_column, dim_row, d_model)

        # Even feature indices 0, 2, 4, ... get the sine.
        even_ids = flow.range(0, d_model, 2, dtype=flow.int32, name="even_range")
        sin_part = flow.math.sin(flow.gather(angles, even_ids, axis=1))

        # Odd feature indices 1, 3, 5, ... get the cosine.
        odd_ids = flow.range(1, d_model, 2, dtype=flow.int32, name="odd_range")
        cos_part = flow.math.cos(flow.gather(angles, odd_ids, axis=1))

        # Zero table laid out as (d_model, position): the scatter op only
        # addresses rows, so features are placed on the row axis for now.
        zero_table = flow.constant(
            0, dtype=flow.float32, shape=(d_model, position), name="pos_ende"
        )

        def _scatter_rows(row_ids, values):
            # Write the transposed values into the selected rows.
            return flow.tensor_scatter_nd_update(
                zero_table,
                flow.expand_dims(row_ids, axis=1),
                flow.transpose(values, perm=[1, 0]),
            )

        even_table = _scatter_rows(even_ids, sin_part)
        odd_table = _scatter_rows(odd_ids, cos_part)

        # The two tables are disjoint, so their sum interleaves them.
        encoding = even_table + odd_table
        # Undo the row/column swap, then add the leading axis.
        encoding = flow.transpose(encoding, perm=[1, 0])
        encoding = flow.expand_dims(encoding, axis=0)
    return encoding
def create_padding_mask(seq, name="CreatePad"):
    """
    Create a padding mask that is 1.0 where ``seq`` equals 0.

    :param seq: input sequence, shape=(batch, seq_length)
    :param name: namespace under which the ops are created
    :return: float32 blob with two axes inserted at position 1,
        i.e. (a, b) -> (a, 1, 1, b)
    """
    with flow.scope.namespace(name):
        zero = flow.constant_scalar(0, dtype=flow.int64, name="zero_mask_scalar")
        is_padding = flow.math.equal(seq, zero)
        mask = flow.cast(is_padding, flow.float32)
        # Two insertions at axis 1 turn (a, b) into (a, 1, 1, b).
        for _ in range(2):
            mask = flow.expand_dims(mask, axis=1)
    return mask
def ExpandDimsJob():
    """Job body: expand the dims of an all-ones variable and use it as loss.

    Reads ``device_type``, ``x_shape``, ``axis`` and ``check_grad`` from the
    enclosing scope.  The gradient flowing into the variable is passed to
    ``check_grad`` through ``flow.watch_diff``.
    """
    with flow.scope.placement(device_type, "0:0"):
        var = flow.get_variable(
            "var",
            shape=x_shape,
            dtype=flow.float,
            initializer=flow.ones_initializer(),
            trainable=True,
        )
        flow.watch_diff(var, check_grad)

        expanded = flow.expand_dims(var, axis)
        flow.losses.add_loss(expanded)
        return expanded
def ExpandDimsJob():
    """Job body: expand the dims of an all-ones variable and minimise it.

    Reads ``device_type``, ``x_shape``, ``axis`` and ``check_grad`` from the
    enclosing scope.  The gradient flowing into the variable is passed to
    ``check_grad`` through ``flow.watch_diff``; the expanded blob is
    minimised with momentum-free SGD at a constant rate of 1e-4.
    """
    with flow.scope.placement(device_type, "0:0"):
        var = flow.get_variable(
            "var",
            shape=x_shape,
            dtype=flow.float,
            initializer=flow.ones_initializer(),
            trainable=True,
        )
        flow.watch_diff(var, check_grad)

        expanded = flow.expand_dims(var, axis)

        lr_schedule = flow.optimizer.PiecewiseConstantScheduler([], [1e-4])
        sgd = flow.optimizer.SGD(lr_schedule, momentum=0)
        sgd.minimize(expanded)
        return expanded
def __call__(self, x):
    """
    Look up the embeddings of ``x`` and zero them at padding positions.

    :param x: a flow.int64 Tensor with shape [batch_size, length]
    :return: embeddings, a float32 tensor with shape
             [batch_size, length, embedding_size].  Only the embeddings are
             returned; the padding indicator computed here is used
             internally and not handed back.
    """
    with flow.scope.namespace("embedding"):
        embeddings = flow.gather(self.embedding_table, x, axis=0)

        # Scale by the square root of the hidden size.
        scale = self.hidden_size**0.5
        embeddings *= scale

        # ``padding`` is [batch_size, length] with 1 = padding, 0 = token,
        # so ``1 - padding`` keeps real tokens and zeroes padded ones.
        padding = model_utils.get_padding(x)
        keep = flow.expand_dims(1 - padding, -1)
        embeddings *= keep
        return embeddings
def __call__(self, x, padding=None):
    """
    Apply the filter dense layer to ``x``, optionally skipping padding.

    When ``padding`` is given, padded positions are removed before the
    dense layer and re-inserted (as zeros, via ``scatter_nd``) afterwards.

    :param x: blob whose first two dims are read as (batch_size, length)
    :param padding: optional blob; entries below 1e-9 mark non-padded
        positions
    :return: the layer output; reshaped to
        [batch_size, length, hidden_size] when ``padding`` is given
    """
    # NOTE(review): only the "filter_layer" dense is applied here, yet the
    # re-add-padding step scatters into a buffer of width hidden_size --
    # confirm whether an output projection is expected to follow.
    strip_padding = padding is not None

    # Statically known shapes.
    batch_size, length = x.shape[0], x.shape[1]

    if strip_padding:
        with flow.scope.namespace("remove_padding"):
            # Flatten padding to [batch_size*length] and locate real tokens.
            flat_padding = flow.reshape(padding, [-1])
            nonpad_ids = flow.cast(
                flow.where(flat_padding < 1e-9), dtype=flow.int32
            )

            # Flatten x to [batch_size*length, hidden_size], keep only the
            # non-padded rows.
            x = flow.reshape(x, [-1, self.hidden_size])
            x = flow.gather_nd(x, indices=nonpad_ids)

            # Back to 3 dimensions with a leading axis of size 1.
            # TODO: maybe has a batch axis error in there
            x = flow.expand_dims(x, axis=0)

    output = self._build_dense(x, self.filter_size, name="filter_layer")

    if self.train:
        # ``relu_dropout`` is used directly as the drop rate here.
        output = flow.nn.dropout(output, self.relu_dropout)

    if strip_padding:
        with flow.scope.namespace("re_add_padding"):
            output = flow.squeeze(output, axis=[0, ])
            output = flow.scatter_nd(
                indices=nonpad_ids,
                updates=output,
                shape=[batch_size * length, self.hidden_size],
            )
            output = flow.reshape(
                output, [batch_size, length, self.hidden_size]
            )

    return output
def forward(self, inputs, targets):
    """Batch-hard triplet loss over a batch of features.

    Builds an (n, n) pairwise distance matrix (``self.distance`` selects the
    ``'euclidean'`` or ``'cosine'`` branch), then for each row takes the
    maximum distance among same-target entries and the minimum distance
    among different-target entries, and feeds both to
    ``self._MarginRankingLoss``.

    :param inputs: feature blob, one row per sample; ``inputs.shape[0]``
        is the batch size ``n``
    :param targets: label blob, broadcast along axis 1 to (n, n)
    :return: the value returned by ``self._MarginRankingLoss``
    """
    n = inputs.shape[0]

    # Pairwise distance is hand-rolled until an official op is available.
    # The timestamp keeps the helper variable's name unique per call.
    stamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S.%f')
    square_like = flow.constant(value=0.0, dtype=flow.float32, shape=(n, n))

    if self.distance == 'euclidean':
        # The exponent is supplied as a blob filled with 2.
        exponent = flow.get_variable(
            "blob_2_" + stamp,
            shape=inputs.shape,
            initializer=flow.constant_initializer(2),
            dtype=inputs.dtype)
        sq_norm = flow.math.pow(inputs, exponent)
        sq_norm = flow.math.reduce_sum(sq_norm, axis=1, keepdims=True)
        sq_norm = flow.broadcast_like(sq_norm, square_like)
        sq_norm_t = flow.transpose(sq_norm)
        dist = sq_norm + sq_norm_t
        inputs_t = flow.transpose(inputs)
        dist = addmm(dist, inputs, inputs_t, beta=1, alpha=-2)
        # Clamp before the square root.
        dist = flow.clamp(dist, min_value=1e-12)
        dist = flow.math.sqrt(dist)
    elif self.distance == 'cosine':
        # inputs / l2_normalize(inputs), averaged over the feature axis.
        unit_rows = flow.math.l2_normalize(inputs, axis=1)
        ratio = flow.math.divide(inputs, unit_rows)
        fnorm = flow.math.reduce_mean(ratio, axis=1, keepdims=True)
        fnorm_like_inputs = flow.broadcast_like(
            fnorm, like=inputs, broadcast_axes=[1])
        l2norm = flow.math.divide(inputs, fnorm_like_inputs)
        l2norm_t = flow.transpose(l2norm, perm=(1, 0))
        dist = flow.math.negative(flow.matmul(l2norm, l2norm_t))

    def _targets_square():
        # Fresh broadcast of the labels to (n, n).
        return flow.broadcast_like(
            targets, like=square_like, broadcast_axes=[1])

    def _targets_square_t():
        return flow.transpose(
            _targets_square(), perm=(1, 0), batch_axis_non_change=True)

    # For each anchor, find the hardest positive and negative.
    same_rows = _targets_square()
    same_cols = _targets_square_t()
    mask = math.equal(same_rows, same_cols)

    diff_rows = _targets_square()
    diff_cols = _targets_square_t()
    mask_rev = math.not_equal(diff_rows, diff_cols)

    dist_ap, dist_an = [], []
    for i in range(n):
        row_dist = flow.slice_v2(dist, [(i, i + 1, 1)])
        row_same = flow.slice_v2(mask, [(i, i + 1, 1)])
        row_diff = flow.slice_v2(mask_rev, [(i, i + 1, 1)])

        positives = flow.gather_nd(row_dist, flow.where(row_same))
        hardest_pos = flow.expand_dims(math.reduce_max(positives), 0)

        negatives = flow.gather_nd(row_dist, flow.where(row_diff))
        hardest_neg = flow.expand_dims(math.reduce_min(negatives), 0)

        dist_ap.append(hardest_pos)
        dist_an.append(hardest_neg)

    dist_ap = flow.concat(dist_ap, 0)
    dist_an = flow.concat(dist_an, 0)
    y = flow.ones_like(dist_an)
    return self._MarginRankingLoss(dist_an, dist_ap, y)
def build_network(self, inputs):
    """Build the neighbour-registration graph for a 5-D input blob.

    Steps, in order:

    1. A 1x1x1 ``conv3d_layer`` produces a semantic map that is
       L2-normalised along axis 1.
    2. For every frame, the ``N - 1`` temporal neighbours (``N`` is
       ``self.time_dim``, the centre offset ``N // 2`` is skipped) are
       sliced out of the zero-padded map, and a softmax similarity between
       each frame and its neighbours is computed, scaled by
       ``self.temperature``.
    3. The similarity re-weights the padded raw ``inputs`` to form
       ``neighbor_new``.
    4. If ``self.contrastive_att`` is set, ``neighbor_new`` is multiplied by
       a sigmoid attention map.
    5. Original frames and registered neighbours are interleaved along
       axis 2 (frame ``i`` at positions where ``i % N == N // 2``).

    :param inputs: blob whose shape unpacks as ``(b, c, t, h, w)``
    :return: blob of shape ``(b, c, N * t, h, w)``
    """
    b, c, t, h, w = inputs.shape
    N = self.time_dim
    pad = (self.time_dim - 1) // 2
    time_paddings = [(0, 0), (0, 0), (pad, pad), (0, 0), (0, 0)]

    def _take_frames(blob, frame_ids):
        """Return one single-frame slice of ``blob`` (axis 2) per id."""
        return [
            flow.slice(
                blob,
                begin=[None, None, int(frame), None, None],
                size=[None, None, 1, None, None],
            )
            for frame in frame_ids
        ]

    def _repeat_per_neighbor(blob):
        """Insert an axis at position 3 and repeat it ``N - 1`` times."""
        single = flow.expand_dims(blob, axis=3)
        repeated = single
        for _ in range(N - 2):
            repeated = flow.concat(inputs=[repeated, single], axis=3)
        return repeated

    # Indices into the padded time axis, frame-major: for every frame, each
    # offset except the centre one.  They depend only on the static shape,
    # so they are plain Python ints; the previous graph-op construction
    # yielded a blob that cannot be iterated with ``int(index)``.
    neighbor_time_index = [
        frame + offset
        for frame in range(t)
        for offset in range(N)
        if offset != N // 2
    ]

    # feature map registration
    # The timestamp makes the variable names unique per call.
    tempname = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S.%f')
    init = flow.kaiming_initializer(
        shape=inputs.shape, mode="fan_out", nonlinearity="relu")
    semantic = conv3d_layer(
        "conv_semantic_" + tempname, inputs, self.out_channels,
        kernel_size=1, use_bias=False, padding="VALID",
        trainable=self.trainable, weight_initializer=init,
    )
    inputs_norm = flow.math.l2_normalize(semantic, axis=1)
    inputs_norm_padding = flow.pad(inputs_norm, paddings=time_paddings)

    inputs_norm_expand = _repeat_per_neighbor(inputs_norm)
    inputs_norm_expand = flow.transpose(
        inputs_norm_expand, perm=[0, 2, 3, 4, 5, 1])
    # NOTE(review): the hard-coded ``c // 16`` presumes
    # ``self.out_channels == c // 16`` -- confirm.
    inputs_norm_expand = flow.reshape(
        inputs_norm_expand, shape=[-1, h * w, c // 16])

    neighbor_norm = flow.concat(
        _take_frames(inputs_norm_padding, neighbor_time_index), axis=2)
    neighbor_norm = flow.transpose(neighbor_norm, perm=[0, 2, 1, 3, 4])
    neighbor_norm = flow.reshape(neighbor_norm, shape=[-1, c // 16, h * w])

    similarity = flow.matmul(inputs_norm_expand, neighbor_norm) * self.temperature
    similarity = nn.softmax(similarity, axis=-1)

    inputs_padding = flow.pad(inputs, paddings=time_paddings)
    neighbor = flow.concat(
        _take_frames(inputs_padding, neighbor_time_index), axis=2)
    neighbor = flow.transpose(neighbor, perm=[0, 2, 3, 4, 1])
    neighbor = flow.reshape(neighbor, shape=[-1, h * w, c])

    neighbor_new = flow.matmul(similarity, neighbor)
    neighbor_new = flow.reshape(neighbor_new, shape=[b, t * (N - 1), h, w, c])
    neighbor_new = flow.transpose(neighbor_new, perm=[0, 4, 1, 2, 3])

    # contrastive attention
    if self.contrastive_att:
        temp_input = _repeat_per_neighbor(inputs)
        temp_input = flow.reshape(temp_input, shape=[b, c, (N - 1) * t, h, w])
        # The two mapping convolutions are created with trainable=False.
        input_att = conv3d_layer(
            "conv3d_inputmapping_" + tempname, temp_input, self.out_channels,
            kernel_size=1, use_bias=False, trainable=False,
            weight_initializer=flow.kaiming_initializer(
                shape=temp_input.shape, mode="fan_out", nonlinearity="relu"),
        )
        n_att = conv3d_layer(
            "conv3d_nmapping_" + tempname, neighbor_new, self.out_channels,
            kernel_size=1, use_bias=False, trainable=False,
            weight_initializer=flow.kaiming_initializer(
                shape=neighbor_new.shape, mode="fan_out", nonlinearity="relu"),
        )
        temp_input = input_att * n_att
        contrastive_att_net = conv3d_layer(
            "conv3d_att_net_" + tempname, temp_input, 1,
            kernel_size=1, use_bias=False, trainable=self.trainable,
            weight_initializer=flow.kaiming_initializer(
                shape=temp_input.shape, mode="fan_out", nonlinearity="relu"),
        )
        contrastive_att_net = flow.math.sigmoid(contrastive_att_net)
        neighbor_new = flow.math.multiply(neighbor_new, contrastive_att_net)

    # integrating feature maps
    init = flow.zeros_initializer()
    tempname = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S.%f')
    input_offset = flow.get_variable(
        "input_offset_" + tempname,
        shape=(b, c, N * t, h, w),
        initializer=init,
        dtype=inputs.dtype,
        trainable=self.trainable)

    # NOTE(review): the extent of this placement scope could not be
    # recovered from the flattened source; it is assumed to cover the
    # whole interleaving step -- confirm.
    with flow.scope.placement("cpu", "0:0"):
        # Output slots for the original frames / for the neighbours.
        input_index = np.array([i for i in range(t * N) if i % N == N // 2])
        neighbor_index = np.array([i for i in range(t * N) if i % N != N // 2])

        input_offset_list = _take_frames(
            input_offset, range(input_offset.shape[2]))
        inputs_list = _take_frames(inputs, range(inputs.shape[2]))
        neighbor_new_list = _take_frames(
            neighbor_new, range(neighbor_new.shape[2]))

        for src, dst in enumerate(input_index):
            input_offset_list[dst] += inputs_list[src]
        for src, dst in enumerate(neighbor_index):
            input_offset_list[dst] += neighbor_new_list[src]

        input_offset = flow.concat(input_offset_list, axis=2)
    return input_offset
def build_network(self, inputs):
    """Build the neighbour-registration graph for a 5-D input blob.

    Steps, in order:

    1. A 1x1x1 ``conv3d_layer`` produces a semantic map that is
       L2-normalised along axis 1.
    2. For every frame, the ``N - 1`` temporal neighbours (``N`` is
       ``self.time_dim``, the centre offset ``N // 2`` is skipped) are
       sliced out of the zero-padded map, and a softmax similarity between
       each frame and its neighbours is computed, scaled by
       ``self.temperature``.
    3. The similarity re-weights the padded raw ``inputs`` to form
       ``neighbor_new``.
    4. If ``self.contrastive_att`` is set, ``neighbor_new`` is multiplied by
       a sigmoid attention map.
    5. Original frames and registered neighbours are interleaved along
       axis 2 (frame ``i`` at positions where ``i % N == N // 2``).

    :param inputs: blob whose shape unpacks as ``(b, c, t, h, w)``
    :return: blob of shape ``(b, c, N * t, h, w)``
    """
    b, c, t, h, w = inputs.shape
    N = self.time_dim
    pad = (self.time_dim - 1) // 2
    time_paddings = [(0, 0), (0, 0), (pad, pad), (0, 0), (0, 0)]

    def _take_frames(blob, frame_ids):
        """Return one single-frame slice of ``blob`` (axis 2) per id."""
        return [
            flow.slice(
                blob,
                begin=[None, None, int(frame), None, None],
                size=[None, None, 1, None, None],
            )
            for frame in frame_ids
        ]

    def _repeat_per_neighbor(blob):
        """Insert an axis at position 3 and repeat it ``N - 1`` times."""
        single = flow.expand_dims(blob, axis=3)
        repeated = single
        for _ in range(N - 2):
            repeated = flow.concat(inputs=[repeated, single], axis=3)
        return repeated

    # Indices into the padded time axis, frame-major: for every frame, each
    # offset except the centre one.  Same values as the former
    # arange/concatenate/transpose/flatten chain.
    neighbor_time_index = [
        frame + offset
        for frame in range(t)
        for offset in range(N)
        if offset != N // 2
    ]

    semantic = conv3d_layer(
        "conv_semantic_", inputs, self.out_channels,
        kernel_size=1, use_bias=False, padding="SAME")
    inputs_norm = flow.math.l2_normalize(semantic, axis=1)
    inputs_norm_padding = flow.pad(inputs_norm, paddings=time_paddings)

    inputs_norm_expand = _repeat_per_neighbor(inputs_norm)
    # Move the channel axis last with a real permutation.  The previous
    # reshape to the permuted shape did not move any data, so the rows fed
    # to the matmul below were not per-pixel channel vectors.
    inputs_norm_expand = flow.transpose(
        inputs_norm_expand, perm=[0, 2, 3, 4, 5, 1])
    # NOTE(review): the hard-coded ``c // 16`` presumes
    # ``self.out_channels == c // 16`` -- confirm.
    inputs_norm_expand = flow.reshape(
        inputs_norm_expand, shape=[-1, h * w, c // 16])

    neighbor_norm = flow.concat(
        _take_frames(inputs_norm_padding, neighbor_time_index), axis=2)
    neighbor_norm = flow.transpose(neighbor_norm, perm=[0, 2, 1, 3, 4])
    neighbor_norm = flow.reshape(neighbor_norm, shape=[-1, c // 16, h * w])

    similarity = flow.matmul(inputs_norm_expand, neighbor_norm) * self.temperature
    similarity = nn.softmax(similarity, axis=-1)

    inputs_padding = flow.pad(inputs, paddings=time_paddings)
    neighbor = flow.concat(
        _take_frames(inputs_padding, neighbor_time_index), axis=2)
    neighbor = flow.transpose(neighbor, perm=[0, 2, 3, 4, 1])
    neighbor = flow.reshape(neighbor, shape=[-1, h * w, c])

    neighbor_new = flow.matmul(similarity, neighbor)
    neighbor_new = flow.reshape(neighbor_new, shape=[b, t * (N - 1), h, w, c])
    neighbor_new = flow.transpose(neighbor_new, perm=[0, 4, 1, 2, 3])

    if self.contrastive_att:
        # Repeat N - 1 times (a single concat only worked for N == 3).
        temp_input = _repeat_per_neighbor(inputs)
        temp_input = flow.reshape(temp_input, shape=[b, c, (N - 1) * t, h, w])
        # The two mapping convolutions are created with trainable=False.
        input_att = conv3d_layer(
            "conv3d_inputmapping", temp_input, self.out_channels,
            kernel_size=1, use_bias=False, trainable=False)
        n_att = conv3d_layer(
            "conv3d_nmapping", neighbor_new, self.out_channels,
            kernel_size=1, use_bias=False, trainable=False)
        # One output channel so the map broadcasts over the ``c`` channels
        # of ``neighbor_new``.
        contrastive_att_net = conv3d_layer(
            "conv3d_att_net", input_att * n_att, 1,
            kernel_size=1, use_bias=False)
        contrastive_att = flow.math.sigmoid(contrastive_att_net)
        # Scale by the attention map; the previous code multiplied by the
        # boolean ``self.contrastive_att`` flag, which left it unused.
        neighbor_new = neighbor_new * contrastive_att

    # Zero-initialised buffer the frames are added into.
    # NOTE(review): created with trainable=True, so training can move it
    # away from zero -- confirm this is intended.
    init = flow.zeros_initializer()
    input_offset = flow.get_variable(
        "input_offset",
        shape=(b, c, N * t, h, w),
        initializer=init,
        dtype=inputs.dtype,
        trainable=True)

    # Output slots for the original frames / for the neighbours.
    input_index = np.array([i for i in range(t * N) if i % N == N // 2])
    neighbor_index = np.array([i for i in range(t * N) if i % N != N // 2])

    input_offset_list = _take_frames(
        input_offset, range(input_offset.shape[2]))
    inputs_list = _take_frames(inputs, range(inputs.shape[2]))
    neighbor_new_list = _take_frames(
        neighbor_new, range(neighbor_new.shape[2]))

    for src, dst in enumerate(input_index):
        input_offset_list[dst] += inputs_list[src]
    for src, dst in enumerate(neighbor_index):
        input_offset_list[dst] += neighbor_new_list[src]

    input_offset = flow.concat(input_offset_list, axis=2)
    return input_offset
def loss_layer(self, feature_map, pred, label, bboxes, stride, prefix='loss_layer'):
    '''
    Compute the three loss terms for one detection scale.

    :param feature_map: [N, H, W, 3*(5+class_num)] raw network output
    :param pred: [N, H, W, 3, 4+1+class_num] decoded prediction
    :param label: [N, H, W, 3, 4+1+class_num]
    :param bboxes: [N, V, 4]
    :param stride: not read by the current implementation
    :param prefix: not read by the current implementation
    :return:
        giou_loss:
        conf_loss:
        prob_loss:
        each summed over axes 1-4 and averaged over the batch
    '''
    def _channels(blob, start, width):
        # Slice ``width`` entries of the last axis, starting at ``start``.
        return flow.slice(blob,
                          begin=[None, None, None, None, start],
                          size=[None, None, None, None, width])

    def _batch_mean(per_cell_loss):
        # Sum everything but the batch axis, then average over the batch.
        return flow.math.reduce_mean(
            flow.math.reduce_sum(per_cell_loss, axis=[1, 2, 3, 4]))

    # [N, H, W, 3, 5 + class_num]
    feature_map = flow.reshape(
        feature_map,
        shape=(feature_map.shape[0], feature_map.shape[1],
               feature_map.shape[2], self.anchor_per_scale, -1))

    # Raw logits: confidence [N, H, W, 3, 1], classes [N, H, W, 3, class_num].
    raw_conf = _channels(feature_map, 4, 1)
    raw_prob = _channels(feature_map, 5, feature_map.shape[-1] - 5)

    # Decoded prediction: box [N, H, W, 3, 4] and confidence.
    pred_xywh = _channels(pred, 0, 4)
    pred_conf = _channels(pred, 4, 1)

    # Label: box, objectness indicator, class targets.
    label_xywh = _channels(label, 0, 4)
    respond_bbox = _channels(label, 4, 1)
    label_prob = _channels(label, 5, label.shape[-1] - 5)

    # [N, H, W, 3, 1]
    giou = self.bbox_giou(pred_xywh, label_xywh)
    giou_loss = respond_bbox * (1 - giou)

    # [N, V, 4] -> [N, 1, 1, 1, V, 4]
    bboxes_ = bboxes
    for _ in range(3):
        bboxes_ = flow.expand_dims(bboxes_, axis=1)

    # [N, H, W, 3, V]
    iou = self.bbox_iou(flow.expand_dims(pred_xywh, axis=-2), bboxes_)
    iou = flow.squeeze(iou, axis=[-1, ])
    # [N, H, W, 3, 1]
    max_iou = flow.math.reduce_max(iou, axis=-1, keepdims=True)

    # Background cells: no object assigned and best IoU below the threshold.
    threshold = flow.constant_like(like=max_iou,
                                   value=self.iou_loss_thresh,
                                   dtype=flow.float32)
    below_threshold = flow.math.less(max_iou, threshold)
    respond_bgd = flow.where(
        below_threshold,
        1.0 - respond_bbox,
        flow.zeros_like(respond_bbox, dtype=flow.float32))

    # [N, H, W, 3, 1]
    conf_focal = self.focal(respond_bbox, pred_conf)
    object_term = respond_bbox * flow.nn.sigmoid_cross_entropy_with_logits(
        labels=respond_bbox, logits=raw_conf)
    background_term = respond_bgd * flow.nn.sigmoid_cross_entropy_with_logits(
        labels=respond_bbox, logits=raw_conf)
    conf_loss = conf_focal * (object_term + background_term)

    # [N, H, W, 3, class_num], only counted where an object is assigned.
    prob_loss = respond_bbox * flow.nn.sigmoid_cross_entropy_with_logits(
        labels=label_prob, logits=raw_prob)

    giou_loss = _batch_mean(giou_loss)
    conf_loss = _batch_mean(conf_loss)
    prob_loss = _batch_mean(prob_loss)

    return giou_loss, conf_loss, prob_loss
def decode(self, feature_map, anchors, stride, prefix='yolo'):
    '''
    Decode a raw feature map into box, confidence and class predictions.

    :param feature_map: [N, H, W, 3 * (5 + num_class)]
    :param anchors: [3, 2]
    :param stride: factor applied to both the box centres and the box sizes
    :param prefix: prefix for the names of the grid-building ops
    :return: (pred, x_y_offset)
        pred: [N, H, W, 3, 4+1+class_num], laid out as
              (x, y, w, h, score, probability)
        x_y_offset: [1, H, W, 1, 2] grid of cell coordinates
    '''
    def _channels(blob, start, width):
        # Slice ``width`` entries of the last axis, starting at ``start``.
        return flow.slice(blob,
                          begin=[None, None, None, None, start],
                          size=[None, None, None, None, width])

    # [N, H, W, 3, 5 + num_class]
    feature_map = flow.reshape(
        feature_map,
        shape=(feature_map.shape[0], feature_map.shape[1],
               feature_map.shape[2], self.anchor_per_scale, -1))

    box_centers = _channels(feature_map, 0, 2)   # [N, H, W, 3, 2]
    box_sizes = _channels(feature_map, 2, 2)     # [N, H, W, 3, 2]
    conf_logits = _channels(feature_map, 4, 1)   # [N, H, W, 3, 1]
    prob_logits = _channels(                     # [N, H, W, 3, class_num]
        feature_map, 5, feature_map.shape[-1] - 5)

    # Build the (H, W) grids of column and row indices.
    grid_size = feature_map.shape[1:3]
    rows, cols = grid_size[0], grid_size[1]

    grid_x = flow.range(cols, dtype=flow.float32,
                        name=prefix + '_decode_range1')
    grid_x = flow.expand_dims(grid_x, axis=0)
    like_tensor = flow.constant(value=1.0, dtype=flow.float32,
                                shape=(rows, cols))
    grid_x = flow.broadcast_like(grid_x, like_tensor,
                                 broadcast_axes=(0, ),
                                 name=prefix + 'yolo_grid_x')

    grid_y = flow.range(rows, dtype=flow.float32,
                        name=prefix + '_yolo_decode_range2')
    grid_y = flow.expand_dims(grid_y, axis=1)
    grid_y = flow.broadcast_like(grid_y, like_tensor,
                                 broadcast_axes=(1, ),
                                 name=prefix + 'yolo_grid_y')

    x_offset = flow.expand_dims(grid_x, axis=-1)
    y_offset = flow.expand_dims(grid_y, axis=-1)

    # [H, W, 2] -> [1, H, W, 1, 2]
    x_y_offset = flow.concat([x_offset, y_offset], axis=-1)
    x_y_offset = flow.expand_dims(x_y_offset, axis=0)
    x_y_offset = flow.expand_dims(x_y_offset, axis=-2)

    # Centres: sigmoid offset inside the cell plus the cell index.
    pred_xy = (flow.math.sigmoid(box_centers) + x_y_offset) * stride
    # Sizes: anchors are relative to the feature map, hence the stride.
    pred_wh = (flow.math.exp(box_sizes) * anchors) * stride

    # [N, H, W, 3, 4]
    pred_xywh = flow.concat([pred_xy, pred_wh], axis=-1)
    pred_conf = flow.math.sigmoid(conf_logits)
    pred_prob = flow.math.sigmoid(prob_logits)

    # [N, H, W, 3, 4+1+class_num]
    pred = flow.concat([pred_xywh, pred_conf, pred_prob], axis=-1)
    return pred, x_y_offset