def mean_dist(source_points, warped_points, L_pck):
    # compute mean keypoint distance, normalized by the PCK reference length
    batch_size = source_points.size(0)
    dist = torch.zeros((batch_size))
    for i in range(batch_size):
        p_src = source_points[i, :]
        p_wrp = warped_points[i, :]
        N_pts = torch.sum(torch.ne(p_src[0, :], -1) * torch.ne(p_src[1, :], -1))
        point_distance = torch.pow(torch.sum(torch.pow(p_src[:, :N_pts] - p_wrp[:, :N_pts], 2), 0), 0.5)
        L_pck_mat = L_pck[i].expand_as(point_distance)
        dist[i] = torch.mean(torch.div(point_distance, L_pck_mat))
    return dist
def pck(source_points, warped_points, L_pck, alpha=0.1):
    # compute percentage of correct keypoints (PCK)
    batch_size = source_points.size(0)
    pck = torch.zeros((batch_size))
    for i in range(batch_size):
        p_src = source_points[i, :]
        p_wrp = warped_points[i, :]
        N_pts = torch.sum(torch.ne(p_src[0, :], -1) * torch.ne(p_src[1, :], -1))
        point_distance = torch.pow(torch.sum(torch.pow(p_src[:, :N_pts] - p_wrp[:, :N_pts], 2), 0), 0.5)
        L_pck_mat = L_pck[i].expand_as(point_distance)
        correct_points = torch.le(point_distance, L_pck_mat * alpha)
        pck[i] = torch.mean(correct_points.float())
    return pck
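# Minimal usage sketch for the two keypoint metrics above. It assumes the 2 x N
# coordinate layout implied by the code, with -1 used to pad unused keypoint
# slots; the tensors below are illustrative only.
import torch

source_points = torch.tensor([[[10., 20., -1.], [15., 25., -1.]]])   # (batch=1, 2, max_pts=3)
warped_points = torch.tensor([[[12., 19., -1.], [14., 27., -1.]]])
L_pck = torch.tensor([100.])  # reference length per image (e.g. max bounding-box side)

print(mean_dist(source_points, warped_points, L_pck))        # mean normalized distance
print(pck(source_points, warped_points, L_pck, alpha=0.1))   # fraction of points within alpha * L_pck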
def forward(self, s1, s2):  # pylint: disable=arguments-differ
    """ """
    # Embeddings
    s1_embs = self._highway_layer(self._text_field_embedder(s1))
    s2_embs = self._highway_layer(self._text_field_embedder(s2))
    if self._elmo is not None:
        s1_elmo_embs = self._elmo(s1['elmo'])
        s2_elmo_embs = self._elmo(s2['elmo'])
        if "words" in s1:
            s1_embs = torch.cat([s1_embs, s1_elmo_embs['elmo_representations'][0]], dim=-1)
            s2_embs = torch.cat([s2_embs, s2_elmo_embs['elmo_representations'][0]], dim=-1)
        else:
            s1_embs = s1_elmo_embs['elmo_representations'][0]
            s2_embs = s2_elmo_embs['elmo_representations'][0]
    if self._cove is not None:
        s1_lens = torch.ne(s1['words'], self.pad_idx).long().sum(dim=-1).data
        s2_lens = torch.ne(s2['words'], self.pad_idx).long().sum(dim=-1).data
        s1_cove_embs = self._cove(s1['words'], s1_lens)
        s1_embs = torch.cat([s1_embs, s1_cove_embs], dim=-1)
        s2_cove_embs = self._cove(s2['words'], s2_lens)
        s2_embs = torch.cat([s2_embs, s2_cove_embs], dim=-1)
    s1_embs = self._dropout(s1_embs)
    s2_embs = self._dropout(s2_embs)

    # Set up masks
    s1_mask = util.get_text_field_mask(s1)
    s2_mask = util.get_text_field_mask(s2)
    s1_lstm_mask = s1_mask.float() if self._mask_lstms else None
    s2_lstm_mask = s2_mask.float() if self._mask_lstms else None

    # Sentence encodings with LSTMs
    s1_enc = self._phrase_layer(s1_embs, s1_lstm_mask)
    s2_enc = self._phrase_layer(s2_embs, s2_lstm_mask)
    if self._elmo is not None and len(s1_elmo_embs['elmo_representations']) > 1:
        s1_enc = torch.cat([s1_enc, s1_elmo_embs['elmo_representations'][1]], dim=-1)
        s2_enc = torch.cat([s2_enc, s2_elmo_embs['elmo_representations'][1]], dim=-1)
    s1_enc = self._dropout(s1_enc)
    s2_enc = self._dropout(s2_enc)

    # Max pooling
    s1_mask = s1_mask.unsqueeze(dim=-1)
    s2_mask = s2_mask.unsqueeze(dim=-1)
    s1_enc.data.masked_fill_(1 - s1_mask.byte().data, -float('inf'))
    s2_enc.data.masked_fill_(1 - s2_mask.byte().data, -float('inf'))
    s1_enc, _ = s1_enc.max(dim=1)
    s2_enc, _ = s2_enc.max(dim=1)

    return torch.cat([s1_enc, s2_enc, torch.abs(s1_enc - s2_enc), s1_enc * s2_enc], 1)
def forward(self, sent):  # pylint: disable=arguments-differ
    """
    Parameters
    ----------
    sent : Dict[str, torch.LongTensor]
        From a ``TextField``.

    Returns
    -------
    sent_enc : torch.FloatTensor
        Max-pooled sentence encoding.
    """
    sent_embs = self._highway_layer(self._text_field_embedder(sent))
    if self._cove is not None:
        sent_lens = torch.ne(sent['words'], self.pad_idx).long().sum(dim=-1).data
        sent_cove_embs = self._cove(sent['words'], sent_lens)
        sent_embs = torch.cat([sent_embs, sent_cove_embs], dim=-1)
    if self._elmo is not None:
        elmo_embs = self._elmo(sent['elmo'])
        if "words" in sent:
            sent_embs = torch.cat([sent_embs, elmo_embs['elmo_representations'][0]], dim=-1)
        else:
            sent_embs = elmo_embs['elmo_representations'][0]
    sent_embs = self._dropout(sent_embs)

    sent_mask = util.get_text_field_mask(sent).float()
    sent_lstm_mask = sent_mask if self._mask_lstms else None

    sent_enc = self._phrase_layer(sent_embs, sent_lstm_mask)
    if self._elmo is not None and len(elmo_embs['elmo_representations']) > 1:
        sent_enc = torch.cat([sent_enc, elmo_embs['elmo_representations'][1]], dim=-1)
    sent_enc = self._dropout(sent_enc)

    sent_mask = sent_mask.unsqueeze(dim=-1)
    sent_enc.data.masked_fill_(1 - sent_mask.byte().data, -float('inf'))
    return sent_enc.max(dim=1)[0]
def compute_mask(v, padding_idx=0):
    """
    Compute a binary mask over the given tensor ``v``: 1 where the value
    differs from ``padding_idx``, 0 elsewhere.

    :param v: input tensor of token indices.
    :param padding_idx: index used for padding.
    :return: float mask tensor with the same shape as ``v``.
    """
    mask = torch.ne(v, padding_idx).float()
    return mask
def box_loss(self, gt_label, gt_offset, pred_offset):
    # mask of elements whose label != 0
    mask = torch.ne(gt_label, 0)
    # convert the mask to dimension indices
    chose_index = torch.nonzero(mask)
    chose_index = torch.squeeze(chose_index)
    # only valid elements contribute to the loss
    valid_gt_offset = gt_offset[chose_index, :]
    valid_pred_offset = pred_offset[chose_index, :]
    valid_pred_offset = torch.squeeze(valid_pred_offset)
    return self.loss_box(valid_pred_offset, valid_gt_offset)
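# Minimal sketch of the mask -> nonzero -> row-selection pattern used in
# box_loss above. Names and shapes here are illustrative, not taken from the
# original training code.
import torch

gt_label = torch.tensor([0, 1, 0, -1, 1])      # 0 marks samples without a box target
gt_offset = torch.randn(5, 4)
pred_offset = torch.randn(5, 4)

mask = torch.ne(gt_label, 0)                    # rows with a usable regression target
chose_index = torch.nonzero(mask).squeeze()
print(gt_offset[chose_index, :].shape)          # torch.Size([3, 4])
print(pred_offset[chose_index, :].shape)        # torch.Size([3, 4])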
def compute_accuracy(y_pred, y_true, mask_index):
    y_pred, y_true = normalize_sizes(y_pred, y_true)

    _, y_pred_indices = y_pred.max(dim=1)

    correct_indices = torch.eq(y_pred_indices, y_true).float()
    valid_indices = torch.ne(y_true, mask_index).float()

    n_correct = (correct_indices * valid_indices).sum().item()
    n_valid = valid_indices.sum().item()

    return n_correct / n_valid * 100
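# Hedged usage sketch for compute_accuracy. ``normalize_sizes`` is not shown in
# this snippet; the stub below is a hypothetical stand-in that flattens
# (batch, seq_len, vocab) predictions to (batch*seq_len, vocab) and targets to
# (batch*seq_len,), which is what the call above appears to assume.
import torch

def normalize_sizes(y_pred, y_true):
    # hypothetical helper, only for this example
    if y_pred.dim() == 3:
        y_pred = y_pred.contiguous().view(-1, y_pred.size(2))
    if y_true.dim() == 2:
        y_true = y_true.contiguous().view(-1)
    return y_pred, y_true

y_pred = torch.randn(2, 5, 10)        # (batch, seq_len, vocab)
y_true = torch.randint(1, 10, (2, 5))
y_true[:, -2:] = 0                    # pretend index 0 is padding
print(compute_accuracy(y_pred, y_true, mask_index=0))  # accuracy over non-padding positions, in percent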
def rpn_class_loss(rpn_match, rpn_class_logits):
    """RPN anchor classifier loss.

    rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive,
               -1=negative, 0=neutral anchor.
    rpn_class_logits: [batch, anchors, 2]. RPN classifier logits for FG/BG.
    """
    # Get anchor classes. Convert the -1/+1 match to 0/1 values.
    anchor_class = torch.eq(rpn_match, 1)
    # Positive and Negative anchors contribute to the loss,
    # but neutral anchors (match value = 0) don't.
    indices = torch.ne(rpn_match, 0.)

    rpn_class_logits = torch.masked_select(rpn_class_logits, indices)
    anchor_class = torch.masked_select(anchor_class, indices)

    rpn_class_logits = rpn_class_logits.contiguous().view(-1, 2)
    anchor_class = anchor_class.contiguous().view(-1).type(torch.cuda.LongTensor)

    loss = F.cross_entropy(rpn_class_logits, anchor_class, weight=None)
    return loss
def forward(self, prediction, annotations):
    # Get the inputs
    classifications, regressions, anchors = prediction

    alpha = 0.25
    gamma = 2.0
    batch_size = classifications.shape[0]
    classification_losses = []
    regression_losses = []

    anchor = anchors[0, :, :]
    anchor_widths = anchor[:, 2] - anchor[:, 0]
    anchor_heights = anchor[:, 3] - anchor[:, 1]
    anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
    anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

    for j in range(batch_size):
        classification = classifications[j, :, :]
        regression = regressions[j, :, :]

        # bbox_annotation = annotations[j, :, :]
        bbox_annotation = torch.from_numpy(annotations[j].astype(np.float32)).to(anchors.device)
        bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

        if bbox_annotation.shape[0] == 0:
            regression_losses.append(torch.tensor(0).float().cuda())
            classification_losses.append(torch.tensor(0).float().cuda())
            continue

        classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

        # num_anchors x num_annotations
        IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4])
        IoU_max, IoU_argmax = torch.max(IoU, dim=1)  # num_anchors x 1

        # compute the loss for classification
        # targets = torch.ones_like(classification.shape) * -1
        # targets = targets.cuda()
        targets = torch.ones_like(classification) * -1
        targets[torch.lt(IoU_max, 0.4), :] = 0

        positive_indices = torch.ge(IoU_max, 0.5)
        num_positive_anchors = positive_indices.sum()
        assigned_annotations = bbox_annotation[IoU_argmax, :]

        targets[positive_indices, :] = 0
        targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1

        # alpha_factor = torch.ones(targets.shape).cuda() * alpha
        alpha_factor = torch.ones_like(targets) * alpha
        alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
        focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
        focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

        bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))
        # cls_loss = focal_weight * torch.pow(bce, gamma)
        cls_loss = focal_weight * bce
        # cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).cuda())
        cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros_like(cls_loss))
        classification_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))

        # compute the loss for regression
        if positive_indices.sum() > 0:
            assigned_annotations = assigned_annotations[positive_indices, :]

            anchor_widths_pi = anchor_widths[positive_indices]
            anchor_heights_pi = anchor_heights[positive_indices]
            anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
            anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

            gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
            gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
            gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
            gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

            # clip widths to 1
            gt_widths = torch.clamp(gt_widths, min=1)
            gt_heights = torch.clamp(gt_heights, min=1)

            targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
            targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
            targets_dw = torch.log(gt_widths / anchor_widths_pi)
            targets_dh = torch.log(gt_heights / anchor_heights_pi)

            targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh))
            targets = targets.t()
            targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda()

            negative_indices = 1 + (~positive_indices)

            regression_diff = torch.abs(targets - regression[positive_indices, :])
            regression_loss = torch.where(
                torch.le(regression_diff, 1.0 / 9.0),
                0.5 * 9.0 * torch.pow(regression_diff, 2),
                regression_diff - 0.5 / 9.0)
            regression_losses.append(regression_loss.mean())
        else:
            regression_losses.append(torch.tensor(0).float().cuda())

    classification_loss = torch.stack(classification_losses).mean(dim=0, keepdim=True)
    classification_loss = classification_loss.mean()
    regression_loss = torch.stack(regression_losses).mean(dim=0, keepdim=True)
    regression_loss = regression_loss.mean()
    loss = classification_loss + regression_loss
    return loss, regression_loss, classification_loss
def forward(self, x):
    mask = torch.ne(x, self.pad_idx).float()
    return mask
def forward(self, classifications, regressions, anchors, annotations, alpha=0.25, gamma=2.0, cuda=True):
    # Setup
    dtype = regressions.dtype
    batch_size = classifications.shape[0]
    classification_losses = []
    regression_losses = []

    # Get the anchors (priors) and convert them to center / width / height form
    anchor = anchors[0, :, :].to(dtype)
    # convert to (center x, center y, width, height)
    anchor_widths = anchor[:, 3] - anchor[:, 1]
    anchor_heights = anchor[:, 2] - anchor[:, 0]
    anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
    anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

    for j in range(batch_size):
        # Take the ground-truth boxes for this image
        bbox_annotation = annotations[j]
        # Take the per-image classification and regression predictions
        classification = classifications[j, :, :]
        regression = regressions[j, :, :]
        # Clamp predictions away from 0 and 1 (label smoothing)
        classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

        if len(bbox_annotation) == 0:
            alpha_factor = torch.ones_like(classification) * alpha
            if cuda:
                alpha_factor = alpha_factor.cuda()
            alpha_factor = 1. - alpha_factor
            focal_weight = classification
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(torch.log(1.0 - classification))
            cls_loss = focal_weight * bce

            if cuda:
                regression_losses.append(torch.tensor(0).to(dtype).cuda())
            else:
                regression_losses.append(torch.tensor(0).to(dtype))
            classification_losses.append(cls_loss.sum())
            continue

        # Compute the assignment targets
        targets, num_positive_anchors, positive_indices, assigned_annotations = get_target(
            anchor, bbox_annotation, classification, cuda)

        alpha_factor = torch.ones_like(targets) * alpha
        if cuda:
            alpha_factor = alpha_factor.cuda()
        alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
        focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
        focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

        bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))
        cls_loss = focal_weight * bce

        zeros = torch.zeros_like(cls_loss)
        if cuda:
            zeros = zeros.cuda()
        cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, zeros)
        classification_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.to(dtype), min=1.0))

        # Smooth L1 loss for regression
        if positive_indices.sum() > 0:
            targets = encode_bbox(assigned_annotations, positive_indices, anchor_widths,
                                  anchor_heights, anchor_ctr_x, anchor_ctr_y)

            regression_diff = torch.abs(targets - regression[positive_indices, :])
            regression_loss = torch.where(
                torch.le(regression_diff, 1.0 / 9.0),
                0.5 * 9.0 * torch.pow(regression_diff, 2),
                regression_diff - 0.5 / 9.0)
            regression_losses.append(regression_loss.mean())
        else:
            if cuda:
                regression_losses.append(torch.tensor(0).to(dtype).cuda())
            else:
                regression_losses.append(torch.tensor(0).to(dtype))

    c_loss = torch.stack(classification_losses).mean()
    r_loss = torch.stack(regression_losses).mean()
    loss = c_loss + r_loss
    return loss, c_loss, r_loss
def __ne__(self, other):
    return torch.ne(self, other)
def easy(x, y):
    c = torch.ne(x, y)
    return c
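# Quick demonstration of what torch.ne returns: an elementwise boolean mask,
# following the usual broadcasting rules (scalar or tensor second argument).
import torch

x = torch.tensor([[1, 2, 0], [0, 3, 0]])
print(torch.ne(x, 0))                         # tensor([[ True,  True, False], [False,  True, False]])
print(torch.ne(x, torch.tensor([1, 2, 3])))   # second argument broadcast across rows
print((x != 0).float())                       # operator form, cast to a 0./1. mask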
def forward(self, classifications, regressions, anchors, annotations):
    alpha = 0.25
    gamma = 2.0
    batch_size = classifications.shape[0]
    class_losses = []
    bbox_losses = []

    # anchor (num_anchors x 4): (x1, y1, x2, y2) corner coordinates
    anchor = anchors[0, :, :]
    anchor_widths = anchor[:, 2] - anchor[:, 0]
    anchor_heights = anchor[:, 3] - anchor[:, 1]
    # centre coordinates of each anchor
    anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
    anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

    for j in range(batch_size):
        classification = classifications[j, :, :]
        regression = regressions[j, :, :]

        bbox_annotation = annotations[j, :, :]
        bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

        classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

        if bbox_annotation.shape[0] == 0:
            if torch.cuda.is_available():
                alpha_factor = torch.ones(classification.shape).cuda() * alpha
                alpha_factor = 1. - alpha_factor
                focal_weight = classification
                focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

                bce = -(torch.log(1.0 - classification))
                # cls_loss = focal_weight * torch.pow(bce, gamma)
                cls_loss = focal_weight * bce
                class_losses.append(cls_loss.sum())
                bbox_losses.append(torch.tensor(0).float())
            else:
                alpha_factor = torch.ones(classification.shape) * alpha
                alpha_factor = 1. - alpha_factor
                focal_weight = classification
                focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

                bce = -(torch.log(1.0 - classification))
                # cls_loss = focal_weight * torch.pow(bce, gamma)
                cls_loss = focal_weight * bce
                class_losses.append(cls_loss.sum())
                bbox_losses.append(torch.tensor(0).float())
            continue

        # num_anchors x num_annotations; new name avoids shadowing the IoU function
        iou = IoU(anchors[0, :, :], bbox_annotation[:, :4])
        IoU_max, IoU_argmax = torch.max(iou, dim=1)  # num_anchors x 1

        # compute the loss for classification
        targets = torch.ones(classification.shape) * -1
        if torch.cuda.is_available():
            targets = targets.cuda()
        targets[torch.lt(IoU_max, 0.4), :] = 0

        positive_indices = torch.ge(IoU_max, 0.5)
        num_positive_anchors = positive_indices.sum()
        assigned_annotations = bbox_annotation[IoU_argmax, :]

        targets[positive_indices, :] = 0
        targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1

        if torch.cuda.is_available():
            alpha_factor = torch.ones(targets.shape).cuda() * alpha
        else:
            alpha_factor = torch.ones(targets.shape) * alpha
        alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
        focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
        focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

        bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))
        # cls_loss = focal_weight * torch.pow(bce, gamma)
        cls_loss = focal_weight * bce
        if torch.cuda.is_available():
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).cuda())
        else:
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape))
        class_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))

        # compute the loss for regression
        if positive_indices.sum() > 0:
            assigned_annotations = assigned_annotations[positive_indices, :]

            anchor_widths_pi = anchor_widths[positive_indices]
            anchor_heights_pi = anchor_heights[positive_indices]
            anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
            anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

            gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
            gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
            gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
            gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

            # clip widths to 1
            gt_widths = torch.clamp(gt_widths, min=1)
            gt_heights = torch.clamp(gt_heights, min=1)

            targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
            targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
            targets_dw = torch.log(gt_widths / anchor_widths_pi)
            targets_dh = torch.log(gt_heights / anchor_heights_pi)

            targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh))
            targets = targets.t()
            if torch.cuda.is_available():
                targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda()
            else:
                targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]])

            negative_indices = 1 + (~positive_indices)

            regression_diff = torch.abs(targets - regression[positive_indices, :])
            regression_loss = torch.where(
                torch.le(regression_diff, 1.0 / 9.0),
                0.5 * 9.0 * torch.pow(regression_diff, 2),
                regression_diff - 0.5 / 9.0)
            bbox_losses.append(regression_loss.mean())
        else:
            if torch.cuda.is_available():
                bbox_losses.append(torch.tensor(0).float().cuda())
            else:
                bbox_losses.append(torch.tensor(0).float())

    return torch.stack(class_losses).mean(dim=0, keepdim=True), torch.stack(bbox_losses).mean(dim=0, keepdim=True)
def forward(self, outs, snt_state, snt_padding_mask, copy_seq, target=None, work=False):
    x, alignment_weight = self.alignment_layer(outs, snt_state, snt_state,
                                               key_padding_mask=snt_padding_mask,
                                               need_weights='one')
    x = F.dropout(x, p=self.dropout, training=self.training)
    x = self.alignment_layer_norm(outs + x)

    residual = x
    x = F.relu(self.fc1(x))
    x = F.dropout(x, p=self.dropout, training=self.training)
    x = self.fc2(x)
    x = F.dropout(x, p=self.dropout, training=self.training)
    outs = self.ff_layer_norm(residual + x)

    seq_len, bsz, _ = outs.size()
    outs_concept = torch.tanh(self.transfer(outs))
    outs_concept = F.dropout(outs_concept, p=self.dropout, training=self.training)

    gen_gate, map_gate, copy_gate = F.softmax(self.diverter(outs_concept), -1).chunk(3, dim=-1)
    copy_gate = torch.cat([copy_gate, map_gate], -1)

    probs = gen_gate * F.softmax(self.generator(outs_concept), -1)

    tot_ext = 1 + copy_seq.max().item()
    vocab_size = probs.size(-1)
    if tot_ext - vocab_size > 0:
        ext_probs = probs.new_zeros((1, 1, tot_ext - vocab_size)).expand(seq_len, bsz, -1)
        probs = torch.cat([probs, ext_probs], -1)

    # copy_seq: src_len x bsz x 2
    # copy_gate: tgt_len x bsz x 2
    # alignment_weight: tgt_len x bsz x src_len
    # index: tgt_len x bsz x (src_len x 2)
    index = copy_seq.transpose(0, 1).contiguous().view(1, bsz, -1).expand(seq_len, -1, -1)
    copy_probs = (copy_gate.unsqueeze(2) * alignment_weight.unsqueeze(-1)).view(seq_len, bsz, -1)
    probs = probs.scatter_add_(-1, index, copy_probs)
    ll = torch.log(probs + 1e-12)

    if work:
        return ll, outs

    if not self.training:
        _, pred = torch.max(ll, -1)
        total_concepts = torch.ne(target, self.vocabs['predictable_concept'].padding_idx)
        acc = torch.eq(pred, target).masked_select(total_concepts).float().sum().item()
        tot = total_concepts.sum().item()
        print('conc acc', acc / tot)

    concept_loss = -ll.gather(dim=-1, index=target.unsqueeze(-1)).squeeze(-1)
    concept_mask = torch.eq(target, self.vocabs['predictable_concept'].padding_idx)
    concept_loss = concept_loss.masked_fill_(concept_mask, 0.).sum(0)
    return concept_loss, outs
def _input_split_sort(input, offsets, padding_idx):
    """
    In the circumstance of row-wise sharding of weight, we need to distribute
    the sorted lookup IDs of embeddingBag to each rank by range. The constraint
    here is that we cannot directly sort the whole input; each interval must be
    kept separate, because the result is aggregated per interval.

    If the index in the placement is not equal to the rank number, we need to
    do the rearrangement based on the order given by the Sharding Spec (placement).

    We also calculate the split_size with padding_idx excluded per interval so
    that we can use it as the divisor to calculate the mean correctly.

    Args:
        input: tensor to be applied op on.
        offsets: start index of each interval in the 1D case.
        padding_idx: the embedding vector at padding_idx is excluded from the
            reduction.

    Return:
        input_split_sorted_list: list of ID positions sorted per interval.
        input_split_sorted_indices: sorted indices for per_sample_weights
            rearrangements.
        split_sizes_1d: size of each split for 1D input because it can be
            different in such scenario.
        split_sizes_1d_with_padding: size of each split for 1D input with
            padding_idx excluded. This is for the divisor of `mean` mode.
    """
    input_size = input.size()
    input_split_sorted_list = []
    split_sizes_1d = []
    split_sizes_1d_with_padding = []
    padding_idx = padding_idx if padding_idx is not None else -1

    # For 2D tensor, we just first sort and then append row by row into a list.
    if len(input_size) > 1:
        indice_offset = 0
        sorted_input, input_split_sorted_indices = torch.sort(input)
        for i in range(0, sorted_input.size(0)):
            input_split_sorted_list.append(sorted_input[i])
            input_split_sorted_indices[i] += indice_offset
            indice_offset += input.size(1)
            split_sizes_1d_with_padding.append(
                torch.sum(torch.ne(sorted_input[i], padding_idx)).item())
        input_split_sorted_indices = torch.reshape(input_split_sorted_indices, (-1,))
    # Split 1D input tensor based on the given offsets.
    else:
        input_split_sorted_indices_list = []
        offset_len = len(offsets)
        split_size = offsets[1:offset_len] - offsets[0:-1]
        split_sizes_1d = split_size.tolist()
        if torch.sum(split_size) < input.size(0):
            split_sizes_1d.append(input.size(0) - offsets[-1].item())
        indice_offset = 0
        for idx, split_result in enumerate(torch.split(input, split_sizes_1d)):
            split_result_sorted, indices = torch.sort(split_result)
            input_split_sorted_list.append(split_result_sorted)
            split_sizes_1d_with_padding.append(
                torch.sum(torch.ne(split_result_sorted, padding_idx)).item())
            input_split_sorted_indices_list.append(indices + indice_offset)
            indice_offset += split_sizes_1d[idx]
        input_split_sorted_indices = torch.cat(input_split_sorted_indices_list)

    return (
        input_split_sorted_list,
        input_split_sorted_indices,
        split_sizes_1d,
        split_sizes_1d_with_padding,
    )
def _handle_row_wise_sharding( input, world_size, weight, local_shard, offsets, per_sample_weights, mode, max_norm, norm_type, padding_idx, rank, pg, ): """ Entry-point function to handle the logic of row-wise sharding of weight for embeddingBag. (Detailed explanations of the logic can be found in the comment for sharded_embedding_bag.) Args: input: list of ID used for lookup and aggregation. world_size: number of ranks. weight: shareded weight tensor. local_shard: row-wise shared local weight used for lookup. offsets: list of start positions of each bag for 1D input. per_sample_weights: weights for weighted sum mode. mode: aggregation method of each bag. max_norm: If given, each embedding vector with norm larger than max_norm is renormalized to have norm max_norm. Note: this will modify weight in-place. norm_type: The p in the p-norm to compute for the max_norm option. padding_idx: If specified, the entries at padding_idx do not contribute to the gradient; therefore, the embedding vector at padding_idx is not updated during training, i.e. it remains as a fixed “pad”. Note that the embedding vector at padding_idx is excluded from the reduction. rank: # of cuda process. pg: process group. Returns: gathered_output: final result of lookup and aggregation. """ # We sort each interval defined by offset. If 2D, each interval is a row. input_size = input.size() ( input_split_sorted_list, input_split_sorted_indices, split_sizes_1d, split_sizes_1d_with_padding, ) = _input_split_sort(input, offsets, padding_idx) # Within each interval of the sorted list, we first need to distribute # each ID to different bucket(rank) and also ensure the rearrangement # has been done in case the placement idx not equal to rank. # We then perform some simple stats on each interval for the next step # If user specifies per_sample_weights we need to rearrange them # to be sync with IDs and then distribute them to each rank ( input_combined, input_combined_split_sizes, offsets_rearrange_list, offsets_rearrange_sizes, per_sample_weights, sharded_dim_size_max, padding_idx, ) = _sorted_input_distribute_prepare( input_split_sorted_list, input_split_sorted_indices, world_size, input, weight, per_sample_weights, rank, padding_idx, ) # Send ID/offsets/per_sample_weights to different bucket(rank). ( gathered_input, output_offsets_tensor_list, output_split_sizes, gathered_per_sample_weights, ) = _distribute_input( input_combined, input_combined_split_sizes, offsets_rearrange_list, offsets_rearrange_sizes, sharded_dim_size_max, world_size, input, per_sample_weights, pg, ) # Perform the embedding bag look-up and aggregation results = [] for i, inp in enumerate(gathered_input): per_sample_weights = (gathered_per_sample_weights[i] if gathered_per_sample_weights is not None else None) # If input is None, passing in max_norm causes # errors in CUDA. if max_norm is not None and inp.size(0) == 0: max_norm = None # Perform local embedding look up and aggregation. result = torch.nn.functional.embedding_bag( inp, local_shard, offsets=output_offsets_tensor_list[i], mode=mode if mode != "mean" else "sum", per_sample_weights=per_sample_weights, max_norm=max_norm, norm_type=norm_type, padding_idx=padding_idx, ) if mode != "max": results.append(result) # For max case, it there is no look-up from some ranks # it will return all zero for that. For that case, we need # to set the row to neg inf; otherwise, in the final # aggregation negative values will be rounded up to zero. 
elif inp.size(0) == 0: result[:] = -float("Inf") results.append(result) else: for idx, current_offset in enumerate( output_offsets_tensor_list[i]): next_offset = current_offset if idx == len(output_offsets_tensor_list[i]) - 1: next_offset = output_split_sizes[i] else: next_offset = output_offsets_tensor_list[i][idx + 1] # When there is no interval in the current rank or all IDs # are equal to padding_idx, we then need to ensure they # don't contribute to the final result. if (current_offset == next_offset) or ( padding_idx is not None and not torch.any( torch.ne(inp[current_offset:next_offset], padding_idx))): result[idx] = -float("Inf") results.append(result) # Gather all the aggregated results appropriately by using reduce_scatter. row_size = input.size(0) if len(input_size) > 1 else len(split_sizes_1d) gathered_output = torch.empty(row_size, weight.size(1), device=input.device) op = ReduceOp.SUM if mode != "max" else ReduceOp.MAX dist.reduce_scatter(gathered_output, results, op=op, group=pg) # For Mean, we cannot do the division until very end because the sum of means # not equal to the mean of sum. (Divisor is different) if mode == "mean": split_sizes_1d_tensor = torch.tensor(split_sizes_1d_with_padding, dtype=torch.float, device=input.device) # Make sure divisor is not zero. split_sizes_1d_tensor[split_sizes_1d_tensor == 0.0] = 1.0 return (torch.div(gathered_output.t().contiguous(), split_sizes_1d_tensor).t().contiguous()) # Return the appropriate local result. return gathered_output
def check_adv(cls, logits, label, targeted, confidence):
    if targeted:
        return torch.eq(torch.argmax(logits - confidence, 1), label.squeeze())
    return torch.ne(torch.argmax(logits - confidence, 1), label.squeeze())
def forward(self, classifications, bbox_regressions, ldm_regressions, anchors, annotations):
    device = classifications.device
    batch_size = classifications.shape[0]
    classification_losses = []
    bbox_regression_losses = []
    ldm_regression_losses = []

    anchor = anchors[0, :, :]
    anchor_widths = anchor[:, 2] - anchor[:, 0]
    anchor_heights = anchor[:, 3] - anchor[:, 1]
    anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
    anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

    for j in range(batch_size):
        classification = classifications[j, :, :]
        bbox_regression = bbox_regressions[j, :, :]
        ldm_regression = ldm_regressions[j, :, :]

        annotation = annotations[j, :, :]
        annotation = annotation[annotation[:, 0] > 0]
        bbox_annotation = annotation[:, :4]
        ldm_annotation = annotation[:, 4:]

        if bbox_annotation.shape[0] == 0:
            bbox_regression_losses.append(torch.tensor(0., requires_grad=True, device=device))
            classification_losses.append(torch.tensor(0., requires_grad=True, device=device))
            ldm_regression_losses.append(torch.tensor(0., requires_grad=True, device=device))
            continue

        # IoU between anchors and annotations
        IoU = box_iou(anchor, bbox_annotation)
        IoU_max, IoU_argmax = torch.max(IoU, dim=1)

        targets = torch.ones_like(classification) * -1

        # those whose iou < 0.3 have no object
        negative_indices = torch.lt(IoU_max, 0.3)
        targets[negative_indices, :] = 0
        targets[negative_indices, 1] = 1

        # those whose iou > 0.5 have an object
        positive_indices = torch.ge(IoU_max, 0.5)
        targets[positive_indices, :] = 0
        targets[positive_indices, 0] = 1

        # keep positive and negative ratio at 1:3
        num_positive_anchors = positive_indices.sum()
        keep_negative_anchors = num_positive_anchors * 3

        bbox_assigned_annotations = bbox_annotation[IoU_argmax, :]
        ldm_assigned_annotations = ldm_annotation[IoU_argmax, :]

        ldm_sum = ldm_assigned_annotations.sum(dim=1)
        ge0_mask = ldm_sum > 0
        ldm_positive_indices = ge0_mask & positive_indices

        # OHEM
        # negative_losses = classification[negative_indices, 1] * -1
        negative_losses = self.centropy(classification[negative_indices],
                                        targets[negative_indices].argmax(dim=1))
        sorted_losses, _ = torch.sort(negative_losses, descending=True)
        if sorted_losses.numel() > keep_negative_anchors:
            sorted_losses = sorted_losses[:keep_negative_anchors]
        # positive_losses = classification[positive_indices, 0] * -1
        positive_losses = self.centropy(classification[positive_indices],
                                        targets[positive_indices].argmax(dim=1))

        # focal loss
        focal_loss = False
        if focal_loss:
            alpha = 0.25
            gamma = 2.0
            alpha_factor = torch.ones_like(targets) * alpha
            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))
            cls_loss = focal_weight * bce
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros_like(cls_loss))
            classification_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))
        else:
            if positive_indices.sum() > 0:
                classification_losses.append(positive_losses.mean() + sorted_losses.mean())
            else:
                classification_losses.append(torch.tensor(0., requires_grad=True, device=device))

        # compute bboxes loss
        if positive_indices.sum() > 0:
            # bbox
            anchor_widths_pi = anchor_widths[positive_indices]
            anchor_heights_pi = anchor_heights[positive_indices]
            anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
            anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

            bbox_assigned_annotations = bbox_assigned_annotations[positive_indices, :]

            gt_widths = bbox_assigned_annotations[:, 2] - bbox_assigned_annotations[:, 0]
            gt_heights = bbox_assigned_annotations[:, 3] - bbox_assigned_annotations[:, 1]
            gt_ctr_x = bbox_assigned_annotations[:, 0] + 0.5 * gt_widths
            gt_ctr_y = bbox_assigned_annotations[:, 1] + 0.5 * gt_heights

            targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / (anchor_widths_pi + 1e-14)
            targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / (anchor_heights_pi + 1e-14)
            targets_dw = torch.log(gt_widths / anchor_widths_pi)
            targets_dh = torch.log(gt_heights / anchor_heights_pi)

            bbox_targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh))
            bbox_targets = bbox_targets.t()

            # Rescale
            bbox_targets = bbox_targets / torch.tensor([[0.1, 0.1, 0.2, 0.2]], device=device)

            # smooth L1 box loss
            bbox_regression_loss = self.smoothl1(bbox_targets, bbox_regression[positive_indices, :])
            bbox_regression_losses.append(bbox_regression_loss)
        else:
            bbox_regression_losses.append(torch.tensor(0., requires_grad=True, device=device))

        # compute landmarks loss
        if ldm_positive_indices.sum() > 0:
            ldm_assigned_annotations = ldm_assigned_annotations[ldm_positive_indices, :]

            anchor_widths_l = anchor_widths[ldm_positive_indices]
            anchor_heights_l = anchor_heights[ldm_positive_indices]
            anchor_ctr_x_l = anchor_ctr_x[ldm_positive_indices]
            anchor_ctr_y_l = anchor_ctr_y[ldm_positive_indices]

            l0_x = (ldm_assigned_annotations[:, 0] - anchor_ctr_x_l) / (anchor_widths_l + 1e-14)
            l0_y = (ldm_assigned_annotations[:, 1] - anchor_ctr_y_l) / (anchor_heights_l + 1e-14)
            l1_x = (ldm_assigned_annotations[:, 2] - anchor_ctr_x_l) / (anchor_widths_l + 1e-14)
            l1_y = (ldm_assigned_annotations[:, 3] - anchor_ctr_y_l) / (anchor_heights_l + 1e-14)
            l2_x = (ldm_assigned_annotations[:, 4] - anchor_ctr_x_l) / (anchor_widths_l + 1e-14)
            l2_y = (ldm_assigned_annotations[:, 5] - anchor_ctr_y_l) / (anchor_heights_l + 1e-14)
            l3_x = (ldm_assigned_annotations[:, 6] - anchor_ctr_x_l) / (anchor_widths_l + 1e-14)
            l3_y = (ldm_assigned_annotations[:, 7] - anchor_ctr_y_l) / (anchor_heights_l + 1e-14)
            l4_x = (ldm_assigned_annotations[:, 8] - anchor_ctr_x_l) / (anchor_widths_l + 1e-14)
            l4_y = (ldm_assigned_annotations[:, 9] - anchor_ctr_y_l) / (anchor_heights_l + 1e-14)

            ldm_targets = torch.stack((l0_x, l0_y, l1_x, l1_y, l2_x, l2_y, l3_x, l3_y, l4_x, l4_y))
            ldm_targets = ldm_targets.t()

            # Rescale
            scale = torch.ones(1, 10, device=device) * 0.1
            ldm_targets = ldm_targets / scale

            ldm_regression_loss = self.smoothl1(ldm_targets, ldm_regression[ldm_positive_indices, :])
            ldm_regression_losses.append(ldm_regression_loss)
        else:
            ldm_regression_losses.append(torch.tensor(0., requires_grad=True, device=device))

    batch_cls_losses = torch.stack(classification_losses).mean()
    batch_box_losses = torch.stack(bbox_regression_losses).mean()
    batch_lmk_losses = torch.stack(ldm_regression_losses).mean()
    losses = batch_cls_losses + self.lmd1 * batch_box_losses + self.lmd2 * batch_lmk_losses
    return losses
def forward(self, classifications, regressions, anchors, annotations): def extract(box): w, h = box[:, 2] - box[:, 0], box[:, 3] - box[:, 1] return w, h, box[:, 0] + .5 * w, box[:, 1] + .5 * h batch_size = classifications.shape[0] classification_losses = [] regression_losses = [] anchor_widths, anchor_heights, anchor_ctr_x, anchor_ctr_y = extract( anchors[0, :, :]) for classification, regression, bbox_annotation in zip( classifications, regressions, annotations): bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1] if bbox_annotation.shape[0] == 0: regression_losses.append( torch.tensor(0).float().to(self.device)) classification_losses.append( torch.tensor(0).float().to(self.device)) continue classification = torch.clamp(classification, 1e-4, 1. - 1e-4) IoU = compute_iou_( anchors[0, :, :], bbox_annotation[:, :4]) # num_anchors x num_annotations IoU_max, IoU_argmax = torch.max(IoU, dim=1) # num_anchors x 1 # compute the loss for classification targets = (torch.ones(classification.shape) * -1).to(self.device) targets[torch.lt(IoU_max, .4), :] = 0 positive_indices = torch.ge(IoU_max, .5) num_positive_anchors = positive_indices.sum() assigned_annotations = bbox_annotation[IoU_argmax, :] targets[positive_indices, :] = 0 targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1 alpha_factor = torch.ones(targets.shape).to( self.device) * self.alpha alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor) focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification) focal_weight = alpha_factor * torch.pow(focal_weight, self.gamma) bce = -(targets * torch.log(classification) + (1. - targets) * torch.log(1.0 - classification)) cls_loss = focal_weight * bce cls_loss = torch.where(torch.ne(targets, -1.), cls_loss, torch.zeros(cls_loss.shape).to(self.device)) classification_losses.append( cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.)) # compute the loss for regression if positive_indices.sum().item() > 0: assigned_annotations = assigned_annotations[ positive_indices, :] anchor_widths_pi = anchor_widths[positive_indices] anchor_heights_pi = anchor_heights[positive_indices] anchor_ctr_x_pi = anchor_ctr_x[positive_indices] anchor_ctr_y_pi = anchor_ctr_y[positive_indices] gt_widths, gt_heights, gt_ctr_x, gt_ctr_y = extract( assigned_annotations) # clip widths to 1 gt_widths = torch.clamp(gt_widths, min=1) gt_heights = torch.clamp(gt_heights, min=1) targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi targets_dw = torch.log(gt_widths / anchor_widths_pi) targets_dh = torch.log(gt_heights / anchor_heights_pi) targets = torch.stack( (targets_dx, targets_dy, targets_dw, targets_dh)) targets = targets.t() targets = targets / torch.Tensor([[.1, .1, .2, .2]]).to( self.device) negative_indices = 1 - positive_indices regression_diff = torch.abs(targets - regression[positive_indices, :]) regression_loss = torch.where( torch.le(regression_diff, 1. / 9.), .5 * 9. * torch.pow(regression_diff, 2), regression_diff - .5 / 9.) regression_losses.append(regression_loss.mean()) else: regression_losses.append( torch.tensor(0).float().to(self.device)) return torch.stack(classification_losses).mean(dim=0, keepdim=True), \ torch.stack(regression_losses).mean(dim=0, keepdim=True)
def search(self, src, is_greedy=True, max_length=255):
    mask, x_length = None, None

    if isinstance(src, tuple):
        x, x_length = src
        mask = self.generate_mask(x, x_length)
    else:
        x = src
    batch_size = x.size(0)

    emb_src = self.emb_src(x)
    h_src, h_0_tgt = self.encoder((emb_src, x_length))
    h_0_tgt, c_0_tgt = h_0_tgt
    h_0_tgt = h_0_tgt.transpose(0, 1).contiguous().view(batch_size, -1, self.hidden_size).transpose(0, 1).contiguous()
    c_0_tgt = c_0_tgt.transpose(0, 1).contiguous().view(batch_size, -1, self.hidden_size).transpose(0, 1).contiguous()
    h_0_tgt = (h_0_tgt, c_0_tgt)

    # Fill a vector, which has 'batch_size' dimension, with BOS value.
    y = x.new(batch_size, 1).zero_() + data_loader.BOS
    is_undone = x.new_ones(batch_size, 1).float()
    decoder_hidden = h_0_tgt
    h_t_tilde, y_hats, indice = None, [], []

    # Repeat the loop while the sum of the 'is_undone' flags is bigger than 0
    # and the current time-step is smaller than the maximum length.
    while is_undone.sum() > 0 and len(indice) < max_length:
        # Unlike the training procedure,
        # take the last time-step's output during inference.
        emb_t = self.emb_dec(y)
        # |emb_t| = (batch_size, 1, word_vec_dim)

        decoder_output, decoder_hidden = self.decoder(emb_t, h_t_tilde, decoder_hidden)
        context_vector = self.attn(h_src, decoder_output, mask)
        h_t_tilde = self.tanh(self.concat(torch.cat([decoder_output, context_vector], dim=-1)))
        y_hat = self.generator(h_t_tilde)
        # |y_hat| = (batch_size, 1, output_size)
        y_hats += [y_hat]

        if is_greedy:
            y = torch.topk(y_hat, 1, dim=-1)[1].squeeze(-1)
        else:
            # Take a random sample based on the multinoulli distribution.
            y = torch.multinomial(y_hat.exp().view(batch_size, -1), 1)
        # Put PAD if the sample is done.
        y = y.masked_fill_((1. - is_undone).bool(), data_loader.PAD)
        is_undone = is_undone * torch.ne(y, data_loader.EOS).float()
        # |y| = (batch_size, 1)
        # |is_undone| = (batch_size, 1)
        indice += [y]

    y_hats = torch.cat(y_hats, dim=1)
    indice = torch.cat(indice, dim=-1)
    # |y_hats| = (batch_size, length, output_size)
    # |indice| = (batch_size, length)

    return y_hats, indice
def compute_mask(self, x):
    mask = torch.ne(x, 0).float()
    if self.enable_cuda:
        mask = mask.cuda()
    return mask
input_seq[batch_size - 1, max_src_seq_len - 1] = 0
input_seq[batch_size - 1, max_src_seq_len - 2] = 0
input_seq[batch_size - 2, max_src_seq_len - 1] = 0
input_seq[1][5] = 1
input_seq[3][2] = 1
input_seq[3][5] = 1
input_seq[3][6] = 1
input_seq[0][2] = 1

input_seq_oov = np.copy(input_seq)
input_seq_oov[1][5] = 20
input_seq_oov[3][2] = 20
input_seq_oov[3][5] = 21
input_seq_oov[3][6] = 22
input_seq_oov[0][2] = 20

input_seq = torch.LongTensor(input_seq)
input_seq_oov = torch.LongTensor(input_seq_oov)

src_mask = torch.ne(input_seq, 0)
src_mask = src_mask.type(torch.FloatTensor)

max_num_oovs = 3
t = 5
trg_side_memory_bank = torch.randn((batch_size, t - 1, decoder_size))
final_dist, h_next, context, attn_dist, p_gen, coverage = decoder(
    y, h, memory_bank, src_mask, max_num_oovs, input_seq_oov, coverage, trg_side_memory_bank)
print("Pass")
def generate(self, batch_data, eval_data): generate_corpus = [] idx2token = eval_data.target_idx2token source_text = batch_data['source_idx'] source_length = batch_data['source_length'] source_embeddings = self.source_token_embedder(source_text) encoder_outputs, encoder_states = self.encoder(source_embeddings, source_length) if self.bidirectional: encoder_outputs = encoder_outputs[:, :, self.hidden_size:] + encoder_outputs[:, :, :self.hidden_size] if (self.rnn_type == 'lstm'): encoder_states = (encoder_states[0][::2], encoder_states[1][::2]) else: encoder_states = encoder_states[::2] encoder_masks = torch.ne(source_text, self.padding_token_idx) for bid in range(source_text.size(0)): decoder_states = encoder_states[:, bid, :].unsqueeze(1) encoder_output = encoder_outputs[bid, :, :].unsqueeze(0) encoder_mask = encoder_masks[bid, :].unsqueeze(0) generate_tokens = [] input_seq = torch.LongTensor([[self.sos_token_idx]]).to(self.device) if (self.strategy == 'beam_search'): hypothesis = Beam_Search_Hypothesis( self.beam_size, self.sos_token_idx, self.eos_token_idx, self.device, idx2token ) for gen_idx in range(self.target_max_length): decoder_input = self.target_token_embedder(input_seq) if self.attention_type is not None: decoder_outputs, decoder_states, _ = self.decoder( decoder_input, decoder_states, encoder_output, encoder_mask ) else: decoder_outputs, decoder_states = self.decoder(decoder_input, decoder_states) token_logits = self.vocab_linear(decoder_outputs) if (self.strategy == 'topk_sampling'): token_idx = topk_sampling(token_logits).item() elif (self.strategy == 'greedy_search'): token_idx = greedy_search(token_logits).item() elif (self.strategy == 'beam_search'): if self.attention_type is not None: input_seq, decoder_states, encoder_output, encoder_mask = \ hypothesis.step(gen_idx, token_logits, decoder_states, encoder_output, encoder_mask) else: input_seq, decoder_states = hypothesis.step(gen_idx, token_logits, decoder_states) if (self.strategy in ['topk_sampling', 'greedy_search']): if token_idx == self.eos_token_idx: break else: generate_tokens.append(idx2token[token_idx]) input_seq = torch.LongTensor([[token_idx]]).to(self.device) elif (self.strategy == 'beam_search'): if (hypothesis.stop()): break if (self.strategy == 'beam_search'): generate_tokens = hypothesis.generate() generate_corpus.append(generate_tokens) return generate_corpus
def test_ne(x, y):
    c = torch.ne(torch.add(x, y), y)
    return c
def test_model_trains(self): # Performs one step of training and verifies that the weights are updated, implying some training occurs. with TemporaryDirectory() as tmpdirname: T = torch.cuda.FloatTensor latent = np.random.rand(64, 1, 512) os.makedirs(os.path.dirname(tmpdirname + '/encoded_smiles.latent'), exist_ok=True) with open(tmpdirname + '/encoded_smiles.latent', 'w') as f: json.dump(latent.tolist(), f) C = CreateModelRunner(input_data_path=tmpdirname + '/encoded_smiles.latent', output_model_folder=tmpdirname) C.run() D = Discriminator.load(tmpdirname + '/discriminator.txt') G = Generator.load(tmpdirname + '/generator.txt') G.cuda() D.cuda() optimizer_G = torch.optim.Adam(G.parameters()) optimizer_D = torch.optim.Adam(D.parameters()) json_smiles = open(tmpdirname + '/encoded_smiles.latent', "r") latent_space_mols = np.array(json.load(json_smiles)) testSampler = Sampler(G) latent_space_mols = latent_space_mols.reshape( latent_space_mols.shape[0], 512) dataloader = torch.utils.data.DataLoader( LatentMolsDataset(latent_space_mols), shuffle=True, batch_size=64, drop_last=True) for _, real_mols in enumerate(dataloader): real_mols = real_mols.type(T) before_G_params = [] before_D_params = [] for param in G.parameters(): before_G_params.append(param.view(-1)) before_G_params = torch.cat(before_G_params) for param in D.parameters(): before_D_params.append(param.view(-1)) before_D_params = torch.cat(before_D_params) optimizer_D.zero_grad() fake_mols = testSampler.sample(real_mols.shape[0]) real_validity = D(real_mols) fake_validity = D(fake_mols) #It is not relevant to compute gradient penalty. The test is only interested in if there is a change in #the weights (training), not in giving proper training d_loss = -torch.mean(real_validity) + torch.mean(fake_validity) d_loss.backward() optimizer_D.step() optimizer_G.zero_grad() fake_mols = testSampler.sample(real_mols.shape[0]) fake_validity = D(fake_mols) g_loss = -torch.mean(fake_validity) g_loss.backward() optimizer_G.step() after_G_params = [] after_D_params = [] for param in G.parameters(): after_G_params.append(param.view(-1)) after_G_params = torch.cat(after_G_params) for param in D.parameters(): after_D_params.append(param.view(-1)) after_D_params = torch.cat(after_D_params) self.assertTrue( torch.any(torch.ne(after_G_params, before_G_params))) self.assertTrue( torch.any(torch.ne(after_D_params, before_D_params))) break
def forward(self, x):
    return torch.ne(x, 3)
def forward(self, classifications, bbox_regressions, ldm_regressions, anchors, annotations):
    batch_size = classifications.shape[0]
    classification_losses = []
    bbox_regression_losses = []
    ldm_regression_losses = []

    anchor = anchors[0, :, :]
    anchor_widths = anchor[:, 2] - anchor[:, 0]
    anchor_heights = anchor[:, 3] - anchor[:, 1]
    anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
    anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

    # temp
    positive_indices_list = []

    for j in range(batch_size):
        classification = classifications[j, :, :]
        bbox_regression = bbox_regressions[j, :, :]
        ldm_regression = ldm_regressions[j, :, :]

        annotation = annotations[j, :, :]
        # annotation = annotation[annotation[:, 0] != -1]
        annotation = annotation[annotation[:, 0] > 0]
        bbox_annotation = annotation[:, :4]
        ldm_annotation = annotation[:, 4:]

        if bbox_annotation.shape[0] == 0:
            bbox_regression_losses.append(torch.tensor(0).float().cuda())
            classification_losses.append(torch.tensor(0).float().cuda())
            ldm_regression_losses.append(torch.tensor(0).float().cuda())
            # temp
            positive_indices_list.append([])
            continue

        IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4])
        # IoU, filt_iou = filt_IoU(anchors[0, :, :], bbox_annotation, ldm_annotation)
        IoU_max, IoU_argmax = torch.max(IoU, dim=1)

        targets = torch.ones(classification.shape) * -1
        targets = targets.cuda()

        # those whose iou < 0.3 have no object
        negative_indices = torch.lt(IoU_max, 0.3)
        targets[negative_indices, :] = 0
        targets[negative_indices, 1] = 1

        # those whose iou > 0.5 have an object
        positive_indices = torch.ge(IoU_max, 0.5)
        # temp
        positive_indices_list.append(positive_indices)

        num_positive_anchors = positive_indices.sum()
        # keep positive and negative ratio at 1:3
        keep_negative_anchors = num_positive_anchors * 3

        bbox_assigned_annotations = bbox_annotation[IoU_argmax, :]
        ldm_assigned_annotations = ldm_annotation[IoU_argmax, :]

        targets[positive_indices, :] = 0
        targets[positive_indices, 0] = 1

        # ignore targets with no landmarks
        # f_IoU_max, f_IoU_argmax = torch.max(filt_iou, dim=1)
        # ldm_positive_indices = torch.ge(f_IoU_max, 0.5)
        ldm_sum = ldm_assigned_annotations.sum(dim=1)
        ge0_mask = ldm_sum > 0
        ldm_positive_indices = ge0_mask & positive_indices

        # OHEM
        negative_losses = classification[negative_indices, 1] * -1
        sorted_losses, _ = torch.sort(negative_losses, descending=True)
        if sorted_losses.numel() > keep_negative_anchors:
            sorted_losses = sorted_losses[:keep_negative_anchors]
        positive_losses = classification[positive_indices, 0] * -1

        focal_loss = False
        # focal loss
        if focal_loss:
            alpha = 0.25
            gamma = 2.0
            alpha_factor = torch.ones(targets.shape).cuda() * alpha
            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))
            cls_loss = focal_weight * bce
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).cuda())
            classification_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))
        else:
            if positive_indices.sum() > 0:
                classification_losses.append(positive_losses.mean() + sorted_losses.mean())
            else:
                classification_losses.append(torch.tensor(0).float().cuda())

        # compute bboxes loss
        if positive_indices.sum() > 0:
            # bbox
            bbox_assigned_annotations = bbox_assigned_annotations[positive_indices, :]

            anchor_widths_pi = anchor_widths[positive_indices]
            anchor_heights_pi = anchor_heights[positive_indices]
            anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
            anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

            gt_widths = bbox_assigned_annotations[:, 2] - bbox_assigned_annotations[:, 0]
            gt_heights = bbox_assigned_annotations[:, 3] - bbox_assigned_annotations[:, 1]
            gt_ctr_x = bbox_assigned_annotations[:, 0] + 0.5 * gt_widths
            gt_ctr_y = bbox_assigned_annotations[:, 1] + 0.5 * gt_heights

            targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / (anchor_widths_pi + 1e-14)
            targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / (anchor_heights_pi + 1e-14)
            targets_dw = torch.log(gt_widths / anchor_widths_pi)
            targets_dh = torch.log(gt_heights / anchor_heights_pi)

            bbox_targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh))
            bbox_targets = bbox_targets.t()

            # Rescale
            bbox_targets = bbox_targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda()

            # smooth L1 box loss
            bbox_regression_loss = self.smoothl1(bbox_targets, bbox_regression[positive_indices, :])
            bbox_regression_losses.append(bbox_regression_loss)
        else:
            bbox_regression_losses.append(torch.tensor(0).float().cuda())

        # compute landmarks loss
        if ldm_positive_indices.sum() > 0:
            ldm_assigned_annotations = ldm_assigned_annotations[ldm_positive_indices, :]

            anchor_widths_l = anchor_widths[ldm_positive_indices]
            anchor_heights_l = anchor_heights[ldm_positive_indices]
            anchor_ctr_x_l = anchor_ctr_x[ldm_positive_indices]
            anchor_ctr_y_l = anchor_ctr_y[ldm_positive_indices]

            l0_x = (ldm_assigned_annotations[:, 0] - anchor_ctr_x_l) / (anchor_widths_l + 1e-14)
            l0_y = (ldm_assigned_annotations[:, 1] - anchor_ctr_y_l) / (anchor_heights_l + 1e-14)
            l1_x = (ldm_assigned_annotations[:, 2] - anchor_ctr_x_l) / (anchor_widths_l + 1e-14)
            l1_y = (ldm_assigned_annotations[:, 3] - anchor_ctr_y_l) / (anchor_heights_l + 1e-14)
            l2_x = (ldm_assigned_annotations[:, 4] - anchor_ctr_x_l) / (anchor_widths_l + 1e-14)
            l2_y = (ldm_assigned_annotations[:, 5] - anchor_ctr_y_l) / (anchor_heights_l + 1e-14)
            l3_x = (ldm_assigned_annotations[:, 6] - anchor_ctr_x_l) / (anchor_widths_l + 1e-14)
            l3_y = (ldm_assigned_annotations[:, 7] - anchor_ctr_y_l) / (anchor_heights_l + 1e-14)
            l4_x = (ldm_assigned_annotations[:, 8] - anchor_ctr_x_l) / (anchor_widths_l + 1e-14)
            l4_y = (ldm_assigned_annotations[:, 9] - anchor_ctr_y_l) / (anchor_heights_l + 1e-14)

            ldm_targets = torch.stack((l0_x, l0_y, l1_x, l1_y, l2_x, l2_y, l3_x, l3_y, l4_x, l4_y))
            ldm_targets = ldm_targets.t()

            # Rescale
            scale = torch.ones(1, 10) * 0.1
            ldm_targets = ldm_targets / scale.cuda()

            ldm_regression_loss = self.smoothl1(ldm_targets, ldm_regression[ldm_positive_indices, :])
            ldm_regression_losses.append(ldm_regression_loss)
        else:
            ldm_regression_losses.append(torch.tensor(0).float().cuda())

    return torch.stack(classification_losses), torch.stack(bbox_regression_losses), torch.stack(ldm_regression_losses)
def forward( self, input_ids=None, past_key_values=None, attention_mask=None, position_ids=None, head_mask=None, inputs_embeds=None, labels=None, use_cache=None, output_attentions=None, output_hidden_states=None, return_dict=None, ) -> Union[Tuple[torch.Tensor], SequenceClassifierOutputWithPast]: r""" labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*): Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If `config.num_labels > 1` a classification loss is computed (Cross-Entropy). """ return_dict = return_dict if return_dict is not None else self.config.use_return_dict transformer_outputs = self.transformer( input_ids, past_key_values=past_key_values, attention_mask=attention_mask, position_ids=position_ids, head_mask=head_mask, inputs_embeds=inputs_embeds, use_cache=use_cache, output_attentions=output_attentions, output_hidden_states=output_hidden_states, return_dict=return_dict, ) hidden_states = transformer_outputs[0] logits = self.score(hidden_states) if input_ids is not None: batch_size = input_ids.shape[0] else: batch_size = inputs_embeds.shape[0] if self.config.pad_token_id is None and batch_size != 1: raise ValueError( "Cannot handle batch sizes > 1 if no padding token is defined." ) if self.config.pad_token_id is None: sequence_lengths = -1 else: if input_ids is not None: sequence_lengths = torch.ne( input_ids, self.config.pad_token_id).sum(-1) - 1 else: sequence_lengths = -1 logger.warning( f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be " "unexpected if using padding tokens in conjunction with `inputs_embeds.`" ) pooled_logits = logits[torch.arange(batch_size, device=logits.device), sequence_lengths] loss = None if labels is not None: if self.config.problem_type is None: if self.num_labels == 1: self.config.problem_type = "regression" elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int): self.config.problem_type = "single_label_classification" else: self.config.problem_type = "multi_label_classification" if self.config.problem_type == "regression": loss_fct = MSELoss() if self.num_labels == 1: loss = loss_fct(pooled_logits.squeeze(), labels.squeeze()) else: loss = loss_fct(pooled_logits, labels) elif self.config.problem_type == "single_label_classification": loss_fct = CrossEntropyLoss() loss = loss_fct(pooled_logits.view(-1, self.num_labels), labels.view(-1)) elif self.config.problem_type == "multi_label_classification": loss_fct = BCEWithLogitsLoss() loss = loss_fct(pooled_logits, labels) if not return_dict: output = (pooled_logits, ) + transformer_outputs[1:] return ((loss, ) + output) if loss is not None else output return SequenceClassifierOutputWithPast( loss=loss, logits=pooled_logits, past_key_values=transformer_outputs.past_key_values, hidden_states=transformer_outputs.hidden_states, attentions=transformer_outputs.attentions, )
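# Hedged sketch of the padding-aware pooling idea used in the forward above:
# torch.ne(input_ids, pad_token_id).sum(-1) - 1 gives the index of the last
# non-padding token per sequence, assuming right padding. The tensors below
# are illustrative and not taken from the original model.
import torch

pad_token_id = 0
input_ids = torch.tensor([[5, 6, 7, 0, 0],
                          [3, 4, 0, 0, 0]])
sequence_lengths = torch.ne(input_ids, pad_token_id).sum(-1) - 1   # tensor([2, 1])

logits = torch.randn(2, 5, 3)                        # (batch, seq_len, num_labels)
pooled = logits[torch.arange(2), sequence_lengths]   # one row per sequence, at its last real token
print(sequence_lengths, pooled.shape)                # torch.Size([2, 3])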
def nNanElement(self, x):
    return torch.sum(torch.ne(x, x).float())
def forward(self, s1, s2): # pylint: disable=arguments-differ """ Parameters ---------- s1 : Dict[str, torch.LongTensor] From a ``TextField``. s2 : Dict[str, torch.LongTensor] From a ``TextField``. The model assumes that this s2 contains the answer to the s1, and predicts the beginning and ending positions of the answer within the s2. Returns ------- pair_rep : torch.FloatTensor? Tensor representing the final output of the BiDAF model to be plugged into the next module """ s1_embs = self._highway_layer(self._text_field_embedder(s1)) s2_embs = self._highway_layer(self._text_field_embedder(s2)) if self._elmo is not None: s1_elmo_embs = self._elmo(s1['elmo']) s2_elmo_embs = self._elmo(s2['elmo']) if "words" in s1: s1_embs = torch.cat( [s1_embs, s1_elmo_embs['elmo_representations'][0]], dim=-1) s2_embs = torch.cat( [s2_embs, s2_elmo_embs['elmo_representations'][0]], dim=-1) else: s1_embs = s1_elmo_embs['elmo_representations'][0] s2_embs = s2_elmo_embs['elmo_representations'][0] if self._cove is not None: s1_lens = torch.ne(s1['words'], self.pad_idx).long().sum(dim=-1).data s2_lens = torch.ne(s2['words'], self.pad_idx).long().sum(dim=-1).data s1_cove_embs = self._cove(s1['words'], s1_lens) s1_embs = torch.cat([s1_embs, s1_cove_embs], dim=-1) s2_cove_embs = self._cove(s2['words'], s2_lens) s2_embs = torch.cat([s2_embs, s2_cove_embs], dim=-1) s1_embs = self._dropout(s1_embs) s2_embs = self._dropout(s2_embs) if self._mask_lstms: s1_mask = s1_lstm_mask = util.get_text_field_mask(s1).float() s2_mask = s2_lstm_mask = util.get_text_field_mask(s2).float() s1_mask_2 = util.get_text_field_mask(s1).float() s2_mask_2 = util.get_text_field_mask(s2).float() else: s1_lstm_mask, s2_lstm_mask, s2_lstm_mask_2 = None, None, None s1_enc = self._phrase_layer(s1_embs, s1_lstm_mask) s2_enc = self._phrase_layer(s2_embs, s2_lstm_mask) # Similarity matrix # Shape: (batch_size, s2_length, s1_length) similarity_mat = self._matrix_attention(s2_enc, s1_enc) # s2 representation # Shape: (batch_size, s2_length, s1_length) s2_s1_attention = util.last_dim_softmax(similarity_mat, s1_mask) # Shape: (batch_size, s2_length, encoding_dim) s2_s1_vectors = util.weighted_sum(s1_enc, s2_s1_attention) # batch_size, seq_len, 4*enc_dim s2_w_context = torch.cat([s2_enc, s2_s1_vectors], 2) # s1 representation, using same attn method as for the s2 representation s1_s2_attention = util.last_dim_softmax( similarity_mat.transpose(1, 2).contiguous(), s2_mask) # Shape: (batch_size, s1_length, encoding_dim) s1_s2_vectors = util.weighted_sum(s2_enc, s1_s2_attention) s1_w_context = torch.cat([s1_enc, s1_s2_vectors], 2) if self._elmo is not None and self._deep_elmo: s1_w_context = torch.cat( [s1_w_context, s1_elmo_embs['elmo_representations'][1]], dim=-1) s2_w_context = torch.cat( [s2_w_context, s2_elmo_embs['elmo_representations'][1]], dim=-1) s1_w_context = self._dropout(s1_w_context) s2_w_context = self._dropout(s2_w_context) modeled_s2 = self._dropout( self._modeling_layer(s2_w_context, s2_lstm_mask)) s2_mask_2 = s2_mask_2.unsqueeze(dim=-1) modeled_s2.data.masked_fill_(1 - s2_mask_2.byte().data, -float('inf')) s2_enc_attn = modeled_s2.max(dim=1)[0] modeled_s1 = self._dropout( self._modeling_layer(s1_w_context, s1_lstm_mask)) s1_mask_2 = s1_mask_2.unsqueeze(dim=-1) modeled_s1.data.masked_fill_(1 - s1_mask_2.byte().data, -float('inf')) s1_enc_attn = modeled_s1.max(dim=1)[0] return torch.cat([ s1_enc_attn, s2_enc_attn, torch.abs(s1_enc_attn - s2_enc_attn), s1_enc_attn * s2_enc_attn ], 1)
def getNanMask(self, x):
    return torch.ne(x, x)
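Both helpers above rely on the IEEE-754 rule that NaN never compares equal to itself, so `torch.ne(x, x)` is true exactly at NaN positions. A small self-contained sketch of the same trick (tensor values are illustrative only):

import torch

x = torch.tensor([1.0, float('nan'), 3.0, float('nan')])

nan_mask = torch.ne(x, x)          # tensor([False,  True, False,  True])
num_nans = nan_mask.float().sum()  # tensor(2.)

# torch.isnan(x) produces the same mask in recent PyTorch versions.
print(nan_mask, num_nans)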
def forward(self, act_classifications, sub_regressions, obj_regressions,
            anchors, union_annotations, **kwargs):
    alpha = 0.25
    gamma = 2.0
    batch_size = act_classifications.shape[0]
    act_classification_losses = []
    sub_regression_losses = []
    obj_regression_losses = []
    diff_regression_losses = []

    anchor = anchors[0, :, :]  # assuming all image sizes are the same, which it is
    dtype = anchors.dtype

    anchor_widths = anchor[:, 3] - anchor[:, 1]
    anchor_heights = anchor[:, 2] - anchor[:, 0]
    anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
    anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

    for j in range(batch_size):
        act_classification = act_classifications[j, :, :]  # (h*w*feat_num, num_classes)
        sub_regression = sub_regressions[j, :, :]          # (h*w*feat_num*num_anchor, 4)
        obj_regression = obj_regressions[j, :, :]          # (h*w*feat_num*num_anchor, num_union_class, 4)

        bbox_annotation = union_annotations[j]
        bbox_annotation = bbox_annotation[bbox_annotation[:, 0] >= 0]  # (num_union, K)

        if bbox_annotation.shape[0] == 0:
            if torch.cuda.is_available():
                act_classification_losses.append(torch.tensor(0).to(dtype).cuda())
                sub_regression_losses.append(torch.tensor(0).to(dtype).cuda())
                obj_regression_losses.append(torch.tensor(0).to(dtype).cuda())
                diff_regression_losses.append(torch.tensor(0).to(dtype).cuda())
            else:
                act_classification_losses.append(torch.tensor(0).to(dtype))
                sub_regression_losses.append(torch.tensor(0).to(dtype))
                obj_regression_losses.append(torch.tensor(0).to(dtype))
                diff_regression_losses.append(torch.tensor(0).to(dtype))
            continue

        act_classification = torch.clamp(act_classification, 1e-4, 1.0 - 1e-4)  # (h*w*feat_num, num_classes)

        IoU = calc_iou(anchor[:, :], bbox_annotation[:, 8:12])     # (h*w*anchor_num, num_union)
        IoA_sub = calc_ioa(anchor[:, :], bbox_annotation[:, :4])   # (h*w*anchor_num, num_union)
        IoA_obj = calc_ioa(anchor[:, :], bbox_annotation[:, 4:8])  # (h*w*anchor_num, num_union)

        IoU_max, IoU_argmax = torch.max(IoU, dim=1)  # across anchors from different strides, (h*w*anchor_num, )

        Union_IoU = (IoU > 0.25) * (IoA_sub > 0.25) * (IoA_obj > 0.25)
        Union_IoU = Union_IoU.float()
        IoU_max_ge, IoU_argmax_ge = torch.max(
            0.5 * (IoU + torch.sqrt(IoA_sub * IoA_obj)) * Union_IoU, dim=1)  # (h*w*anchor_num, )

        # compute the loss for classification
        act_targets = torch.ones_like(act_classification, dtype=torch.float32) * -1  # (h*w*feat_num, num_classes)
        if torch.cuda.is_available():
            act_targets = act_targets.cuda()

        act_targets[torch.lt(IoU_max, 0.4), :] = 0  # IoU < 0.4

        positive_indices = torch.max(Union_IoU, dim=1)[0] > 0  # (h*w*anchor_num, )
        positive_indices_reg = torch.ge(IoU_max_ge, 0.1)        # effectively the same as above
        num_positive_anchors = positive_indices.sum()

        assigned_act_annotation_all_fore = torch.mm(
            Union_IoU, bbox_annotation[:, 13:])  # (h*w*anchor_num, num_class)
        assigned_act_annotation_all_fore = torch.clamp(
            assigned_act_annotation_all_fore, 0, 1)  # (h*w*anchor_num, num_class)
        assigned_act_annotation = bbox_annotation[IoU_argmax_ge, 13:]  # (h*w*anchor_num, num_class)
        assigned_annotations = bbox_annotation[IoU_argmax_ge, :]

        assigned_act_annotations_ignore = (
            assigned_act_annotation_all_fore - assigned_act_annotation)
        assigned_act_annotations_ignore = assigned_act_annotations_ignore[positive_indices]
        # assert assigned_act_annotations_ignore.max() <= 1
        # assert assigned_act_annotations_ignore.min() >= 0

        act_targets[positive_indices, :] = 0
        act_targets[positive_indices, :] = assigned_act_annotation[positive_indices, :]
        act_targets = act_targets[positive_indices]
        act_classification = act_classification[positive_indices]
        act_targets = act_targets - assigned_act_annotations_ignore

        alpha_factor_act = torch.ones_like(act_targets, dtype=torch.float32) * alpha
        if torch.cuda.is_available():
            alpha_factor_act = alpha_factor_act.cuda()

        alpha_factor_act = torch.where(torch.eq(act_targets, 1.), alpha_factor_act,
                                       1. - alpha_factor_act)
        focal_weight_act = torch.where(torch.eq(act_targets, 1.),
                                       1. - act_classification, act_classification)
        focal_weight_act = alpha_factor_act * torch.pow(focal_weight_act, gamma)

        act_bce = -(act_targets * torch.log(act_classification) +
                    (1.0 - act_targets) * torch.log(1.0 - act_classification))

        if self.dataset == "vcoco":
            act_cls_loss = focal_weight_act * act_bce
        else:
            act_cls_loss = focal_weight_act * act_bce * self.hoi_weight.to(dtype).cuda()  # classification loss

        act_zeros = torch.zeros_like(act_cls_loss)
        act_cls_loss = torch.where(torch.ne(act_targets, -1.0), act_cls_loss,
                                   act_zeros)  # ignore loss if IoU is too small

        act_classification_losses.append(
            act_cls_loss.sum() / torch.clamp(num_positive_anchors.to(dtype), min=1.0))

        if positive_indices_reg.sum() > 0:
            assigned_annotations_sub = assigned_annotations[positive_indices_reg, 0:4]
            assigned_annotations_obj = assigned_annotations[positive_indices_reg, 4:8]

            sub_regression_pi = sub_regression[positive_indices_reg, :]
            obj_regression_pi = obj_regression[positive_indices_reg, :]

            anchor_widths_pi = anchor_widths[positive_indices_reg]
            anchor_heights_pi = anchor_heights[positive_indices_reg]
            anchor_ctr_x_pi = anchor_ctr_x[positive_indices_reg]
            anchor_ctr_y_pi = anchor_ctr_y[positive_indices_reg]

            sub_regression_loss = regression_loss(
                anchor_widths_pi, anchor_heights_pi, anchor_ctr_x_pi, anchor_ctr_y_pi,
                assigned_annotations_sub, sub_regression_pi)
            obj_regression_loss = regression_loss(
                anchor_widths_pi, anchor_heights_pi, anchor_ctr_x_pi, anchor_ctr_y_pi,
                assigned_annotations_obj, obj_regression_pi)
            diff_regression_loss = union_regression_loss(
                anchor_widths_pi, anchor_heights_pi, anchor_ctr_x_pi, anchor_ctr_y_pi,
                assigned_annotations_sub, assigned_annotations_obj,
                sub_regression_pi, obj_regression_pi)

            sub_regression_losses.append(sub_regression_loss.mean())
            obj_regression_losses.append(obj_regression_loss.mean())
            diff_regression_losses.append(diff_regression_loss.mean())
        else:
            if torch.cuda.is_available():
                sub_regression_losses.append(torch.tensor(0).to(dtype).cuda())
                obj_regression_losses.append(torch.tensor(0).to(dtype).cuda())
                diff_regression_losses.append(torch.tensor(0).to(dtype).cuda())
            else:
                sub_regression_losses.append(torch.tensor(0).to(dtype))
                obj_regression_losses.append(torch.tensor(0).to(dtype))
                diff_regression_losses.append(torch.tensor(0).to(dtype))

    return torch.stack(act_classification_losses).mean(dim=0, keepdim=True), \
        torch.stack(sub_regression_losses).mean(dim=0, keepdim=True), \
        torch.stack(obj_regression_losses).mean(dim=0, keepdim=True), \
        torch.stack(diff_regression_losses).mean(dim=0, keepdim=True)
def main(): """Inference for semantic segmentation. """ # Retreve experiment configurations. args = parse_args('Inference for semantic segmentation.') config.network.kmeans_num_clusters = separate_comma(args.kmeans_num_clusters) config.network.label_divisor = args.label_divisor # Create directories to save results. semantic_dir = os.path.join(args.save_dir, 'semantic_gray') semantic_rgb_dir = os.path.join(args.save_dir, 'semantic_color') # Create color map. color_map = vis_utils.load_color_map(config.dataset.color_map_path) color_map = color_map.numpy() # Create data loaders. test_dataset = ListDataset( data_dir=args.data_dir, data_list=args.data_list, img_mean=config.network.pixel_means, img_std=config.network.pixel_stds, size=None, random_crop=False, random_scale=False, random_mirror=False, training=False) test_image_paths = test_dataset.image_paths # Create models. if config.network.backbone_types == 'panoptic_pspnet_101': embedding_model = resnet_101_pspnet(config).cuda() elif config.network.backbone_types == 'panoptic_deeplab_101': embedding_model = resnet_101_deeplab(config).cuda() else: raise ValueError('Not support ' + config.network.backbone_types) if config.network.prediction_types == 'segsort': prediction_model = segsort(config) else: raise ValueError('Not support ' + config.network.prediction_types) embedding_model = embedding_model.to("cuda:0") prediction_model = prediction_model.to("cuda:0") embedding_model.eval() prediction_model.eval() # Load trained weights. model_path_template = os.path.join(args.snapshot_dir, 'model-{:d}.pth') save_iter = config.train.max_iteration - 1 embedding_model.load_state_dict( torch.load(model_path_template.format(save_iter))['embedding_model'], resume=True) prediction_model.load_state_dict( torch.load(model_path_template.format(save_iter))['prediction_model']) # Define CRF. postprocessor = DenseCRF( iter_max=args.crf_iter_max, pos_xy_std=args.crf_pos_xy_std, pos_w=args.crf_pos_w, bi_xy_std=args.crf_bi_xy_std, bi_rgb_std=args.crf_bi_rgb_std, bi_w=args.crf_bi_w,) # Load memory prototypes. semantic_memory_prototypes, semantic_memory_prototype_labels = None, None if args.semantic_memory_dir is not None: semantic_memory_prototypes, semantic_memory_prototype_labels = ( segsort_others.load_memory_banks(args.semantic_memory_dir)) semantic_memory_prototypes = semantic_memory_prototypes.to("cuda:0") semantic_memory_prototype_labels = semantic_memory_prototype_labels.to("cuda:0") # Remove ignore class. valid_prototypes = torch.ne( semantic_memory_prototype_labels, config.dataset.semantic_ignore_index).nonzero() valid_prototypes = valid_prototypes.view(-1) semantic_memory_prototypes = torch.index_select( semantic_memory_prototypes, 0, valid_prototypes) semantic_memory_prototype_labels = torch.index_select( semantic_memory_prototype_labels, 0, valid_prototypes) # Start inferencing. for data_index in tqdm(range(len(test_dataset))): # Image path. image_path = test_image_paths[data_index] base_name = os.path.basename(image_path).replace('.jpg', '.png') # Image resolution. image_batch, label_batch, _ = test_dataset[data_index] image_h, image_w = image_batch['image'].shape[-2:] # Resize the input image. 
if config.test.image_size > 0: image_batch['image'] = transforms.resize_with_interpolation( image_batch['image'].transpose(1, 2, 0), config.test.image_size, method='bilinear').transpose(2, 0, 1) for lab_name in ['semantic_label', 'instance_label']: label_batch[lab_name] = transforms.resize_with_interpolation( label_batch[lab_name], config.test.image_size, method='nearest') resize_image_h, resize_image_w = image_batch['image'].shape[-2:] # Crop and Pad the input image. image_batch['image'] = transforms.resize_with_pad( image_batch['image'].transpose(1, 2, 0), config.test.crop_size, image_pad_value=0).transpose(2, 0, 1) image_batch['image'] = torch.FloatTensor( image_batch['image'][np.newaxis, ...]).to("cuda:0") pad_image_h, pad_image_w = image_batch['image'].shape[-2:] # Create the fake labels where clustering ignores 255. fake_label_batch = {} for label_name in ['semantic_label', 'instance_label']: lab = np.zeros((resize_image_h, resize_image_w), dtype=np.uint8) lab = transforms.resize_with_pad( lab, config.test.crop_size, image_pad_value=config.dataset.semantic_ignore_index) fake_label_batch[label_name] = torch.LongTensor( lab[np.newaxis, ...]).to("cuda:0") # Put label batch to gpu 1. for k, v in label_batch.items(): label_batch[k] = torch.LongTensor(v[np.newaxis, ...]).to("cuda:0") # Create the ending index of each patch. stride_h, stride_w = config.test.stride crop_h, crop_w = config.test.crop_size npatches_h = math.ceil(1.0 * (pad_image_h-crop_h) / stride_h) + 1 npatches_w = math.ceil(1.0 * (pad_image_w-crop_w) / stride_w) + 1 patch_ind_h = np.linspace( crop_h, pad_image_h, npatches_h, dtype=np.int32) patch_ind_w = np.linspace( crop_w, pad_image_w, npatches_w, dtype=np.int32) # Create place holder for full-resolution embeddings. embeddings = {} counts = torch.FloatTensor( 1, 1, pad_image_h, pad_image_w).zero_().to("cuda:0") with torch.no_grad(): for ind_h in patch_ind_h: for ind_w in patch_ind_w: sh, eh = ind_h - crop_h, ind_h sw, ew = ind_w - crop_w, ind_w crop_image_batch = { k: v[:, :, sh:eh, sw:ew] for k, v in image_batch.items()} # Feed-forward. crop_embeddings = embedding_model.generate_embeddings( crop_image_batch, resize_as_input=True) # Initialize embedding. for name in crop_embeddings: if crop_embeddings[name] is None: continue crop_emb = crop_embeddings[name].to("cuda:0") if name in ['embedding']: crop_emb = common_utils.normalize_embedding( crop_emb.permute(0, 2, 3, 1).contiguous()) crop_emb = crop_emb.permute(0, 3, 1, 2) else: continue if name not in embeddings.keys(): embeddings[name] = torch.FloatTensor( 1, crop_emb.shape[1], pad_image_h, pad_image_w).zero_().to("cuda:0") embeddings[name][:, :, sh:eh, sw:ew] += crop_emb counts[:, :, sh:eh, sw:ew] += 1 for k in embeddings.keys(): embeddings[k] /= counts # KMeans. lab_div = config.network.label_divisor fake_sem_lab = fake_label_batch['semantic_label'] fake_inst_lab = fake_label_batch['instance_label'] clustering_outputs = embedding_model.generate_clusters( embeddings.get('embedding', None), fake_sem_lab, fake_inst_lab) embeddings.update(clustering_outputs) # Generate predictions. outputs = prediction_model( embeddings, {'semantic_memory_prototype': semantic_memory_prototypes, 'semantic_memory_prototype_label': semantic_memory_prototype_labels}, with_loss=False, with_prediction=True) semantic_topk = outputs['semantic_score'] # DenseCRF post-processing. 
semantic_prob = common_utils.one_hot( semantic_topk, max_label=config.dataset.num_classes) semantic_prob = semantic_prob.sum(dim=1).float() / semantic_topk.shape[1] semantic_prob = semantic_prob.view(resize_image_h, resize_image_w, -1) semantic_prob = semantic_prob.data.cpu().numpy().astype(np.float32) semantic_prob = semantic_prob.transpose(2, 0, 1) image = image_batch['image'].data.cpu().numpy().astype(np.float32) image = image[0, :, :resize_image_h, :resize_image_w].transpose(1, 2, 0) image *= np.reshape(config.network.pixel_stds, (1, 1, 3)) image += np.reshape(config.network.pixel_means, (1, 1, 3)) image = image * 255 image = image.astype(np.uint8) semantic_prob = postprocessor(image, semantic_prob) # Save semantic predictions. semantic_pred = np.argmax(semantic_prob, axis=0).astype(np.uint8) semantic_pred = cv2.resize( semantic_pred, (image_w, image_h), interpolation=cv2.INTER_NEAREST) semantic_pred_name = os.path.join( semantic_dir, base_name) if not os.path.isdir(os.path.dirname(semantic_pred_name)): os.makedirs(os.path.dirname(semantic_pred_name)) Image.fromarray(semantic_pred, mode='L').save(semantic_pred_name) semantic_pred_rgb = color_map[semantic_pred] semantic_pred_rgb_name = os.path.join( semantic_rgb_dir, base_name) if not os.path.isdir(os.path.dirname(semantic_pred_rgb_name)): os.makedirs(os.path.dirname(semantic_pred_rgb_name)) Image.fromarray(semantic_pred_rgb, mode='RGB').save( semantic_pred_rgb_name)
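In the memory-prototype step above, `torch.ne` filters out prototypes labeled with the ignore index before `torch.index_select` keeps only the valid rows. A stripped-down sketch of that filtering pattern, using a made-up ignore index of 255 and random prototypes:

import torch

labels = torch.tensor([0, 255, 3, 255, 7])        # 255 marks the ignore class
prototypes = torch.randn(5, 4)

valid = torch.ne(labels, 255).nonzero().view(-1)  # indices of non-ignored entries
kept_protos = torch.index_select(prototypes, 0, valid)
kept_labels = torch.index_select(labels, 0, valid)
print(kept_labels)  # tensor([0, 3, 7])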
def test_comparison_ops_with_type_promotion(self, device):
    value_for_type = {
        torch.uint8: (1 << 5),
        torch.int8: (1 << 5),
        torch.int16: (1 << 10),
        torch.int32: (1 << 20),
        torch.int64: (1 << 35),
        torch.float16: (1 << 10),
        torch.float32: (1 << 20),
        torch.float64: (1 << 35),
        torch.complex64: (1 << 20),
        torch.complex128: (1 << 35),
    }
    comparison_ops = [
        dict(
            name="lt",
            out_op=lambda x, y, d: torch.lt(x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
            ret_op=lambda x, y: torch.lt(x, y),
            compare_op=lambda x, y: x < y,
        ),
        dict(
            name="le",
            out_op=lambda x, y, d: torch.le(x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
            ret_op=lambda x, y: torch.le(x, y),
            compare_op=lambda x, y: x <= y,
        ),
        dict(
            name="gt",
            out_op=lambda x, y, d: torch.gt(x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
            ret_op=lambda x, y: torch.gt(x, y),
            compare_op=lambda x, y: x > y,
        ),
        dict(
            name="ge",
            out_op=lambda x, y, d: torch.ge(x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
            ret_op=lambda x, y: torch.ge(x, y),
            compare_op=lambda x, y: x >= y,
        ),
        dict(
            name="eq",
            out_op=lambda x, y, d: torch.eq(x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
            ret_op=lambda x, y: torch.eq(x, y),
            compare_op=lambda x, y: x == y,
        ),
        dict(
            name="ne",
            out_op=lambda x, y, d: torch.ne(x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
            ret_op=lambda x, y: torch.ne(x, y),
            compare_op=lambda x, y: x != y,
        ),
    ]
    for op in comparison_ops:
        for dt1 in torch.testing.get_all_math_dtypes(device):
            for dt2 in torch.testing.get_all_math_dtypes(device):
                if (dt1.is_complex or dt2.is_complex) and not (op["name"] == "eq" or op["name"] == "ne"):
                    continue
                val1 = value_for_type[dt1]
                val2 = value_for_type[dt2]
                t1 = torch.tensor([val1], dtype=dt1, device=device)
                t2 = torch.tensor([val2], dtype=dt2, device=device)
                expected = torch.tensor([op["compare_op"](val1, val2)], dtype=torch.bool)

                out_res = op["out_op"](t1, t2, device)
                self.assertEqual(out_res, expected)
                self.assertTrue(out_res.dtype == torch.bool)
                self.assertTrue(t1.dtype == dt1)
                self.assertTrue(t2.dtype == dt2)

                out_res = op["ret_op"](t1, t2)
                self.assertEqual(out_res, expected)
                self.assertTrue(out_res.dtype == torch.bool)
                self.assertTrue(t1.dtype == dt1)
                self.assertTrue(t2.dtype == dt2)

                # test that comparing a zero dim tensor with another zero dim
                # tensor has type promotion behavior
                t1 = torch.tensor(val1, dtype=dt1, device=device)
                t2 = torch.tensor(val2, dtype=dt2, device=device)
                expected = torch.tensor(op["compare_op"](val1, val2), dtype=torch.bool)

                out_res = op["out_op"](t1, t2, device)
                self.assertEqual(out_res, expected)
                self.assertTrue(out_res.dtype == torch.bool)
                self.assertTrue(t1.dtype == dt1)
                self.assertTrue(t2.dtype == dt2)

                out_res = op["ret_op"](t1, t2)
                self.assertEqual(out_res, expected)
                self.assertTrue(out_res.dtype == torch.bool)
                self.assertTrue(t1.dtype == dt1)
                self.assertTrue(t2.dtype == dt2)
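As the test above exercises, comparison ops such as `torch.ne` promote mixed input dtypes for the comparison itself but always return a `torch.bool` tensor and leave the inputs untouched. A minimal sketch of that behavior with illustrative values:

import torch

a = torch.tensor([1, 2, 3], dtype=torch.int32)
b = torch.tensor([1.0, 2.5, 3.0], dtype=torch.float64)

res = torch.ne(a, b)      # values are promoted before comparing
print(res)                # tensor([False,  True, False])
print(res.dtype)          # torch.bool
print(a.dtype, b.dtype)   # torch.int32 torch.float64 (unchanged)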
def test_ne(self):
    x = torch.randn(1, 2, 3, 1, requires_grad=False).int()
    y = torch.randn(1, 4, requires_grad=False).int()
    self.assertONNX(lambda x, y: torch.ne(x, y), (x, y))
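The test above checks the ONNX symbolic for `torch.ne`. A minimal, self-contained sketch of exporting a `torch.ne`-based module outside of that test harness, assuming a recent PyTorch where the op is exportable (it is typically lowered to ONNX `Equal` followed by `Not`); the module name and shapes here are made up:

import io
import torch

class NotEqual(torch.nn.Module):
    def forward(self, x, y):
        return torch.ne(x, y)

x = torch.randint(0, 5, (1, 2, 3, 1)).int()
y = torch.randint(0, 5, (1, 4)).int()

buf = io.BytesIO()
torch.onnx.export(NotEqual(), (x, y), buf, opset_version=11)
print(len(buf.getvalue()) > 0)  # a non-empty serialized graph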
def forward(self, classifications, regressions, anchors, annotations, **kwargs):
    alpha = 0.25
    gamma = 2.0
    batch_size = classifications.shape[0]
    classification_losses = []
    regression_losses = []

    anchor = anchors[0, :, :]  # assuming all image sizes are the same, which it is
    dtype = anchors.dtype

    anchor_widths = anchor[:, 3] - anchor[:, 1]
    anchor_heights = anchor[:, 2] - anchor[:, 0]
    anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
    anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

    for j in range(batch_size):
        classification = classifications[j, :, :]  # (h*w*feat_num, num_classes)
        regression = regressions[j, :, :]          # (h*w*feat_num, num_anchor*4)

        bbox_annotation = annotations[j]
        bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]  # (num_boxes, 5)

        if bbox_annotation.shape[0] == 0:
            if torch.cuda.is_available():
                regression_losses.append(torch.tensor(0).to(dtype).cuda())
                classification_losses.append(torch.tensor(0).to(dtype).cuda())
            else:
                regression_losses.append(torch.tensor(0).to(dtype))
                classification_losses.append(torch.tensor(0).to(dtype))
            continue

        classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

        IoU = calc_iou(anchor[:, :], bbox_annotation[:, :4])
        IoU_max, IoU_argmax = torch.max(IoU, dim=1)  # across anchors from different strides

        # compute the loss for classification
        targets = torch.ones_like(classification) * -1
        if torch.cuda.is_available():
            targets = targets.cuda()

        targets[torch.lt(IoU_max, 0.4), :] = 0      # IoU < 0.4
        positive_indices = torch.ge(IoU_max, 0.5)   # IoU > 0.5
        num_positive_anchors = positive_indices.sum()

        assigned_annotations = bbox_annotation[IoU_argmax, :]

        targets[positive_indices, :] = 0
        targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1  # set the corresponding categories as 1

        alpha_factor = torch.ones_like(targets) * alpha
        if torch.cuda.is_available():
            alpha_factor = alpha_factor.cuda()

        alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
        focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
        focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

        bce = -(targets * torch.log(classification) +
                (1.0 - targets) * torch.log(1.0 - classification))

        cls_loss = focal_weight * bce  # classification loss

        zeros = torch.zeros_like(cls_loss)
        if torch.cuda.is_available():
            zeros = zeros.cuda()
        cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, zeros)  # ignore loss if IoU is too small

        classification_losses.append(
            cls_loss.sum() / torch.clamp(num_positive_anchors.to(dtype), min=1.0))

        if positive_indices.sum() > 0:
            assigned_annotations = assigned_annotations[positive_indices, :]

            anchor_widths_pi = anchor_widths[positive_indices]
            anchor_heights_pi = anchor_heights[positive_indices]
            anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
            anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

            gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
            gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
            gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
            gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

            # efficientdet style
            gt_widths = torch.clamp(gt_widths, min=1)
            gt_heights = torch.clamp(gt_heights, min=1)

            targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
            targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
            targets_dw = torch.log(gt_widths / anchor_widths_pi)
            targets_dh = torch.log(gt_heights / anchor_heights_pi)

            targets = torch.stack((targets_dy, targets_dx, targets_dh, targets_dw))
            targets = targets.t()

            regression_diff = torch.abs(targets - regression[positive_indices, :])
            regression_loss = torch.where(
                torch.le(regression_diff, 1.0 / 9.0),
                0.5 * 9.0 * torch.pow(regression_diff, 2),
                regression_diff - 0.5 / 9.0)
            regression_losses.append(regression_loss.mean())
        else:
            if torch.cuda.is_available():
                regression_losses.append(torch.tensor(0).to(dtype).cuda())
            else:
                regression_losses.append(torch.tensor(0).to(dtype))

    # debug
    imgs = kwargs.get('imgs', None)
    if imgs is not None:
        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        obj_list = kwargs.get('obj_list', None)
        out = postprocess(
            imgs.detach(),
            torch.stack([anchors[0]] * imgs.shape[0], 0).detach(),
            regressions.detach(),
            classifications.detach(),
            regressBoxes, clipBoxes, 0.5, 0.3)
        imgs = imgs.permute(0, 2, 3, 1).cpu().numpy()
        imgs = ((imgs * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255).astype(np.uint8)
        imgs = [cv2.cvtColor(img, cv2.COLOR_RGB2BGR) for img in imgs]
        display(out, imgs, obj_list, imshow=False, imwrite=True)

    return torch.stack(classification_losses).mean(dim=0, keepdim=True), \
        torch.stack(regression_losses).mean(dim=0, keepdim=True)
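In the focal loss above, anchors whose max IoU falls in the ignore band keep the sentinel target value -1, and `torch.where(torch.ne(targets, -1.0), cls_loss, zeros)` zeroes their contribution before normalizing by the positive-anchor count. A stripped-down sketch of that masking step with made-up values:

import torch

# per-anchor, per-class targets: 1 = positive, 0 = negative, -1 = ignore
targets = torch.tensor([[1.0, 0.0], [-1.0, -1.0], [0.0, 1.0]])
cls_loss = torch.tensor([[0.2, 0.1], [0.5, 0.4], [0.3, 0.6]])

masked = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros_like(cls_loss))
# the ignored anchor (second row) contributes nothing to the sum
loss = masked.sum() / max(1.0, float((targets == 1.0).sum()))
print(masked)
print(loss)  # 1.2 / 2 = 0.6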
def forward(self, classifications, regressions, anchors, annotations):
    alpha = self.alpha
    gamma = self.gamma
    batch_size = classifications.shape[0]
    classification_losses = []
    regression_losses = []

    anchor = anchors[0, :, :]

    anchor_widths = anchor[:, 2] - anchor[:, 0]
    anchor_heights = anchor[:, 3] - anchor[:, 1]
    anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
    anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

    for j in range(batch_size):
        classification = classifications[j, :, :]
        regression = regressions[j, :, :]

        bbox_annotation = annotations[j, :, :]
        bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

        # If self.loss_with_no_bboxes is True, the focal loss also takes pictures
        # without bboxes into account. So far I haven't figured out the exact
        # parameters that make this method work. (customized by Yu Han Huang)
        if bbox_annotation.shape[0] == 0 and self.loss_with_no_bboxes == False:
            regression_losses.append(torch.tensor(0).float().cuda())
            classification_losses.append(torch.tensor(0).float().cuda())
            continue

        classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

        if bbox_annotation.shape[0] != 0:
            IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4])  # num_anchors x num_annotations
            IoU_max, IoU_argmax = torch.max(IoU, dim=1)               # num_anchors x 1

            # compute the loss for classification
            targets = torch.ones(classification.shape) * -1
            targets = targets.cuda()

            targets[torch.lt(IoU_max, 0.4), :] = 0
            positive_indices = torch.ge(IoU_max, 0.4)
            num_positive_anchors = positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1

            alpha_factor = torch.ones(targets.shape).cuda() * alpha
            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) +
                    (1.0 - targets) * torch.log(1.0 - classification))

            # cls_loss = focal_weight * torch.pow(bce, gamma)
            cls_loss = focal_weight * bce
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss,
                                   torch.zeros(cls_loss.shape).cuda())

            classification_losses.append(
                cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))

        # This is the part of the code where pictures without bboxes are taken
        # into account. (customized by Yu Han Huang)
        elif self.loss_with_no_bboxes == True:
            targets = torch.ones(classification.shape)
            targets = targets.cuda()
            targets[torch.le(classification[:, 0], 0.5), :] = 0
            targets_num = targets.sum()
            targets = 1 - targets

            alpha_factor = torch.ones(targets.shape).cuda() * self.no_bboxes_alpha
            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, self.no_bboxes_gamma)

            bce = -((1.0 - targets) * torch.log(1.0 - classification))
            cls_loss = focal_weight * bce
            classification_losses.append(
                cls_loss.sum() / torch.clamp(targets_num.float(), min=1.0))

        # compute the loss for regression
        if bbox_annotation.shape[0] != 0:
            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
                gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # clip widths to 1
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh))
                targets = targets.t()
                targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda()

                negative_indices = 1 - positive_indices

                regression_diff = torch.abs(targets - regression[positive_indices, :])
                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0)
                regression_losses.append(regression_loss.mean())
            else:
                regression_losses.append(torch.tensor(0).float().cuda())
        else:
            regression_losses.append(torch.tensor(0).float().cuda())

    return torch.stack(classification_losses).mean(dim=0, keepdim=True), \
        torch.stack(regression_losses).mean(dim=0, keepdim=True)