def get_loss(self, image_a_pred, image_b_pred, mask_a, mask_b):
    """Hinge loss between randomly paired masked pixels of two descriptor images.

    ``num_samples`` (10000) positions are drawn with replacement from the
    nonzero entries of each mask, the descriptors at those positions are
    gathered, and the loss is

        sum_k max(0, ||d_a[k] - d_b[k]||^2 - 2 * M_margin)

    with ``M_margin = 0.5``.

    :param image_a_pred: descriptors for image A; indexed along dim 1 and
        reduced along dim 2, so it must be 3-D
        (presumably ``[1, num_pixels, D]`` -- TODO confirm with caller)
    :param image_b_pred: same layout as ``image_a_pred``
    :param mask_a: flat (1-D) mask whose nonzero entries mark candidate
        positions in image A
    :param mask_b: same as ``mask_a`` for image B
    :return: scalar loss tensor; a one-element zero tensor that requires grad
        when either mask has no nonzero entry
    """
    num_samples = 10000
    M_margin = 0.5  # margin parameter
    device = image_a_pred.device

    # get the nonzero indices
    mask_a_indices_flat = torch.nonzero(mask_a)
    mask_b_indices_flat = torch.nonzero(mask_b)
    if len(mask_a_indices_flat) == 0 or len(mask_b_indices_flat) == 0:
        # Only floating-point tensors may require gradients, so the neutral
        # loss is a float zero on the same device as the predictions.
        return torch.zeros(1, dtype=image_a_pred.dtype, device=device,
                           requires_grad=True)

    def sample_masked_positions(indices_flat):
        # Uniform draw with replacement. The random numbers come from the
        # CPU generator (as before) and are moved to the working device.
        count = len(indices_flat)
        picks = torch.floor(torch.rand(num_samples) * count).long()
        # Guard against float rounding producing an index equal to `count`.
        picks = picks.clamp(max=count - 1).to(indices_flat.device)
        return torch.index_select(indices_flat, 0, picks).squeeze(1).to(device)

    randomized_mask_a_indices_flat = sample_masked_positions(mask_a_indices_flat)
    randomized_mask_b_indices_flat = sample_masked_positions(mask_b_indices_flat)

    # index into the image and get descriptors
    random_img_a_object_descriptors = torch.index_select(
        image_a_pred, 1, randomized_mask_a_indices_flat)
    random_img_b_object_descriptors = torch.index_select(
        image_b_pred, 1, randomized_mask_b_indices_flat)

    pixel_wise_loss = (random_img_a_object_descriptors
                       - random_img_b_object_descriptors).pow(2).sum(dim=2)
    pixel_wise_loss = torch.add(pixel_wise_loss, -2 * M_margin)
    zeros_vec = torch.zeros_like(pixel_wise_loss)
    return torch.max(zeros_vec, pixel_wise_loss).sum()
def __call__(self, anchors, objectness, box_regression, targets):
    """
    Compute the objectness and box-regression losses on the sampled anchors.

    Arguments:
        anchors (list[BoxList])
        objectness (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        objectness_loss (Tensor)
        box_loss (Tensor)
    """
    anchors = [cat_boxlist(per_image) for per_image in anchors]
    labels, regression_targets = self.prepare_targets(anchors, targets)

    pos_masks, neg_masks = self.fg_bg_sampler(labels)
    sampled_pos_inds = torch.nonzero(torch.cat(pos_masks, dim=0)).squeeze(1)
    sampled_neg_inds = torch.nonzero(torch.cat(neg_masks, dim=0)).squeeze(1)
    sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

    # Bring every feature level into the layout used by the labels, which
    # were computed with all feature levels concatenated.
    objectness_flattened = []
    box_regression_flattened = []
    for level_scores, level_deltas in zip(objectness, box_regression):
        N, _, H, W = level_scores.shape
        objectness_flattened.append(
            level_scores.permute(0, 2, 3, 1).reshape(N, -1)
        )
        level_deltas = level_deltas.view(N, -1, 4, H, W)
        level_deltas = level_deltas.permute(0, 3, 4, 1, 2)
        box_regression_flattened.append(level_deltas.reshape(N, -1, 4))

    # Concatenate the levels along dim 1, matching how the labels were
    # generated, then flatten over images.
    objectness = cat(objectness_flattened, dim=1).reshape(-1)
    box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4)

    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)

    summed_box_loss = smooth_l1_loss(
        box_regression[sampled_pos_inds],
        regression_targets[sampled_pos_inds],
        beta=1.0 / 9,
        size_average=False,
    )
    # The summed regression loss is normalised by the number of sampled
    # anchors (positives and negatives).
    box_loss = summed_box_loss / (sampled_inds.numel())

    objectness_loss = F.binary_cross_entropy_with_logits(
        objectness[sampled_inds], labels[sampled_inds]
    )

    return objectness_loss, box_loss
def _get_bbox_regression_labels_pytorch(self, bbox_target_data, labels_batch, num_classes):
    """Keep bounding-box regression targets only for foreground ROIs.

    For every ROI whose label is > 0 the row of ``bbox_target_data`` is copied
    into the output and its loss weights are set to
    ``self.BBOX_INSIDE_WEIGHTS``; all other rows stay zero.

    Args:
        bbox_target_data: b x N x 4 regression targets (each row is copied
            whole into a 4-wide output row).
        labels_batch: b x N class labels; values > 0 mark foreground ROIs.
        num_classes: unused, kept for interface compatibility.

    Returns:
        bbox_targets: b x N x 4 tensor of regression targets
        bbox_inside_weights: b x N x 4 tensor of loss weights
    """
    batch_size = labels_batch.size(0)
    rois_per_image = labels_batch.size(1)
    bbox_targets = bbox_target_data.new(batch_size, rois_per_image, 4).zero_()
    bbox_inside_weights = bbox_target_data.new(bbox_targets.size()).zero_()

    for b in range(batch_size):
        image_labels = labels_batch[b]
        if image_labels.sum() == 0:
            continue
        inds = torch.nonzero(image_labels > 0).view(-1)
        if inds.numel() == 0:
            continue
        # One indexed assignment per image instead of one per ROI.
        bbox_targets[b, inds, :] = bbox_target_data[b, inds, :]
        bbox_inside_weights[b, inds, :] = self.BBOX_INSIDE_WEIGHTS

    return bbox_targets, bbox_inside_weights
def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix):
    """
    Restore matches for predictions that are the best available candidate
    of some ground-truth, even when that match quality is low.

    For each ground-truth (row of ``match_quality_matrix``) every prediction
    that attains the row maximum, ties included, has its entry in ``matches``
    overwritten in place with the corresponding entry of ``all_matches``.
    """
    # Best quality reachable for each ground-truth (one value per row).
    best_quality_per_gt = match_quality_matrix.max(dim=1)[0]

    # (gt index, prediction index) pairs attaining that best quality.
    # A ground-truth contributes several rows when predictions tie, e.g.
    #   tensor([[0, 39796],
    #           [1, 32055],
    #           [1, 32070]])
    is_best_for_gt = match_quality_matrix == best_quality_per_gt.unsqueeze(1)
    tied_pairs = torch.nonzero(is_best_for_gt)

    preds_to_restore = tied_pairs[:, 1]
    matches[preds_to_restore] = all_matches[preds_to_restore]
def subsample(self, proposals, targets):
    """
    Perform the positive/negative sampling and return the sampled proposals.

    Note: this function keeps state -- the sampled proposals are also stored
    on ``self._proposals``.

    Arguments:
        proposals (list[BoxList])
        targets (list[BoxList])
    """
    labels, regression_targets = self.prepare_targets(proposals, targets)
    sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)

    proposals = list(proposals)

    # Attach the label and regression-target information to each image's
    # bounding boxes.
    for boxes, box_labels, box_targets in zip(
        proposals, labels, regression_targets
    ):
        boxes.add_field("labels", box_labels)
        boxes.add_field("regression_targets", box_targets)

    # Keep, image by image, only the proposals the sampler selected as
    # positive or negative.
    for img_idx, (pos_mask, neg_mask) in enumerate(
        zip(sampled_pos_inds, sampled_neg_inds)
    ):
        keep = torch.nonzero(pos_mask | neg_mask).squeeze(1)
        proposals[img_idx] = proposals[img_idx][keep]

    self._proposals = proposals
    return proposals
def evaluate(data_loader, model, device):
    """
    Evaluate the current model and return its accuracy on a dev/test set.

    Keyword arguments:
    data_loader: iterable of batches; each batch is a mapping with the keys
        'text', 'len' and 'labels'
    model: model to be evaluated
    device: cpu or gpu device the input text is moved to

    Returns the fraction of correctly classified examples (0.0 when the
    loader yields no examples); the value is also printed.
    """
    model.eval()
    num_examples = 0
    error = 0
    # Evaluation needs no gradients.
    with torch.no_grad():
        for batch in data_loader:
            question_text = batch['text'].to(device)
            question_len = batch['len']

            # NOTE(review): the forward call was missing here (a "your code
            # here" placeholder). The call signature is assumed to be
            # model(text, lengths) -- confirm against the model definition.
            logits = model(question_text, question_len)

            _, top_i = logits.topk(1)
            # Compare on the device of the predictions.
            labels = torch.as_tensor(
                batch['labels'], dtype=torch.long, device=top_i.device)
            num_examples += question_text.size(0)
            error += int((top_i.view(-1) != labels.view(-1)).sum())

    accuracy = 1 - float(error) / num_examples if num_examples else 0.0
    print('accuracy', accuracy)
    return accuracy
def forward(self, x, boxes):
    """
    Pool every box from the feature level it is assigned to.

    Arguments:
        x (list[Tensor]): feature maps for each level
        boxes (list[BoxList]): boxes to be used to perform the pooling operation.
    Returns:
        result (Tensor)
    """
    rois = self.convert_to_roi_format(boxes)

    # With a single pooler there is nothing to dispatch.
    if len(self.poolers) == 1:
        return self.poolers[0](x[0], rois)

    levels = self.map_levels(boxes)

    reference = x[0]
    side = self.output_size[0]
    result = torch.zeros(
        (len(rois), reference.shape[1], side, side),
        dtype=reference.dtype,
        device=reference.device,
    )
    # Each pooler fills the rows of the ROIs mapped to its level.
    for level, (level_features, pooler) in enumerate(zip(x, self.poolers)):
        in_level = torch.nonzero(levels == level).squeeze(1)
        result[in_level] = pooler(level_features, rois[in_level])

    return result
def _compute_loss(self, batch, output, target):
    """Compute the generator loss, with label smoothing when confidence < 1.

    When ``self.confidence < 1`` the hard targets are replaced by a smoothed
    distribution: ``self.one_hot`` repeated per target, with
    ``self.confidence`` scattered onto the true class. Rows whose target is
    ``self.padding_idx`` are zeroed so padding positions carry no target
    mass.

    :param batch: unused here; kept for interface compatibility
    :param output: decoder output, flattened with ``self._bottle`` before
        being passed to ``self.generator``
    :param target: target indices; flattened with ``view(-1)``
    :return: ``(loss, stats)`` where ``stats`` comes from ``self._stats``
    """
    scores = self.generator(self._bottle(output))
    gtruth = target.view(-1)

    if self.confidence < 1:
        tdata = gtruth.data
        # view(-1) always yields a 1-D index. A bare squeeze() turns a single
        # hit into a 0-dim tensor, which the old `dim() > 0` check mistook
        # for "no padding" and left that row unmasked.
        pad_positions = torch.nonzero(tdata.eq(self.padding_idx)).view(-1)
        smoothed = self.one_hot.repeat(gtruth.size(0), 1)
        smoothed.scatter_(1, tdata.unsqueeze(1), self.confidence)
        if pad_positions.numel() > 0:
            smoothed.index_fill_(0, pad_positions, 0)
        gtruth = Variable(smoothed, requires_grad=False)

    loss = self.criterion(scores, gtruth)
    # The reported statistic is the (smoothed) criterion value itself.
    loss_data = loss.data.clone()

    stats = self._stats(loss_data, scores.data, target.view(-1).data)

    return loss, stats
def predict(self, wm, s, a, ls):
    """Predict class probabilities for sentences paired with an aspect.

    The embedding layer is rebuilt from ``wm`` via ``create_emb_layer`` and
    stored on ``self.embedding``. Each of the 40 sentence positions is the
    100-d word embedding concatenated with the averaged aspect embedding for
    positions ``j < ls[i] - 1`` and with zeros otherwise, giving a
    ``len(s) x 40 x 200`` input for ``self.lstm``.

    :param wm: argument forwarded to ``create_emb_layer``
        (presumably the embedding weight matrix -- TODO confirm)
    :param s: sentence token indices
    :param a: aspect token indices
    :param ls: per-sentence lengths, indexable by sample
    :return: output of ``self.softmax``
    """
    with torch.no_grad():
        self.embedding, _ = create_emb_layer(wm)
        s_embedded = self.embedding(s)
        aspect_tokens = self.embedding(a)

        # Average the aspect embedding. The divisor is the number of nonzero
        # scalar entries returned by torch.nonzero.
        aspect_mean = torch.zeros(len(s), 1, 100)
        for i in range(len(aspect_tokens)):
            nonzero_count = len(torch.nonzero(aspect_tokens[i]))
            if nonzero_count:
                aspect_mean[i] = torch.unsqueeze(
                    torch.sum(aspect_tokens[i], 0) / nonzero_count, 0)

        # Concatenate each word in the sentence with the aspect vector
        # (zeros once past the sentence length).
        embedded = torch.zeros(len(s), 40, 200)
        zero_tag = torch.zeros(100).cuda()
        for i in range(len(s_embedded)):
            for j in range(40):
                if j < (ls[i] - 1):
                    tail = torch.squeeze(aspect_mean[i].cuda(), 0)
                else:
                    tail = zero_tag
                word_and_aspect = torch.cat((s_embedded[i][j].cuda(), tail), 0)
                embedded[i][j] = torch.unsqueeze(word_and_aspect, 0)

        out, (h, c) = self.lstm(embedded.cuda())
        # Join the last two entries of the hidden-state stack.
        hidden = self.dropout(torch.cat((h[-2, :, :], h[-1, :, :]), dim=1))
        hidden2pred = self.fc(hidden)
        pred = self.softmax(hidden2pred)
        return pred
def random_sample_from_masked_image_torch(img_mask, num_samples):
    """
    Draw ``num_samples`` pixel locations, with replacement, from the nonzero
    entries of a mask.

    :param img_mask: Numpy array [H,W] or torch.Tensor with shape [H,W]
    :type img_mask:
    :param num_samples: an integer
    :type num_samples:
    :return: result of ``utils.flattened_pixel_locations_to_u_v`` for the
        sampled flat indices (a tuple of torch.LongTensor in (u,v) format,
        each of shape [num_samples]); ``(None, None)`` when the mask has no
        nonzero entry
    :rtype:
    """
    image_height, image_width = img_mask.shape

    if isinstance(img_mask, np.ndarray):
        img_mask_torch = torch.from_numpy(img_mask).float()
    else:
        img_mask_torch = img_mask

    # reshape (not view) so non-contiguous masks, e.g. transposed ones,
    # are accepted too.
    mask = img_mask_torch.reshape(image_width * image_height)
    mask_indices_flat = torch.nonzero(mask)
    if len(mask_indices_flat) == 0:
        return (None, None)

    count = len(mask_indices_flat)
    rand_numbers = torch.rand(num_samples) * count
    rand_indices = torch.floor(rand_numbers).long().clamp(max=count - 1)
    # index_select needs the index on the same device as the data.
    rand_indices = rand_indices.to(mask_indices_flat.device)
    uv_vec_flattened = torch.index_select(
        mask_indices_flat, 0, rand_indices).squeeze(1)
    uv_vec = utils.flattened_pixel_locations_to_u_v(uv_vec_flattened, image_width)
    return uv_vec
def landmark_loss(self, gt_label, gt_landmark, pred_landmark):
    """Landmark loss over the samples labelled -2.

    :param gt_label: per-sample labels; entries equal to -2 select the
        samples that contribute to the loss
    :param gt_landmark: ground-truth landmarks, one row per sample
    :param pred_landmark: predicted landmarks, one row per sample
    :return: ``self.loss_landmark(pred, gt)`` on the selected rows, or a zero
        that stays attached to ``pred_landmark``'s graph when no sample is
        selected
    """
    mask = torch.eq(gt_label, -2)
    # Column 0 of nonzero() is the sample index. Unlike a bare squeeze() it
    # keeps a 1-D index even when exactly one sample matches.
    chose_index = torch.nonzero(mask.data)[:, 0]
    if chose_index.numel() == 0:
        # Averaging over an empty selection would yield NaN.
        return pred_landmark.sum() * 0

    valid_gt_landmark = gt_landmark[chose_index, :]
    valid_pred_landmark = pred_landmark[chose_index, :]
    return self.loss_landmark(valid_pred_landmark, valid_gt_landmark)
def train(train_X, train_Y):
    """Run one pass over the training data, updating the module-level model.

    Relies on module-level ``model``, ``criterion``, ``optimizer``,
    ``BATCH_SIZE``, ``MAX_LENGTH`` and ``TIME_FN``. Every tenth batch the
    loss and digit accuracy of that batch are printed.

    :param train_X: input data, sliced as ``train_X[i:i + BATCH_SIZE, :, :]``
    :param train_Y: target symbols, sliced as ``train_Y[i:i + BATCH_SIZE, :]``;
        the value 2 marks positions that are excluded from the loss
    """
    model.train()
    batch_starts = range(0, len(train_X.data) - BATCH_SIZE + 1, BATCH_SIZE)
    for batch, i in enumerate(batch_starts):

        digits_correct = 0
        digits_total = 0
        batch_loss = 0.

        X, Y = train_X[i:i + BATCH_SIZE, :, :], train_Y[i:i + BATCH_SIZE, :]

        # Buffered model: run the network for the full time budget first,
        # then read the outputs back from the output buffer.
        num_iterations = TIME_FN(2 * MAX_LENGTH)
        model.init_model(BATCH_SIZE, X)
        for _ in range(num_iterations):
            model.forward()

        for j in range(MAX_LENGTH):
            model._buffer_out.pop(1.)
            a = model._buffer_out.read(1.)

            # Only positions whose target is not 2 are scored.
            indices = Y[:, j] != 2
            valid_a = a[indices.view(-1, 1)].view(-1, 3)
            valid_Y = Y[:, j][indices]

            if len(valid_a) == 0:
                continue

            _, valid_y_ = torch.max(valid_a, 1)
            digits_total += len(valid_a)
            digits_correct += len(torch.nonzero((valid_y_ == valid_Y).data))
            batch_loss += criterion(valid_a, valid_Y)

        # Add regularization loss and reset the tracker.
        batch_loss += model.get_and_reset_reg_loss()

        # update the weights
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        if batch % 10 == 9:
            # The printed loss is that of the current batch only.
            last_loss = float(batch_loss.data.sum())
            # float() avoids integer division under Python 2, which always
            # printed 0.00 (or 1.00) for the accuracy.
            if digits_total:
                accuracy = float(digits_correct) / digits_total
            else:
                accuracy = 0.0
            print("batches {}-{}: loss={:.4f}, acc={:.2f}".format(
                batch - 9, batch, last_loss, accuracy))
def box_loss(self, gt_label, gt_offset, pred_offset):
    """Box-offset loss over the samples whose label is nonzero.

    :param gt_label: per-sample labels; nonzero entries select the samples
        that contribute to the loss
    :param gt_offset: ground-truth offsets, one row per sample
    :param pred_offset: predicted offsets, one row per sample; singleton
        trailing dimensions are folded so it matches ``gt_offset``'s layout
    :return: ``self.loss_box(pred, gt)`` on the selected rows, or a zero that
        stays attached to ``pred_offset``'s graph when no sample is selected
    """
    # get the mask element which != 0
    mask = torch.ne(gt_label, 0)
    # Column 0 of nonzero() is the sample index. Unlike a bare squeeze() it
    # keeps a 1-D index even when exactly one sample matches.
    chose_index = torch.nonzero(mask)[:, 0]
    if chose_index.numel() == 0:
        # Averaging over an empty selection would yield NaN.
        return pred_offset.sum() * 0

    # only valid elements can affect the loss
    valid_gt_offset = gt_offset[chose_index, :]
    valid_pred_offset = pred_offset[chose_index, :]
    # Match the target layout explicitly; squeeze() would also drop the
    # sample dimension when a single row is selected.
    valid_pred_offset = valid_pred_offset.reshape(valid_gt_offset.shape)
    return self.loss_box(valid_pred_offset, valid_gt_offset)
def forward(self, s, a, ls):
    """Classify sentences, appending an averaged aspect vector to the LSTM state.

    The sentence embeddings go through ``self.lstm``; the last two entries of
    its hidden-state stack are concatenated, passed through dropout, joined
    with the averaged aspect embedding and fed to ``self.fc`` and
    ``self.softmax``.

    All tensors are placed on the device of ``self.lstm``'s parameters, so
    the module works on CPU as well as on GPU.

    :param s: sentence token indices
    :param a: aspect token indices
    :param ls: unused here; kept for interface compatibility
    :return: output of ``self.softmax``
    """
    device = next(self.lstm.parameters()).device

    with torch.no_grad():
        embedded = self.embedding(s.to(device))
        a_embedded = self.embedding(a.to(device))

        # Average the aspect embedding. The divisor is the number of nonzero
        # scalar entries returned by torch.nonzero.
        a_new_embedded = torch.zeros(len(s), a_embedded.size(-1), device=device)
        for i in range(len(a_embedded)):
            nonzero_count = len(torch.nonzero(a_embedded[i]))
            if nonzero_count:
                a_new_embedded[i] = torch.sum(a_embedded[i], 0) / nonzero_count
        a_embedded = a_new_embedded

    out, (h, c) = self.lstm(embedded)
    hidden = self.dropout(torch.cat((h[-2, :, :], h[-1, :, :]), dim=1))

    # NOTE(review): concatenating under no_grad detaches `hidden`, so no
    # gradient reaches self.lstm through this path. Behaviour is kept as-is;
    # confirm that this is intended.
    with torch.no_grad():
        new_embedded = torch.cat((hidden.to(device), a_embedded), 1)

    hidden2pred = self.fc(new_embedded)
    pred = self.softmax(hidden2pred)
    return pred
def forward(self, x, target):
    """Label-smoothed criterion.

    Builds a target distribution with ``self.confidence`` on the true class
    and ``self.smoothing / (self.size - 2)`` on every other class, zeroes the
    ``self.padding_idx`` column and every row whose target is the padding
    index, stores the result on ``self.true_dist`` and returns
    ``self.criterion(x, true_dist)``.

    :param x: model output with ``x.size(1) == self.size``
    :param target: class indices, one per row of ``x``
    """
    assert x.size(1) == self.size
    labels = target.data

    smoothed = x.data.clone()
    smoothed.fill_(self.smoothing / (self.size - 2))
    smoothed.scatter_(1, labels.unsqueeze(1), self.confidence)
    # The padding class never receives probability mass ...
    smoothed[:, self.padding_idx] = 0
    # ... and rows whose target is padding are blanked entirely.
    pad_rows = torch.nonzero(labels == self.padding_idx)
    if pad_rows.dim() > 0:
        smoothed.index_fill_(0, pad_rows.squeeze(), 0.0)

    self.true_dist = smoothed
    return self.criterion(x, Variable(smoothed, requires_grad=False))
def _nonzero_counter_hook(module, inputs, output):
    """
    Forward hook that accumulates the number of nonzero values seen by a
    module during inference.

    Counts are added to the ``module.__counter_nonzero__`` dict under the
    keys ``"input"``, ``"output"`` and the name of every parameter and
    buffer. Nothing is counted while the module is in training mode.

    The hook receives the inputs and the output of the module, i.e. it runs
    after :func:`forward`. See
    :func:`torch.nn.Module.register_forward_hook`

    Raises ValueError if the module has no ``__counter_nonzero__`` attribute.
    """
    if not hasattr(module, "__counter_nonzero__"):
        raise ValueError("register_counter_nonzero was not called for this network")

    if module.training:
        return

    counters = module.__counter_nonzero__

    def accumulate(key, tensors):
        # Add the nonzero count of every tensor to the running total of `key`.
        total = counters.get(key, 0)
        total += sum(torch.nonzero(t).size(0) for t in tensors)
        counters[key] = total

    accumulate("input", inputs)
    accumulate("output", (output,))

    for name, param in module._parameters.items():
        if param is not None:
            accumulate(name, (param.data,))

    for name, buffer in module._buffers.items():
        if buffer is not None:
            accumulate(name, (buffer,))
def __call__(self, class_logits, box_regression):
    """
    Computes the loss for Faster R-CNN.
    This requires that the subsample method has been called beforehand,
    because the labels and regression targets are read from
    ``self._proposals``.

    Arguments:
        class_logits (list[Tensor])
        box_regression (list[Tensor])

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """
    class_logits = cat(class_logits, dim=0)
    box_regression = cat(box_regression, dim=0)
    device = class_logits.device

    if not hasattr(self, "_proposals"):
        raise RuntimeError("subsample needs to be called before")

    label_fields = [p.get_field("labels") for p in self._proposals]
    target_fields = [p.get_field("regression_targets") for p in self._proposals]
    labels = cat(label_fields, dim=0)
    regression_targets = cat(target_fields, dim=0)

    classification_loss = F.cross_entropy(class_logits, labels)

    # For every positive sample pick the four regression outputs that
    # belong to its ground-truth class (columns 4 * label ... 4 * label + 3),
    # using advanced indexing.
    positive_rows = torch.nonzero(labels > 0).squeeze(1)
    positive_labels = labels[positive_rows]
    column_offsets = torch.tensor([0, 1, 2, 3], device=device)
    class_columns = 4 * positive_labels[:, None] + column_offsets

    box_loss = smooth_l1_loss(
        box_regression[positive_rows[:, None], class_columns],
        regression_targets[positive_rows],
        size_average=False,
        beta=1,
    )
    # The summed loss is normalised by the total number of samples.
    box_loss = box_loss / labels.numel()

    return classification_loss, box_loss
def _evaluate_step(self, x, y, a, j):
    """
    Computes the loss, number of correct guesses, and total number of
    guesses at the jth time step. Examples whose target at step j is the
    null symbol (the network is still reading its input) do not contribute.

    :type x: Variable
    :param x: The input data, represented as a 3D tensor. Each
        example consists of a string of 0s and 1s, followed by
        "null"s. All symbols are in one-hot representation

    :type y: Variable
    :param y: The output data, represented as a 2D tensor. Each
        example consists of a sequence of "null"s, followed by a
        string backwards. All symbols are represented numerically

    :type a: Variable
    :param a: The output of the neural network at the jth time step,
        represented as a 2D tensor. For each i, a[i, :] is the output
        of the neural network for example i at the jth time step

    :type j: int
    :param j: The time step being evaluated

    :rtype: tuple
    :return: The loss, number of correct guesses, and number of
        total guesses at the jth time step; (None, None, None) when
        every target at step j is the null symbol
    """
    null_symbol = self.alphabet[self.null]
    targets_j = y[:, j]
    is_output = targets_j != null_symbol

    # Indexing semantics here changed between PyTorch versions; the mask is
    # flattened to one entry per example before selecting rows.
    valid_a = a[is_output.view(-1)].view(-1, self.alphabet_size)
    valid_y = targets_j[is_output]

    total = len(valid_a)
    if total == 0:
        return None, None, None

    guesses = torch.max(valid_a, 1)[1]
    correct = len(torch.nonzero((guesses == valid_y).data))
    loss = self.criterion(valid_a, valid_y)

    return loss, correct, total
def non_match_descriptor_loss(image_a_pred, image_b_pred, non_matches_a, non_matches_b, M=0.5, invert=False):
    """
    Computes the max(0, M - D(I_a,I_b,u_a,u_b))^2 term

    This is effectively:       "a and b should be AT LEAST M away from each other"
    With invert=True, this is: "a and b should be AT MOST  M away from each other"

    :param image_a_pred: Output of DCN network on image A.
    :type image_a_pred: torch.Variable(torch.FloatTensor) shape [1, W * H, D]
    :param image_b_pred: same as image_a_pred
    :type image_b_pred:
    :param non_matches_a: index tensor of shape [num_non_matches,]; a (u,v)
        pair is mapped to (u,v) ---> image_width * v + u, which matches the
        layout of image_a_pred
    :type non_matches_a: index tensor accepted by torch.index_select
    :param non_matches_b: same as non_matches_a
    :param M: the margin
    :type M: float
    :param invert: penalise distances above M instead of below M
    :type invert: bool
    :return: tuple ``(non_match_loss, num_hard_negatives,
        non_matches_a_descriptors, non_matches_b_descriptors)``; the loss has
        shape [num_non_matches], ``num_hard_negatives`` is the number of
        nonzero loss entries, and the descriptors have shape
        [num_non_matches, D]
    :rtype: tuple
    """
    # Drop only the leading batch dimension. A bare squeeze() would also
    # remove the match dimension (one match) or the descriptor dimension
    # (D == 1), so no special-casing is needed afterwards.
    non_matches_a_descriptors = torch.index_select(image_a_pred, 1, non_matches_a).squeeze(0)
    non_matches_b_descriptors = torch.index_select(image_b_pred, 1, non_matches_b).squeeze(0)

    norm_degree = 2
    descriptor_distance = (non_matches_a_descriptors - non_matches_b_descriptors).norm(norm_degree, 1)
    if not invert:
        non_match_loss = torch.clamp(M - descriptor_distance, min=0).pow(2)
    else:
        non_match_loss = torch.clamp(descriptor_distance - M, min=0).pow(2)

    # "Hard" negatives are the pairs that still incur a nonzero loss.
    hard_negative_idxs = torch.nonzero(non_match_loss)
    num_hard_negatives = len(hard_negative_idxs)

    return non_match_loss, num_hard_negatives, non_matches_a_descriptors, non_matches_b_descriptors
def forward(self, s, a, ls):
    """Classify sentences with two stacked LSTMs and an aspect vector.

    The sentence embeddings go through ``self.lstm1``. The first 20 output
    steps are then concatenated with the averaged aspect embedding for steps
    ``j < ls[i] - 1`` and with zeros for the remaining steps, and the result
    is fed to ``self.lstm2``. The last two entries of its hidden-state stack
    are concatenated and passed through dropout, ``self.fc`` and
    ``self.softmax``.

    All tensors are placed on the device of ``self.lstm1``'s parameters, so
    the module works on CPU as well as on GPU.

    :param s: sentence token indices; ``self.lstm1`` must produce at least
        20 output steps, batch first
    :param a: aspect token indices
    :param ls: per-sentence lengths, indexable by sample
        (assumed to be integers -- TODO confirm)
    :return: output of ``self.softmax``
    """
    max_len = 20  # number of time steps passed on to the second LSTM
    device = next(self.lstm1.parameters()).device

    with torch.no_grad():
        embedded = self.embedding(s.to(device))
        a_embedded = self.embedding(a.to(device))

        # Average the aspect embedding. The divisor is the number of nonzero
        # scalar entries returned by torch.nonzero.
        aspect = torch.zeros(len(s), a_embedded.size(-1), device=device)
        for i in range(len(a_embedded)):
            nonzero_count = len(torch.nonzero(a_embedded[i]))
            if nonzero_count:
                aspect[i] = torch.sum(a_embedded[i], 0) / nonzero_count

    out, (h, c) = self.lstm1(embedded)

    # NOTE(review): building the second-stage input under no_grad detaches
    # `out`, so no gradient reaches self.lstm1 through this path. Behaviour
    # is kept as-is; confirm that this is intended.
    with torch.no_grad():
        aspect_steps = torch.zeros(
            len(out), max_len, aspect.size(-1), dtype=out.dtype, device=device)
        for i in range(len(out)):
            # Steps j < ls[i] - 1 carry the aspect vector, the rest zeros.
            steps_with_aspect = min(max(int(ls[i]) - 1, 0), max_len)
            aspect_steps[i, :steps_with_aspect] = aspect[i]
        new_embedded = torch.cat((out[:, :max_len, :], aspect_steps), dim=2)

    out2, (h2, c2) = self.lstm2(new_embedded)
    hidden = self.dropout(torch.cat((h2[-2, :, :], h2[-1, :, :]), dim=1))
    hidden2pred = self.fc(hidden)
    pred = self.softmax(hidden2pred)
    return pred
def select_top_predictions(self, predictions):
    """
    Keep only the predictions whose `scores` field exceeds
    self.confidence_threshold and return them in descending order of score.

    Arguments:
        predictions (BoxList): the result of the computation by the model.
            It should contain the field `scores`.

    Returns:
        prediction (BoxList): the detected objects. Additional information
            of the detection properties can be found in the fields of
            the BoxList via `prediction.fields()`
    """
    above_threshold = predictions.get_field("scores") > self.confidence_threshold
    confident = predictions[torch.nonzero(above_threshold).squeeze(1)]

    # Re-read the scores after filtering so the ordering refers to the
    # kept predictions only.
    ranking = confident.get_field("scores").sort(0, descending=True)[1]
    return confident[ranking]
def _nms(boxes, overlap_threshold=0.5, mode='union'):
    """Greedy non-maximum suppression in native torch.

    Boxes are visited in descending score order. Each visited box is kept
    and removes every remaining box whose overlap with it exceeds
    ``overlap_threshold``.

    This native torch implementation is slow on cuda for cuda tensors.

    :param boxes: tensor with one row per box, columns
        ``(x1, y1, x2, y2, score)``
    :param overlap_threshold: boxes overlapping a kept box by more than this
        are suppressed
    :param mode: ``'min'`` divides the intersection by the smaller of the two
        areas; any other value uses intersection over union
    :return: CPU ``torch.long`` tensor with the indices of the kept boxes,
        in descending score order
    """
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    scores = boxes[:, 4]

    # The +1 terms compute widths and heights as x2 - x1 + 1 / y2 - y1 + 1.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    _, order = scores.sort(dim=0, descending=True)

    ind_buffer = torch.zeros(scores.shape, dtype=torch.long)
    kept = 0
    while order.numel() > 0:
        top = order[0]
        ind_buffer[kept] = top
        kept += 1
        # The last remaining box has nothing left to suppress, but it must
        # still be recorded (done above).
        if order.numel() == 1:
            break

        rest = order[1:]
        xx1 = torch.max(x1[top], x1[rest])
        yy1 = torch.max(y1[top], y1[rest])
        xx2 = torch.min(x2[top], x2[rest])
        yy2 = torch.min(y2[top], y2[rest])

        w = torch.clamp(xx2 - xx1 + 1, min=0)
        h = torch.clamp(yy2 - yy1 + 1, min=0)
        inter = w * h

        if mode == 'min':
            ovr = inter / torch.min(areas[top], areas[rest])
        else:
            ovr = inter / (areas[top] + areas[rest] - inter)

        # view(-1) keeps the survivor list 1-D even for a single survivor.
        survivors = torch.nonzero(ovr <= overlap_threshold).view(-1)
        order = rest[survivors]

    return ind_buffer[:kept]
def evaluate(test_X, test_Y):
    """Evaluate the module-level model on a test set and print the result.

    Relies on module-level ``model``, ``criterion``, ``MAX_LENGTH``,
    ``TIME_FN`` and ``epoch``. Prints the summed loss and the digit accuracy
    over the whole test set.

    :param test_X: input data; ``test_X.size(0)`` is the number of examples
    :param test_Y: target symbols; the value 2 marks positions that are
        excluded from the loss
    """
    model.eval()

    total_loss = 0.
    digits_correct = 0
    digits_total = 0

    len_X = test_X.size(0)

    # Buffered model: run the network for the full time budget first, then
    # read the outputs back from the output buffer.
    num_iterations = TIME_FN(2 * MAX_LENGTH)
    model.init_model(len_X, test_X)
    for _ in range(num_iterations):
        model.forward()

    for j in range(MAX_LENGTH):
        model._buffer_out.pop(1.)
        a = model._buffer_out.read(1.)

        # Only positions whose target is not 2 are scored.
        indices = test_Y[:, j] != 2
        valid_a = a[indices.view(-1, 1)].view(-1, 3)
        valid_Y = test_Y[:, j][indices]

        if len(valid_a) == 0:
            continue

        _, valid_y_ = torch.max(valid_a, 1)
        digits_total += len(valid_a)
        digits_correct += len(torch.nonzero((valid_y_ == valid_Y).data))
        total_loss += criterion(valid_a, valid_Y)

    if digits_total:
        mean_loss = float(total_loss.data.sum())
        # float() avoids integer division under Python 2, which always
        # printed 0.00 (or 1.00) for the accuracy.
        accuracy = float(digits_correct) / digits_total
    else:
        # No scored position: total_loss is still the plain float 0.
        mean_loss = 0.0
        accuracy = 0.0
    print("epoch {}: loss={:.4f}, acc={:.2f}".format(
        epoch, mean_loss, accuracy))
def _evaluate_step(self, x, y, a, j):
    """
    Computes the loss, number of correct guesses, and total number of
    guesses at the jth time step. Examples whose target at step j is the
    null symbol are left out.

    :type x: Variable
    :param x: The input data, represented as a 3D tensor

    :type y: Variable
    :param y: The output data, represented as a 2D tensor

    :type a: Variable
    :param a: The output of the neural network at the jth time step,
        represented as a 2D tensor

    :type j: int
    :param j: The time step being evaluated

    :rtype: tuple
    :return: The loss, number of correct guesses, and number of
        total guesses at the jth time step; (None, None, None) when
        no example has a non-null target at step j
    """
    step_targets = y[:, j]
    keep = step_targets != self.alphabet[self.null]

    # Indexing conventions changed with PyTorch version; the mask is
    # flattened to one entry per example before selecting rows.
    valid_a = a[keep.view(-1)].view(-1, self.alphabet_size)
    valid_y = step_targets[keep]
    if len(valid_a) == 0:
        return None, None, None

    _, predicted = torch.max(valid_a, 1)
    hits = (predicted == valid_y).data

    total = len(valid_a)
    correct = len(torch.nonzero(hits))
    loss = self.criterion(valid_a, valid_y)
    return loss, correct, total
def assign_wrt_overlaps(self, overlaps, gt_labels=None):
    """Assign w.r.t. the overlaps of bboxes with gts.

    Each bbox receives one code in ``assigned_gt_inds``:
        -1: ignore
         0: background
         k > 0: matched to ground truth ``k - 1``

    Args:
        overlaps (Tensor): Overlaps between k gt_bboxes and n bboxes,
            shape(k, n).
        gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).

    Returns:
        :obj:`AssignResult`: The assign result.
    """
    num_gts, num_bboxes = overlaps.size(0), overlaps.size(1)

    # 1. assign -1 by default
    # assigned_gt_inds holds, for every proposal, the id of its gt
    assigned_gt_inds = overlaps.new_full((num_bboxes, ),
                                         -1,
                                         dtype=torch.long)

    # Early exit for degenerate inputs
    if num_gts == 0 or num_bboxes == 0:
        # No ground truth or boxes, return empty assignment
        max_overlaps = overlaps.new_zeros((num_bboxes, ))
        if num_gts == 0:
            # No truth, assign everything to background
            assigned_gt_inds[:] = 0
        assigned_labels = None
        if gt_labels is not None:
            assigned_labels = overlaps.new_zeros((num_bboxes, ),
                                                 dtype=torch.long)
        return AssignResult(num_gts,
                            assigned_gt_inds,
                            max_overlaps,
                            labels=assigned_labels)

    # for each anchor: its best overlap over all gts, and which gt gives it
    max_overlaps, argmax_overlaps = overlaps.max(dim=0)
    # for each gt: its best overlap over all proposals, and which proposal
    gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1)

    # 2. assign negative: proposals whose best IoU lies below the negative
    # threshold (or inside the negative interval) become background (0)
    neg_iou_thr = self.neg_iou_thr
    if isinstance(neg_iou_thr, float):
        negative = (max_overlaps >= 0) & (max_overlaps < neg_iou_thr)
        assigned_gt_inds[negative] = 0
    elif isinstance(neg_iou_thr, tuple):
        assert len(neg_iou_thr) == 2
        negative = ((max_overlaps >= neg_iou_thr[0])
                    & (max_overlaps < neg_iou_thr[1]))
        assigned_gt_inds[negative] = 0

    # 3. assign positive: above positive IoU threshold
    # The stored id is the gt index + 1, because gt indices start at 0 and
    # 0 is reserved for background.
    pos_inds = max_overlaps >= self.pos_iou_thr
    assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1

    # 4. assign fg: for each gt, proposals with highest IoU
    # Applies only to gts whose best IoU reaches min_pos_iou.
    for gt_index in range(num_gts):
        best_iou = gt_max_overlaps[gt_index]
        if best_iou < self.min_pos_iou:
            continue
        if self.gt_max_assign_all:
            # every proposal tying for this gt's best IoU is assigned to it
            max_iou_inds = overlaps[gt_index, :] == best_iou
            assigned_gt_inds[max_iou_inds] = gt_index + 1
        else:
            assigned_gt_inds[gt_argmax_overlaps[gt_index]] = gt_index + 1

    # Derive the label of every proposal from the gt it was assigned to.
    if gt_labels is None:
        assigned_labels = None
    else:
        assigned_labels = assigned_gt_inds.new_zeros((num_bboxes, ))
        pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze()
        if pos_inds.numel() > 0:
            # -1 because the stored ids are offset by one from the gt index
            assigned_labels[pos_inds] = gt_labels[
                assigned_gt_inds[pos_inds] - 1]

    return AssignResult(num_gts,
                        assigned_gt_inds,
                        max_overlaps,
                        labels=assigned_labels)
def _resample_indices(indices, target_count):
    """Return exactly ``target_count`` entries of the 1-D tensor ``indices``.

    With fewer entries than requested, all entries are kept and the rest is
    filled with uniformly drawn repeats; otherwise ``target_count`` entries
    are drawn without replacement.
    """
    available = indices.size(0)
    if available < target_count:
        choice = torch.cat([
            torch.arange(0, available).long(),
            torch.randint(available, (target_count - available, )).long()
        ], 0)
    else:
        choice = torch.from_numpy(
            np.random.choice(available, target_count, replace=False)).long()
    # Index on the device the data lives on instead of forcing CUDA.
    return indices[choice.to(indices.device)]


def selectProposal(posi_prop_idx,
                   nega_prop_idx,
                   posi_idx,
                   nega_idx,
                   max_prop=4000,
                   ratio1=0.5,
                   max_grasp=2000,
                   ratio2=0.5):
    """Resample proposal and grasp indices to fixed sizes.

    ``posi_idx`` / ``nega_idx`` index into ``posi_prop_idx``. They are
    resampled to ``int(max_grasp * ratio2)`` and the remainder of
    ``max_grasp``; ``nega_prop_idx`` is resampled to
    ``max_prop - int(max_prop * ratio1)``. When the positive-proposal budget
    ``int(max_prop * ratio1)`` exceeds ``max_grasp``, the positive proposals
    are topped up with proposals that were not selected as grasps.

    All inputs are flattened, and results stay on the device of the inputs.

    :return: tuple ``(posi_prop_idx, nega_prop_idx, posi_idx, nega_idx)``;
        the returned ``posi_idx`` / ``nega_idx`` hold the selected values of
        ``posi_prop_idx``, and the returned ``posi_prop_idx`` is their
        concatenation (plus the top-up, if any)
    """
    posi_prop_idx = posi_prop_idx.view(-1)
    nega_prop_idx = nega_prop_idx.view(-1)
    posi_idx = posi_idx.view(-1)
    nega_idx = nega_idx.view(-1)
    posi_prop_num = posi_prop_idx.size(0)

    posi_num_exp = int(max_grasp * ratio2)
    nega_num_exp = max_grasp - posi_num_exp
    posi_prop_num_exp = int(max_prop * ratio1)
    nega_prop_num_exp = max_prop - posi_prop_num_exp

    # The order of these three calls fixes the order of the random draws.
    posi_idx = _resample_indices(posi_idx, posi_num_exp)
    nega_idx = _resample_indices(nega_idx, nega_num_exp)
    nega_prop_idx = _resample_indices(nega_prop_idx, nega_prop_num_exp)

    if posi_prop_num_exp > max_grasp:
        # Mark the proposals already used as grasps, then draw the missing
        # ones (with replacement) from the unmarked proposals.
        select = posi_prop_idx.new(posi_prop_num).zero_()
        select[posi_idx] = 1
        select[nega_idx] = 1
        un_select = torch.nonzero(select == 0).view(-1)
        choice = torch.from_numpy(
            np.random.choice(un_select.size(0),
                             posi_prop_num_exp - max_grasp)).long()
        un_select = un_select[choice.to(un_select.device)]
        posi_idx = posi_prop_idx[posi_idx]
        nega_idx = posi_prop_idx[nega_idx]
        posi_prop_idx = torch.cat(
            [posi_idx, nega_idx, posi_prop_idx[un_select]], 0)
    else:
        posi_idx = posi_prop_idx[posi_idx]
        nega_idx = posi_prop_idx[nega_idx]
        posi_prop_idx = torch.cat([posi_idx, nega_idx], 0)
    return posi_prop_idx, nega_prop_idx, posi_idx, nega_idx
def batch_find_pixel_correspondences(img_a_depth, img_a_pose, img_b_depth, img_b_pose,
                                     uv_a=None, num_attempts=20, device='CPU',
                                     img_a_mask=None, K=None):
    """
    Computes pixel correspondences in batch

    :param img_a_depth: depth image for image a
    :type  img_a_depth: numpy 2d array (H x W) encoded as a uint16
    --
    :param img_a_pose:  pose for image a, in right-down-forward optical frame
    :type  img_a_pose:  numpy 2d array, 4 x 4 (homogeneous transform)
    --
    :param img_b_depth: depth image for image b
    :type  img_b_depth: numpy 2d array (H x W) encoded as a uint16
    --
    :param img_b_pose:  pose for image b, in right-down-forward optical frame
    :type  img_b_pose:  numpy 2d array, 4 x 4 (homogeneous transform)
    --
    :param uv_a:        optional arg, a tuple of (u,v) pixel positions for which to find matches
    :type  uv_a:        each element of tuple is either an int, or a list-like (castable to torch.LongTensor)
    --
    :param num_attempts: if random sampling, how many pixels will be _attempted_ to find matches for.  Note that
                            this is not the same as asking for a specific number of matches, since many attempted matches
                            will either be occluded or outside of field-of-view.
    :type  num_attempts: int
    --
    :param device:      either 'CPU' or 'GPU'; any other value leaves the module-level
                            dtype_float / dtype_long untouched
    :type  device:      string
    --
    :param img_a_mask:  optional arg, an image where each nonzero pixel will be used as a mask
    :type  img_a_mask:  ndarray, of shape (H, W)
    --
    :param K:           optional arg, camera matrix used to (un)project pixels; when None,
                            get_default_K_matrix() is used
    :type  K:           numpy 2d array (presumably 3 x 3 -- it multiplies a 3 x N stack)
    --
    :return:            "Tuple of tuples", i.e. pixel position tuples for image a and image b (uv_a, uv_b).
                        Each of these is a tuple of pixel positions. (None, None) is returned
                        as soon as a pruning stage leaves no candidates.
    :rtype:             Each of uv_a, uv_b is a tuple (u, v) of 1-D torch tensors
    """
    assert (img_a_depth.shape == img_b_depth.shape)
    image_width = img_a_depth.shape[1]
    image_height = img_b_depth.shape[0]

    # The tensor types are module-level globals; they are (re)bound here on
    # every call, so they remain visible to other code in this module.
    global dtype_float
    global dtype_long
    if device == 'CPU':
        dtype_float = torch.FloatTensor
        dtype_long = torch.LongTensor
    if device == 'GPU':
        dtype_float = torch.cuda.FloatTensor
        dtype_long = torch.cuda.LongTensor

    if uv_a is None:
        uv_a = pytorch_rand_select_pixel(width=image_width, height=image_height,
                                         num_samples=num_attempts)
    else:
        uv_a = (torch.LongTensor([uv_a[0]]).type(dtype_long),
                torch.LongTensor([uv_a[1]]).type(dtype_long))
        num_attempts = 1

    if img_a_mask is None:
        uv_a_vec = (torch.ones(num_attempts).type(dtype_long) * uv_a[0],
                    torch.ones(num_attempts).type(dtype_long) * uv_a[1])
        uv_a_vec_flattened = uv_a_vec[1] * image_width + uv_a_vec[0]
    else:
        img_a_mask = torch.from_numpy(img_a_mask).type(dtype_float)

        # Option A: This next line samples from img mask
        uv_a_vec = random_sample_from_masked_image_torch(img_a_mask, num_samples=num_attempts)
        if uv_a_vec[0] is None:
            return (None, None)

        # Option B: These 4 lines grab ALL from img mask
        # mask_a = img_a_mask.squeeze(0)
        # mask_a = mask_a/torch.max(mask_a)
        # nonzero = (torch.nonzero(mask_a)).type(dtype_long)
        # uv_a_vec = (nonzero[:,1], nonzero[:,0])

        # Always use this line
        uv_a_vec_flattened = uv_a_vec[1] * image_width + uv_a_vec[0]

    if K is None:
        K = get_default_K_matrix()

    K_inv = inv(K)
    # NOTE(review): these two values are not used below; the calls are kept
    # because the helpers are defined outside this function.
    body_to_rdf = get_body_to_rdf()
    rdf_to_body = inv(body_to_rdf)

    img_a_depth_torch = torch.from_numpy(img_a_depth).type(dtype_float)
    img_a_depth_torch = torch.squeeze(img_a_depth_torch, 0)
    img_a_depth_torch = img_a_depth_torch.view(-1, 1)

    depth_vec = torch.index_select(img_a_depth_torch, 0, uv_a_vec_flattened) * 1.0 / DEPTH_IM_SCALE
    depth_vec = depth_vec.squeeze(1)

    # Prune based on
    # Case 1: depth is zero (for this data, this means no-return)
    nonzero_indices = torch.nonzero(depth_vec)
    # numel() works whether an empty result is 0-dimensional or shaped (0, 1);
    # the previous dim() == 0 test missed the latter.
    if nonzero_indices.numel() == 0:
        return (None, None)
    nonzero_indices = nonzero_indices.squeeze(1)
    depth_vec = torch.index_select(depth_vec, 0, nonzero_indices)

    # prune u_vec and v_vec, then multiply by already pruned depth_vec
    u_a_pruned = torch.index_select(uv_a_vec[0], 0, nonzero_indices)
    u_vec = u_a_pruned.type(dtype_float) * depth_vec

    v_a_pruned = torch.index_select(uv_a_vec[1], 0, nonzero_indices)
    v_vec = v_a_pruned.type(dtype_float) * depth_vec

    z_vec = depth_vec

    full_vec = torch.stack((u_vec, v_vec, z_vec))

    K_inv_torch = torch.from_numpy(K_inv).type(dtype_float)
    point_camera_frame_rdf_vec = K_inv_torch.mm(full_vec)

    point_world_frame_rdf_vec = apply_transform_torch(
        point_camera_frame_rdf_vec, torch.from_numpy(img_a_pose).type(dtype_float))
    point_camera_2_frame_rdf_vec = apply_transform_torch(
        point_world_frame_rdf_vec,
        torch.from_numpy(invert_transform(img_b_pose)).type(dtype_float))

    K_torch = torch.from_numpy(K).type(dtype_float)
    vec2_vec = K_torch.mm(point_camera_2_frame_rdf_vec)

    u2_vec = vec2_vec[0] / vec2_vec[2]
    v2_vec = vec2_vec[1] / vec2_vec[2]

    z2_vec = vec2_vec[2]

    # Prune based on
    # Case 2: the pixels projected into image b are outside FOV
    # u2_vec bounds should be: 0, image_width
    # v2_vec bounds should be: 0, image_height

    ## do u2-based pruning
    # Out-of-range values are replaced by zero and then dropped via nonzero().
    # NOTE(review): this also drops a projection that lands exactly on 0.0.
    u2_vec_lower_bound = 0.0
    epsilon = 1e-3
    u2_vec_upper_bound = image_width * 1.0 - epsilon  # careful, needs to be epsilon less!!
    lower_bound_vec = torch.ones_like(u2_vec) * u2_vec_lower_bound
    upper_bound_vec = torch.ones_like(u2_vec) * u2_vec_upper_bound
    zeros_vec = torch.zeros_like(u2_vec)

    u2_vec = where(u2_vec < lower_bound_vec, zeros_vec, u2_vec)
    u2_vec = where(u2_vec > upper_bound_vec, zeros_vec, u2_vec)
    in_bound_indices = torch.nonzero(u2_vec)
    if in_bound_indices.numel() == 0:
        return (None, None)
    in_bound_indices = in_bound_indices.squeeze(1)

    # apply pruning
    u2_vec = torch.index_select(u2_vec, 0, in_bound_indices)
    v2_vec = torch.index_select(v2_vec, 0, in_bound_indices)
    z2_vec = torch.index_select(z2_vec, 0, in_bound_indices)
    u_a_pruned = torch.index_select(u_a_pruned, 0, in_bound_indices)  # also prune from first list
    v_a_pruned = torch.index_select(v_a_pruned, 0, in_bound_indices)  # also prune from first list

    ## do v2-based pruning
    v2_vec_lower_bound = 0.0
    v2_vec_upper_bound = image_height * 1.0 - epsilon
    lower_bound_vec = torch.ones_like(v2_vec) * v2_vec_lower_bound
    upper_bound_vec = torch.ones_like(v2_vec) * v2_vec_upper_bound
    zeros_vec = torch.zeros_like(v2_vec)

    v2_vec = where(v2_vec < lower_bound_vec, zeros_vec, v2_vec)
    v2_vec = where(v2_vec > upper_bound_vec, zeros_vec, v2_vec)
    in_bound_indices = torch.nonzero(v2_vec)
    if in_bound_indices.numel() == 0:
        return (None, None)
    in_bound_indices = in_bound_indices.squeeze(1)

    # apply pruning
    u2_vec = torch.index_select(u2_vec, 0, in_bound_indices)
    v2_vec = torch.index_select(v2_vec, 0, in_bound_indices)
    z2_vec = torch.index_select(z2_vec, 0, in_bound_indices)
    u_a_pruned = torch.index_select(u_a_pruned, 0, in_bound_indices)  # also prune from first list
    v_a_pruned = torch.index_select(v_a_pruned, 0, in_bound_indices)  # also prune from first list

    # Prune based on
    # Case 3: the pixels in image b are occluded, OR there is no depth return in image b so we aren't sure

    img_b_depth_torch = torch.from_numpy(img_b_depth).type(dtype_float)
    img_b_depth_torch = torch.squeeze(img_b_depth_torch, 0)
    img_b_depth_torch = img_b_depth_torch.view(-1, 1)

    # simply round to int -- good enough
    # occlusion check for smooth surfaces
    uv_b_vec_flattened = (v2_vec.type(dtype_long) * image_width + u2_vec.type(dtype_long))

    # NOTE(review): image a is scaled by DEPTH_IM_SCALE but image b by a
    # literal 1000 -- confirm the two are meant to be the same value.
    depth2_vec = torch.index_select(img_b_depth_torch, 0, uv_b_vec_flattened) * 1.0 / 1000
    depth2_vec = depth2_vec.squeeze(1)

    # occlusion margin, in meters
    occlusion_margin = 0.003
    z2_vec = z2_vec - occlusion_margin
    zeros_vec = torch.zeros_like(depth2_vec)

    depth2_vec = where(depth2_vec < zeros_vec, zeros_vec, depth2_vec)  # to be careful, prune any negative depths
    depth2_vec = where(depth2_vec < z2_vec, zeros_vec, depth2_vec)  # prune occlusions
    non_occluded_indices = torch.nonzero(depth2_vec)
    if non_occluded_indices.numel() == 0:
        return (None, None)
    non_occluded_indices = non_occluded_indices.squeeze(1)
    depth2_vec = torch.index_select(depth2_vec, 0, non_occluded_indices)

    # apply pruning
    u2_vec = torch.index_select(u2_vec, 0, non_occluded_indices)
    v2_vec = torch.index_select(v2_vec, 0, non_occluded_indices)
    u_a_pruned = torch.index_select(u_a_pruned, 0, non_occluded_indices)  # also prune from first list
    v_a_pruned = torch.index_select(v_a_pruned, 0, non_occluded_indices)  # also prune from first list

    uv_b_vec = (u2_vec, v2_vec)
    uv_a_vec = (u_a_pruned, v_a_pruned)
    return (uv_a_vec, uv_b_vec)
def torch_non_max_suppression(detections, confidence_threshold, num_classes, nms_conf):
    """Threshold detections on objectness and apply per-class NMS.

    Args:
        detections: tensor indexed as [image, box, attribute]. Attributes 0-3
            are read as (centre_x, centre_y, width, height), attribute 4 as the
            object confidence and attributes 5 .. 5 + num_classes as class
            scores.
        confidence_threshold: boxes whose object confidence is not strictly
            greater than this value are discarded.
        num_classes: number of class-score columns following column 4.
        nms_conf: a lower-scored box of the same class is suppressed when its
            IoU with a kept box is not below this value.

    Returns:
        A tensor with 8 columns per surviving box: image index within the
        batch, the four corner coordinates, object confidence, best class score
        and best class index. Returns the integer 0 when nothing survives.
    """
    # Zero every row whose object confidence does not exceed the threshold.
    object_confidence_mask = (detections[:, :, 4] > confidence_threshold).float().unsqueeze(2)
    detections = detections * object_confidence_mask

    # Convert (centre_x, centre_y, width, height) to
    # (top_left_x, top_left_y, right_bottom_x, right_bottom_y).
    centre_x = detections[:, :, 0].clone()
    centre_y = detections[:, :, 1].clone()
    half_width = detections[:, :, 2] / 2
    half_height = detections[:, :, 3] / 2
    detections[:, :, 0] = centre_x - half_width
    detections[:, :, 1] = centre_y - half_height
    detections[:, :, 2] = centre_x + half_width
    detections[:, :, 3] = centre_y + half_height

    kept = []
    for index in range(detections.size(0)):
        image_prediction = detections[index]

        # Best class score and its class index for every box.
        max_confidence, max_confidence_class = torch.max(
            input=image_prediction[:, 5:5 + num_classes], dim=1)
        max_confidence = max_confidence.float().unsqueeze(1)
        max_confidence_class = max_confidence_class.float().unsqueeze(1)

        # 7 columns: 4 corners, object_confidence, max_confidence, max_confidence_class
        sequence = (image_prediction[:, :5], max_confidence, max_confidence_class)
        image_prediction = torch.cat(sequence, dim=1)

        # Rows that survived the objectness threshold have a non-zero column 4.
        non_zero_index = torch.nonzero(image_prediction[:, 4]).view(-1)
        if non_zero_index.numel() == 0:
            # No detection in this image.
            continue
        image_prediction_ = image_prediction[non_zero_index].view(-1, 7)

        try:
            image_classes = unique(image_prediction_[:, -1])  # the last column is the class index
        except IndexError:
            print('There is no object in this image')
            continue

        for class_ in image_classes:
            class_mask = image_prediction_ * (image_prediction_[:, -1] == class_).float().unsqueeze(1)
            class_mask_index = torch.nonzero(class_mask[:, -2]).squeeze()
            image_prediction_class = image_prediction_[class_mask_index].view(-1, 7)

            # Sort the boxes of this class by object confidence, best first.
            conf_sort_index = torch.sort(image_prediction_class[:, 4], descending=True)[1]
            image_prediction_class = image_prediction_class[conf_sort_index]

            no_of_boxes = image_prediction_class.size(0)
            for box_index in range(no_of_boxes):
                # IoU of this box with every box ranked after it. The tensor
                # shrinks while iterating, so running past its end ends the loop.
                try:
                    ious = bbox_iou(image_prediction_class[box_index].unsqueeze(0),
                                    image_prediction_class[box_index + 1:])
                except ValueError:
                    break
                except IndexError:
                    break

                # Zero out the lower-ranked boxes that overlap too much ...
                iou_mask = (ious < nms_conf).float().unsqueeze(1)
                image_prediction_class[box_index + 1:] *= iou_mask

                # ... and drop the zeroed rows.
                non_zero_index = torch.nonzero(image_prediction_class[:, 4]).squeeze()
                image_prediction_class = image_prediction_class[non_zero_index].view(-1, 7)

            # Prefix every surviving box with the index of its image in the batch.
            batch_index = image_prediction_class.new(
                image_prediction_class.size(0), 1).fill_(index)
            kept.append(torch.cat((batch_index, image_prediction_class), 1))

    if not kept:
        return 0
    return torch.cat(kept)
def __getitem__(self, index):
    """Load the style pair and the content pair stored at `index`.

    Two (label map, RGB image) pairs are read: `A_paths` / `B_paths` (the
    "style" side) and `mask_A_paths` / `mask_B_paths` (the "content" side).
    Both go through the same steps: optional resize so the longer side equals
    `opt.longSize`, brightening of the image wherever the label is 0, optional
    augmentation, normalisation with mean 0.5 / std 0.5, landmark extraction
    and `self.append_region`.

    Returns:
        dict with keys 'label', 'inst', 'image', 'mask2', 'bg_styleimage',
        'bg_contentimage', 'feat', 'path', 'image_affine', 'mask', 'label2'.

    Raises:
        AssertionError: if augmentation is requested outside training, or a
            landmark centre falls outside its allowed window.
    """
    to_tensor = transforms.functional.to_tensor
    normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

    def _load_pair(label_path, image_path):
        """Open a label map and its RGB image, resized to opt.longSize if needed."""
        label = Image.open(label_path)
        image = Image.open(image_path).convert('RGB')
        w, h = label.size
        max_size = max(w, h)
        if self.opt.longSize != max_size:
            # float() is applied before dividing so the ratio is not floored
            # when both operands are ints under Python 2.
            scale_size = float(self.opt.longSize) / max_size
            new_w = int(scale_size * w)
            new_h = int(scale_size * h)
            label = label.resize((new_w, new_h), Image.NEAREST)
            image = image.resize((new_w, new_h), Image.BICUBIC)
        return label, image

    def _bbox_center(region):
        """Return (y, x) of the centre of the bounding box of the True pixels."""
        coords = torch.nonzero(region)  # rows are (channel, y, x)
        lowest = torch.min(coords, 0)[0]
        highest = torch.max(coords, 0)[0]
        this_top, this_left = int(lowest[1]), int(lowest[2])
        this_bottom, this_right = int(highest[1]), int(highest[2])
        x_mean = int((this_left + this_right) / 2)
        y_mean = int((this_top + this_bottom) / 2)
        return y_mean, x_mean

    def _landmarks(label_tensor, label_path):
        """Return a 6-vector (y, x) x (left eye, right eye, mouth)."""
        # (message printed on failure, region selector, fallback (y, x))
        regions = (
            ("left eye problem ------------------",
             lambda t: t == 4, (116, 96)),
            ("right eye problem --------------",
             lambda t: t == 5, (116, 160)),
            ("mouth problem --------------",
             lambda t: (t == 7) + (t == 8) + (t == 9), (184, 128)),
        )
        centers = torch.zeros(6)
        for slot, (message, select, fallback) in enumerate(regions):
            try:
                y_mean, x_mean = _bbox_center(select(label_tensor))
            except Exception:
                # Typically the region is absent from the label map, which
                # makes the min/max reduction fail; use the default position.
                print(message)
                print(label_path)
                y_mean, x_mean = fallback
            centers[2 * slot] = y_mean
            centers[2 * slot + 1] = x_mean

        assert 16 < centers[0] < 256 - 16
        assert 24 < centers[1] < 256 - 24
        assert 16 < centers[2] < 256 - 16
        assert 24 < centers[3] < 256 - 24
        assert 40 < centers[4] < 256 - 40
        assert 72 < centers[5] < 256 - 72
        return centers

    def _prepare(label_path, image_path, want_affine):
        """Run the full per-pair pipeline.

        Returns (label_tensor, image_tensor, real_image_tensor, affine_tensor,
        landmarks); affine_tensor is 0 unless it was requested and
        augmentation is enabled.
        """
        label, image = _load_pair(label_path, image_path)

        label_tensor = to_tensor(label) * 255.0
        image_tensor = to_tensor(image)
        real_image_tensor = image_tensor.clone()
        # Add 1 wherever the label is 0 and clamp back into [0, 1].
        mask_bg = (label_tensor == 0).type(torch.FloatTensor)
        image_tensor = torch.clamp(
            image_tensor + mask_bg * torch.ones(label_tensor.size()), 0, 1)
        image = transforms.functional.to_pil_image(image_tensor)

        affine_tensor = 0
        if self.opt.data_augmentation == True:
            assert self.opt.isTrain == True
            # The random draws are kept (they advance the RNG) but are then
            # overridden, so the affine transform is currently the identity.
            rotate, scale, shear = (random.random() - 0.5,
                                    random.random() - 0.5,
                                    random.random() - 0.5)
            rotate, scale, shear = 0, 0, 0
            image = transforms.functional.affine(image,
                                                 20 * rotate, [0, 0],
                                                 1 + 0.2 * scale,
                                                 10 * shear,
                                                 resample=Image.BICUBIC)
            label = transforms.functional.affine(label,
                                                 20 * rotate, [0, 0],
                                                 1 + 0.2 * scale,
                                                 10 * shear,
                                                 resample=Image.NEAREST)
            # NOTE(review): the original indentation was lost; this assignment
            # is assumed to belong to the augmentation branch (the value
            # defaults to 0 otherwise) -- confirm against the upstream file.
            if want_affine:
                affine_tensor = normalize(to_tensor(image))

        image_tensor = normalize(to_tensor(image))
        real_image_tensor = normalize(real_image_tensor)

        # get mean of left eye, right eye, mouth
        # first y next x
        label_tensor = to_tensor(label) * 255.0
        landmarks = _landmarks(label_tensor, label_path)
        label_tensor = self.append_region(label, label_tensor, landmarks)
        return label_tensor, image_tensor, real_image_tensor, affine_tensor, landmarks

    ### input A (label maps) and B (image) of the style side
    A_path = self.A_paths[index]
    A_tensor, B_tensor, real_B_tensor, C_tensor, mask_tensor = _prepare(
        A_path, self.B_paths[index], True)

    inst_tensor = feat_tensor = 0

    ### content side
    mask_A_path = self.mask_A_paths[index]
    mask_A_tensor, _, real_mask_B_tensor, _, mask_tensor2 = _prepare(
        mask_A_path, self.mask_B_paths[index], False)

    # content image: bg_contentimage, label2, mask2
    # style image: bg_styleimage, label, mask, label,image_affine
    input_dict = {
        'label': A_tensor,
        'inst': inst_tensor,
        'image': B_tensor,
        'mask2': mask_tensor2,
        'bg_styleimage': real_B_tensor,
        'bg_contentimage': real_mask_B_tensor,
        'feat': feat_tensor,
        'path': A_path,
        'image_affine': C_tensor,
        'mask': mask_tensor,
        'label2': mask_A_tensor
    }
    return input_dict
def __call__(self, locations, box_cls, box_regression, centerness, targets):
    """
    Compute the classification, regression and centerness losses.

    Arguments:
        locations (list[BoxList])
        box_cls (list[Tensor])
        box_regression (list[Tensor])
        centerness (list[Tensor])
        targets (list[BoxList])

    Returns:
        cls_loss (Tensor)
        reg_loss (Tensor)
        centerness_loss (Tensor), already multiplied by 2
    """
    # Level 0 (the first FPN layer) supplies the batch size and class count.
    first_level_cls = box_cls[0]
    N = first_level_cls.size(0)
    num_classes = first_level_cls.size(1)

    # Targets come back organised level first.
    labels, reg_targets = self.prepare_targets(locations, targets)

    cls_chunks = []
    reg_chunks = []
    ctr_chunks = []
    label_chunks = []
    target_chunks = []
    for lvl in range(len(labels)):
        # channels last, then one row per location
        level_cls = box_cls[lvl].permute(0, 2, 3, 1)
        level_reg = box_regression[lvl].permute(0, 2, 3, 1)
        cls_chunks.append(level_cls.reshape(-1, num_classes))
        reg_chunks.append(level_reg.reshape(-1, 5))
        label_chunks.append(labels[lvl].reshape(-1))
        # 7 target columns: 5 regression values, a weight, a centerness target
        target_chunks.append(reg_targets[lvl].reshape(-1, 7))
        ctr_chunks.append(centerness[lvl].reshape(-1))

    box_cls_flatten = torch.cat(cls_chunks, dim=0)
    box_regression_flatten = torch.cat(reg_chunks, dim=0)
    centerness_flatten = torch.cat(ctr_chunks, dim=0)
    labels_flatten = torch.cat(label_chunks, dim=0)
    reg_targets_flatten = torch.cat(target_chunks, dim=0)

    pos_inds = torch.nonzero(labels_flatten > 0).squeeze(1)
    num_pos = pos_inds.numel()

    # Not every point is a positive sample: locations whose weight column is
    # zero get a constant classification weight of 0.8 instead.
    all_centerness_weights = reg_targets_flatten[:, -2]
    fallback_weight = torch.full_like(all_centerness_weights, 0.8)
    cls_weight = torch.where(all_centerness_weights == 0,
                             fallback_weight,
                             all_centerness_weights).unsqueeze(-1)

    # N is added to the denominator to avoid dividing by zero.
    cls_loss = 2 * self.cls_loss_func(
        box_cls_flatten,
        labels_flatten.int(),
        weight=cls_weight) / (num_pos + N)

    pos_regression = box_regression_flatten[pos_inds]
    pos_targets = reg_targets_flatten[pos_inds]
    pos_centerness = centerness_flatten[pos_inds]

    if num_pos == 0:
        # No positives: empty sums keep the outputs connected to the graph.
        reg_loss = pos_regression.sum()
        centerness_loss = pos_centerness.sum()
    else:
        # Open question carried over from the original comments: should this
        # weight be kept consistent with the classification weight?
        reg_loss = smooth_l1_loss(
            pos_regression,
            pos_targets[:, :-2],
            weight=pos_targets[:, -2].unsqueeze(-1))
        # Centerness must always be regressed.
        centerness_loss = self.centerness_loss_func(
            pos_centerness,
            pos_targets[:, -1])

    return cls_loss, reg_loss, 2 * centerness_loss
def select_semihard(loss_values, margin):
    """Pick one random index whose loss lies strictly between 0 and `margin`.

    `loss_values` is flattened first, so the returned index refers to the
    flattened tensor. Returns None when no entry qualifies; otherwise a
    0-dimensional tensor holding the chosen index.
    """
    flat_losses = loss_values.view(-1)
    is_semihard = (flat_losses > 0) & (flat_losses < margin)
    candidates = torch.nonzero(is_semihard).view(-1)

    num_candidates = len(candidates)
    if num_candidates == 0:
        return None

    pick = torch.randint(0, num_candidates, (1,), dtype=torch.long)[0]
    return candidates[pick]
def detect(self, bbx):
    """Run the detector on a crop of `self.img` and report whether it fires.

    Args:
        bbx: (x, y, w, h) of the crop; values are cast to int and negative
            x / y are clamped to 0.

    Returns:
        True if at least one detection of class index 15 has a score above
        `cf` and an IoU with the whole crop above `iou`; False otherwise,
        including when the crop is empty.

    NOTE(review): `cf` and `iou` are not defined in this method; they are
    presumably module-level thresholds -- confirm.
    """
    with torch.no_grad():
        vis = False
        thresh = 0.05

        # Placeholder tensors, resized and filled below.
        im_data = torch.FloatTensor(1).to(self.device)
        im_info = torch.FloatTensor(1).to(self.device)
        num_boxes = torch.LongTensor(1).to(self.device)
        gt_boxes = torch.FloatTensor(1).to(self.device)

        x, y, w, h = [int(p) for p in bbx]
        x = max(x, 0)
        y = max(y, 0)
        im = self.img[y:(y + h), x:(x + w)]
        # The slice may be smaller than requested; use its real size.
        w, h = im.shape[1], im.shape[0]
        refine_bbx = [0, 0, w, h]
        if w * h == 0:
            # Parenthesised single argument: same output under Python 2 and 3.
            print('What? %d * %d' % (w, h))
            return False

        blobs, im_scales = _get_image_blob(im)
        assert len(im_scales) == 1, "Only single-image batch implemented"
        im_blob = blobs
        im_info_np = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

        im_data_pt = torch.from_numpy(im_blob)
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)
        im_info_pt = torch.from_numpy(im_info_np)

        im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.data.resize_(1, 1, 5).zero_()
        num_boxes.data.resize_(1).zero_()

        rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = self.fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev.
                # Both constants follow self.device (the stds were previously
                # forced onto CUDA, which broke CPU devices).
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).to(self.device) \
                    + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).to(self.device)
                box_deltas = box_deltas.view(1, -1, 4 * len(self.pascal_classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            _ = torch.from_numpy(np.tile(boxes, (1, scores.shape[1])))
            pred_boxes = _.to(self.device)

        pred_boxes /= im_scales[0]

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()

        if vis:
            im2show = np.copy(im)

        # Only class index 15 is examined.
        j = 15
        inds = torch.nonzero(scores[:, j] > thresh).view(-1)
        step = 0
        # if there is det
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
            cls_dets = cls_dets[keep.view(-1).long()]

            dets = cls_dets.cpu().numpy()
            for i in range(dets.shape[0]):
                if dets[i, -1] > cf:
                    # The first four columns are corner coordinates; convert
                    # to (x, y, w, h) for the IoU helper.
                    x1, y1, w1, h1 = dets[i][:4]
                    det = [x1, y1, w1 - x1, h1 - y1]
                    ratio = self.a_train_set.IOU(det, refine_bbx)
                    if ratio[0] > iou:  # IOU between prediction and detection should not be limited
                        step += 1

            if vis:
                print(cls_dets)
                dets = cls_dets.cpu().numpy()
                im2show = vis_detections(im2show, self.pascal_classes[j], dets)

        if vis:
            cv2.imshow('test', im2show)
            cv2.waitKey(0)

        if step:
            return True
        return False
def main(args):
    """Train and evaluate a mini-batch entity classifier on an RDF dataset.

    Reads from `args`: dataset ('aifb', 'mutag', 'bgs' or 'am'), validation,
    gpu, n_hidden, n_bases, n_layers, dropout, use_self_loop, fanout,
    batch_size, lr, l2norm, n_epochs and model_path.

    Raises:
        ValueError: if `args.dataset` is not one of the supported names.
    """
    # load graph data
    if args.dataset == 'aifb':
        dataset = AIFBDataset()
    elif args.dataset == 'mutag':
        dataset = MUTAGDataset()
    elif args.dataset == 'bgs':
        dataset = BGSDataset()
    elif args.dataset == 'am':
        dataset = AMDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    g = dataset[0]
    category = dataset.predict_category
    num_classes = dataset.num_classes
    train_mask = g.nodes[category].data.pop('train_mask')
    test_mask = g.nodes[category].data.pop('test_mask')
    train_idx = th.nonzero(train_mask).squeeze()
    test_idx = th.nonzero(test_mask).squeeze()
    labels = g.nodes[category].data.pop('labels')

    # split dataset into train, validate, test
    if args.validation:
        # the first fifth of the training nodes becomes the validation set
        val_idx = train_idx[:len(train_idx) // 5]
        train_idx = train_idx[len(train_idx) // 5:]
    else:
        val_idx = train_idx

    # check cuda
    device = 'cpu'
    use_cuda = args.gpu >= 0 and th.cuda.is_available()
    if use_cuda:
        th.cuda.set_device(args.gpu)
        device = 'cuda:%d' % args.gpu

    train_label = labels[train_idx]
    val_label = labels[val_idx]
    test_label = labels[test_idx]

    # create embeddings
    embed_layer = RelGraphEmbed(g, args.n_hidden)
    node_embed = embed_layer()
    # create model
    model = EntityClassify(g,
                           args.n_hidden,
                           num_classes,
                           num_bases=args.n_bases,
                           num_hidden_layers=args.n_layers - 2,
                           dropout=args.dropout,
                           use_self_loop=args.use_self_loop)

    if use_cuda:
        model.cuda()

    # train sampler
    sampler = dgl.sampling.MultiLayerNeighborSampler([args.fanout] * args.n_layers)
    loader = dgl.sampling.NodeDataLoader(g, {category: train_idx},
                                         sampler,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         num_workers=0)

    # validation sampler
    val_sampler = dgl.sampling.MultiLayerNeighborSampler([args.fanout] * args.n_layers)
    val_loader = dgl.sampling.NodeDataLoader(g, {category: val_idx},
                                             val_sampler,
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             num_workers=0)

    # test sampler (built as before; the test pass below uses model.inference)
    test_sampler = dgl.sampling.MultiLayerNeighborSampler([args.fanout] * args.n_layers)
    test_loader = dgl.sampling.NodeDataLoader(g, {category: test_idx},
                                              test_sampler,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=0)

    # optimizer
    all_params = itertools.chain(model.parameters(), embed_layer.parameters())
    optimizer = th.optim.Adam(all_params, lr=args.lr, weight_decay=args.l2norm)

    # training loop
    print("start training...")
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        # the first epochs are excluded from the timing statistics
        if epoch > 3:
            t0 = time.time()

        for i, (input_nodes, seeds, blocks) in enumerate(loader):
            blocks = [blk.to(device) for blk in blocks]
            seeds = seeds[category]  # we only predict the nodes with type "category"
            batch_tic = time.time()
            emb = extract_embed(node_embed, input_nodes)
            lbl = labels[seeds]
            if use_cuda:
                emb = {k: e.cuda() for k, e in emb.items()}
                lbl = lbl.cuda()
            logits = model(emb, blocks)[category]
            loss = F.cross_entropy(logits, lbl)
            # Clear gradients for every batch: the optimizer steps per batch,
            # so clearing only once per epoch let gradients from earlier
            # batches leak into later updates.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_acc = th.sum(logits.argmax(dim=1) == lbl).item() / len(seeds)
            print(
                "Epoch {:05d} | Batch {:03d} | Train Acc: {:.4f} | Train Loss: {:.4f} | Time: {:.4f}"
                .format(epoch, i, train_acc, loss.item(),
                        time.time() - batch_tic))

        if epoch > 3:
            dur.append(time.time() - t0)

        val_loss, val_acc = evaluate(model, val_loader, node_embed, labels,
                                     category, device)
        # nothing has been timed during the first epochs; report nan without
        # triggering numpy's empty-mean warning
        mean_dur = np.average(dur) if dur else float('nan')
        print(
            "Epoch {:05d} | Valid Acc: {:.4f} | Valid loss: {:.4f} | Time: {:.4f}"
            .format(epoch, val_acc, val_loss, mean_dur))
    print()
    if args.model_path is not None:
        th.save(model.state_dict(), args.model_path)

    output = model.inference(g, args.batch_size,
                             'cuda' if use_cuda else 'cpu', 0, node_embed)
    test_pred = output[category][test_idx]
    test_labels = labels[test_idx]
    test_acc = (test_pred.argmax(1) == test_labels).float().mean()
    print("Test Acc: {:.4f}".format(test_acc))
    print()
def inference_relation(
        cfg,
        model,
        data_loader,
        device="cuda",
):
    """Run the model over ``data_loader`` and print relation-accuracy counters.

    For every image the detections above
    ``cfg.MODEL.SEMANTIC.CONFIDENCE_THR`` are matched to the ground-truth
    boxes, turned into per-instance result dicts and passed to
    ``combine_to_panoptic_for_acc``; the counters it returns are accumulated
    and printed.

    Args:
        cfg: Project config node; ``cfg.MODEL.SEMANTIC.CONFIDENCE_THR`` is
            read here and ``cfg`` is forwarded to
            ``combine_to_panoptic_for_acc``.
        model: Detection model; switched to eval mode and called on images.
        data_loader: Iterable yielding ``(images, targets, image_ids)``; its
            ``dataset`` must provide ``get_img_info`` and
            ``contiguous_category_id_to_json_id``.
        device: Device name or ``torch.device`` used for inference.

    Raises:
        SystemExit: With code -1 when more than one process/GPU is in use.
    """
    # convert to a torch.device for efficiency
    device = torch.device(device)
    if get_world_size() > 1:
        print("test acc is not support multi gpu")
        # same effect as exit(-1) without relying on the site-injected builtin
        raise SystemExit(-1)
    dataset = data_loader.dataset

    # for relation acc
    matcher = Matcher(
        0,
        0,
        allow_low_quality_matches=False,
    )

    all_acc_num = 0
    all_score_acc_num = 0
    all_count = 0
    all_infer_count = 0

    # category variables
    with open("datasets/coco/panoptic_coco_categories.json", "r") as f:
        categories_list = json.load(f)
    categories = {el['id']: el for el in categories_list}
    id_generator = IdGenerator(categories)

    # compute on dataset
    model.eval()
    masker = Masker(threshold=0.5, padding=1)
    for images, targets, image_ids in data_loader:
        images = images.to(device)
        with torch.no_grad():
            outputs = model(images)
            outputs = [o.to(device) for o in outputs]
            targets = [target.to(device) for target in targets]

        for image_id, output, target in zip(image_ids, outputs, targets):
            # generate pred instance id
            origin_scores = output.get_field("scores")
            keep = torch.nonzero(
                origin_scores > cfg.MODEL.SEMANTIC.CONFIDENCE_THR).squeeze(1)
            output = output[keep]
            try:
                match_quality_matrix = boxlist_iou(target, output)
                matched_idxs = matcher(match_quality_matrix).tolist()
            except Exception:
                # Best effort: skip images whose matching fails (presumably
                # empty detections or targets). Unlike a bare ``except`` this
                # still lets KeyboardInterrupt/SystemExit propagate.
                continue

            target_instance_ids = target.get_field("instance_ids").tolist()
            # NOTE(review): ``idx > 0`` also maps a match to target index 0
            # onto -1 -- confirm whether ``idx >= 0`` was intended.
            pred_instance_ids = [
                target_instance_ids[idx] if idx > 0 else -1
                for idx in matched_idxs
            ]
            target_relations = target.get_field("relations")["relations"]

            img_info = dataset.get_img_info(image_id)
            image_width = img_info["width"]
            image_height = img_info["height"]
            output = output.resize((image_width, image_height))

            # detection result
            boxes = output.bbox.tolist()
            scores = output.get_field("scores").tolist()
            if output.has_field("relation_val"):
                relation_vals = output.get_field("relation_val").tolist()
            else:
                relation_vals = [0.] * len(scores)
            labels = [
                dataset.contiguous_category_id_to_json_id[i]
                for i in output.get_field("labels").tolist()
            ]

            # mask result
            masks = output.get_field("mask")
            # Masker is necessary only if masks haven't been already resized.
            if list(masks.shape[-2:]) != [image_height, image_width]:
                masks = masker(masks.expand(1, -1, -1, -1, -1), output)
                masks = masks[0]

            # construct instance results
            inst_results = [
                {
                    "box": box,
                    "score": score,
                    "instance_id": instance_id,
                    "relation_val": relation_val,
                    "label": label,
                    "segmentation":
                        COCOmask.encode(np.asfortranarray(mask[0])),
                }
                for box, score, instance_id, relation_val, label, mask in zip(
                    boxes, scores, pred_instance_ids, relation_vals, labels,
                    masks)
            ]

            # segmentation fusion
            acc_num, score_acc_num, infer_count, count = \
                combine_to_panoptic_for_acc(cfg, img_info, inst_results,
                                            id_generator, target_relations)
            all_acc_num += acc_num
            all_score_acc_num += score_acc_num
            all_count += count
            all_infer_count += infer_count
            if count > 0:
                # running totals; count > 0 guarantees all_count > 0 here
                print(all_acc_num, all_score_acc_num, all_count,
                      all_infer_count)
                print("RAP acc:", all_acc_num / all_count)
                print("score relation acc:", all_score_acc_num / all_count)
    print(all_infer_count)
import torch
import numpy as np

# Small demo of three ways to look at the elements of a tensor greater than 3:
# the boolean mask, the selected values, and the coordinates of the hits.
a = torch.tensor([[1, 2], [3, 4], [5, 6]], dtype=torch.float32)
print(a.gt(3))            # element-wise boolean mask
print(a[a.gt(3)])         # 1-D tensor with the selected values
print((a > 3).nonzero())  # one (row, col) pair per selected element
def write_results(self, prediction, confidence, num_classes, nms_conf=0.4):
    """
    Filter raw detections by objectness and apply per-class NMS.

    The result is a (d, 8) tensor with one row per kept detection. Each row
    holds: the index of the image within the batch, the four box-corner
    coordinates, the objectness score, the max class score and the index of
    that class.

    :param prediction: tensor indexed as [image, box, attribute]. Attributes
        0-3 are read as centre-x, centre-y, width and height, attribute 4 as
        the objectness score and attributes 5.. as per-class scores.
    :param confidence: objectness threshold; boxes at or below it are dropped
    :param num_classes: number of class-score columns following attribute 4
    :param nms_conf: IoU threshold; within a class, a box whose IoU with a
        higher-scoring kept box is not below this value is suppressed
    :return: the (d, 8) detection tensor, or the int 0 when nothing is kept
    """
    # Zero out low-objectness boxes instead of removing them, so the tensor
    # stays rectangular (each image keeps a different number of boxes).
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask

    # Convert (centre-x, centre-y, width, height) to the two diagonal
    # corners, which makes the IoU computation simpler.
    box_corner = torch.empty_like(prediction)
    box_corner[:, :, 0] = (prediction[:, :, 0] - prediction[:, :, 2] / 2)
    box_corner[:, :, 1] = (prediction[:, :, 1] - prediction[:, :, 3] / 2)
    box_corner[:, :, 2] = (prediction[:, :, 0] + prediction[:, :, 2] / 2)
    box_corner[:, :, 3] = (prediction[:, :, 1] + prediction[:, :, 3] / 2)
    prediction[:, :, :4] = box_corner[:, :, :4]

    # NMS keeps a different number of boxes per image, so images are
    # processed one at a time rather than vectorised over the batch.
    detections = []
    for ind in range(prediction.size(0)):
        image_pred = prediction[ind]
        # Replace the per-class score columns with just the best class score
        # and its index.
        max_conf, max_conf_index = torch.max(
            image_pred[:, 5:5 + num_classes], 1)
        max_conf = max_conf.float().unsqueeze(1)
        max_conf_index = max_conf_index.float().unsqueeze(1)
        image_pred = torch.cat(
            (image_pred[:, :5], max_conf, max_conf_index), 1)

        # Drop the rows zeroed by the confidence mask. An image may have no
        # box above the threshold; test for that explicitly rather than
        # relying on an exception from indexing with an empty tensor.
        non_zero_ind = torch.nonzero(image_pred[:, 4]).view(-1)
        if non_zero_ind.numel() == 0:
            continue
        image_pred_ = image_pred[non_zero_ind].view(-1, 7)

        # Run NMS separately for every class present in the image.
        for cls in unique(image_pred_[:, -1]):
            # 1. extract the detections of this class
            cls_mask = image_pred_ * (image_pred_[:, -1]
                                      == cls).float().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2]).view(-1)
            image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

            # 2. sort by objectness, highest first
            conf_sort_index = torch.sort(image_pred_class[:, 4],
                                         descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]

            # 3. greedy suppression; the tensor shrinks while iterating, so
            # running past its current end terminates the loop.
            for i in range(image_pred_class.size(0)):
                try:
                    ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                    image_pred_class[i + 1:])
                except (ValueError, IndexError):
                    break

                # zero the boxes whose IoU with box i reaches the threshold
                iou_mask = (ious < nms_conf).float().unsqueeze(1)
                image_pred_class[i + 1:] *= iou_mask

                # ...and remove them, keeping only boxes below the threshold
                kept_ind = torch.nonzero(image_pred_class[:, 4]).view(-1)
                image_pred_class = image_pred_class[kept_ind].view(-1, 7)

            # Prefix every surviving detection of this class with the index
            # of its image within the batch.
            batch_ind = image_pred_class.new(image_pred_class.size(0),
                                             1).fill_(ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    # Callers receive the int 0 when no detection survived.
    if not detections:
        return 0
    return torch.cat(detections)
# Coarse-to-fine loop over image-pyramid levels (script fragment; the names
# pyramid_I, pyramid_J, device, nItr, B and v are defined outside this view).
result_errors = list()
for level in torch.arange(7, 3, -1):
    # NOTE(review): level takes the values 7, 6, 5, 4 here, so the `else`
    # branch (Gaussian-smoothed level 0) is never reached with this range.
    if level > 0:
        I = torch.tensor(pyramid_I[level].astype(np.float32)).to(device)
        J = torch.tensor(pyramid_J[level].astype(np.float32)).to(device)
    else:
        I = torch.tensor(gaussian(pyramid_I[level].astype(np.float32),2.0)).to(device)
        J = torch.tensor(gaussian(pyramid_J[level].astype(np.float32),2.0)).to(device)
    # I is unpacked as a 2-D (height, width) image
    height,width = I.shape
    # choose a set of pixel locations on the template image that are most informative
    tval = 0.9*threshold_otsu(I.cpu().numpy()) # reduce Otsu threshold value a bit to cover slightly wider areas
    # flat (row-major) indices of the pixels brighter than the threshold
    # NOTE(review): squeeze() gives a 0-d tensor if exactly one pixel passes.
    important_ind = torch.nonzero((I.data>tval).view([height*width])).squeeze()
    # generate grid only once at each level
    yv, xv = torch.meshgrid([torch.arange(0,height).float().to(device), torch.arange(0,width).float().to(device)])
    # map coordinates to [-1,1]x[-1,1] so that grid_sample works properly
    yv = 2.0*yv/(height-1) - 1.0
    xv = 2.0*xv/(width-1) - 1.0
    # result = train(rho_data,mine_net,mine_net_optim)
    for itr in range(nItr[level]):
        # for itr in range(1000):
        C = torch.sum(B*v, 0)
def compute_reference_loss(data_dict, config):
    """ Compute cluster reference loss

    For every sample in the batch, each predicted cluster of that scene is
    scored by its overlap with the points of the target instance; the
    best-overlapping cluster(s) get label 1, all others 0, and the labels
    are compared with the predicted cluster confidences through
    ``SoftmaxRankingLoss``.

    Args:
        data_dict: dict (read-only); the keys read here are "cluster_ref",
            "instance_labels", "object_id", "offsets", "proposal_batch_ids",
            "proposals_idx" and "proposals_offset"
        config: not used in this function

    Returns:
        loss, cluster_preds, cluster_labels
        (loss is the summed per-scene loss divided by the batch size;
        cluster_labels has the same shape as cluster_preds)
    """

    # NOTE: N := num_batches (B) * num_points_per_scene
    # NOTE: data_dict["cluster_ref"] are the cluster confidences from the match_module.py
    # B := batch_size, num_proposal is fixed because of match_module.py!

    # unpack
    cluster_preds = data_dict["cluster_ref"]  # (B, num_proposal)

    # GT segmentation
    # label creation without using the class labels and real ground truths
    # because loc. loss should be independent of obj. class. loss and
    # segmentation loss. (+ the same class can appear more than once)
    # hence we want to compare each cluster with the real cluster to find
    # the best cluster.
    gt_instances = data_dict['instance_labels']  # (N)
    target_inst_id = data_dict[
        'object_id']  # (B)
    # as no extra batch_dim exists this gives the index of a next sample
    start_of_samples = data_dict['offsets']  # (B)
    proposal_batch_ids = data_dict['proposal_batch_ids']  # (nProposal + 1)

    # PointGroup:
    # NOTE: in PG clustering alg. only points of the same class can be in one cluster
    # they can be assigned multiple clusters though, and point idx don't restart per
    # batch but continue throughout all the batch (as there is no extra batch_dim)
    # dim 1 for cluster_id, dim 2 for corresponding point idxs in N
    # sumNPoint: additional explanation in pointgroup.py
    preds_instances = data_dict['proposals_idx']  # (sumNPoint, 2)
    preds_offsets = data_dict['proposals_offset']  # (nProposal + 1)

    batch_size, num_proposals = cluster_preds.shape
    total_num_proposals = len(preds_offsets) - 1
    # (re-created inside the loop for every sample)
    labels = torch.zeros(total_num_proposals)

    # reference loss
    criterion = SoftmaxRankingLoss()
    loss = torch.tensor(0, dtype=torch.float).cuda()

    # TODO: vectorize - instead of double iterative approach
    # for each sample in batch
    cluster_labels = torch.zeros_like(cluster_preds).cuda()
    for i in range(batch_size):
        # NOTE(review): start/end are computed but not used below.
        start = start_of_samples[i]
        end = start_of_samples[i + 1]
        # gt_instances contains for each of the points their corresponding cluster_id
        # NOTE: we assume the point_ids are assigned based on their order in gt_instances
        # we also assume that these ids match with the point_ids from PG
        # Global point indices whose GT instance equals the target object of
        # sample i.
        # NOTE(review): the comparison runs over all N points, not only the
        # points of sample i -- confirm instance ids are unique across scenes.
        correct_indices = (torch.arange(
            len(gt_instances))[gt_instances == target_inst_id[i]]).cuda()

        # nSamples is the number of points that are assigned to some clusters in one scene
        # NOTE: only works with an extra batch_size dimension
        #nSamples = preds_instances[i].shape[0]
        numbSamplePerCluster = torch.zeros(total_num_proposals)
        labels = torch.zeros(total_num_proposals)
        # TODO: are the gt_instances also unordered? no
        # TODO: is proposal_idx at the end actually 2*sumNPoints? dunno

        # select the correct ones
        # in preds_instances the proposals aren't ordered batchwise!
        # use proposal_batch_ids, preds_offsets to get correct window in preds_instances
        batch_proposals = preds_offsets[:-1][proposal_batch_ids ==
                                             i]  # proposals of one scene
        for j in range(len(batch_proposals)):
            # window [start_id_proposal, end_id_proposal) of this proposal's
            # rows in preds_instances
            start_id_proposal = batch_proposals[j]
            start_proposal_index = torch.nonzero(
                preds_offsets == batch_proposals[j])
            end_id_proposal = preds_offsets[start_proposal_index + 1]
            preds_instance_proposals = preds_instances[
                start_id_proposal:end_id_proposal]

            cluster_ids, member_points = preds_instance_proposals[:, 0], preds_instance_proposals[:, 1].long(
            )
            cluster_id = cluster_ids[0]
            numbSamplePerCluster[cluster_id] = cluster_ids.shape[0]

            # A point index that appears in both lists occurs twice after
            # concatenation, so counting values with count > 1 gives the
            # size of the intersection (this relies on each list having no
            # internal duplicates).
            combined = torch.cat((member_points, correct_indices))
            _, counts = combined.unique(return_counts=True)
            numb_object_id_proposals = counts[counts > 1].shape[0]
            labels[cluster_id] = numb_object_id_proposals

        # union of points in real instance (gt) and respective pred instance
        # - labels to not have the intersection count double
        numbSamplePerCluster += len(correct_indices) - labels
        # normalize intersection with union => IoU score now
        labels = labels / numbSamplePerCluster
        max_elem = labels.max()
        # floor(labels / max) turns the IoU scores into 0/1 labels: 1 for
        # the entries equal to the maximum, 0 for everything else
        # TODO: necessary if? -> # If no IoU with GT
        if max_elem != 0:
            labels = torch.floor(labels / max_elem)
        else:
            # NOTE(review): `break` leaves the batch loop entirely, so the
            # remaining samples add no loss and keep all-zero labels while
            # the loss is still divided by batch_size -- confirm `continue`
            # was not intended.
            break

        # scene-wise loss calculation
        # labels is total_num_proposals long (same size as proposal_batch_ids)
        cluster_labels_scene = torch.FloatTensor(
            labels[proposal_batch_ids == i]).cuda()
        cluster_preds_scene = cluster_preds[i][:cluster_labels_scene.shape[
            0]]  # because in matching module 0s were added for missing values
        # loss = 0 is defined above
        loss += criterion(cluster_preds_scene, cluster_labels_scene.float())

        # zero-pad the scene labels up to num_proposals
        cluster_labels_scene_fill = torch.zeros(
            num_proposals - cluster_labels_scene.shape[0]).cuda()
        cluster_labels[i] = torch.cat(
            [cluster_labels_scene, cluster_labels_scene_fill])

    #cluster_labels = torch.FloatTensor(labels).cuda()
    # TODO: check if cluster_id starts with 0

    loss /= batch_size
    return loss, cluster_preds, cluster_labels
def run_standard_evaluation(self, x_orig, y_orig, bs=250):
    """Run every configured attack in turn and return the adversarial inputs.

    The clean accuracy is computed first. Each attack in
    ``self.attacks_to_run`` is then applied, in batches of ``bs``, only to
    the points that are still classified correctly; a point whose prediction
    flips is marked non-robust and its adversarial version is stored.

    :param x_orig: clean inputs, indexed along dimension 0
    :param y_orig: labels matching ``x_orig``
    :param bs: batch size used for evaluation and for the attacks
    :return: copy of ``x_orig`` in which every successfully attacked point
        is replaced by its adversarial example
    :raises ValueError: if an attack name is not supported
    """
    if self.verbose:
        print('using {} version including {}'.format(
            self.version, ', '.join(self.attacks_to_run)))

    def _perturb(name, x, y):
        # Configure the attack object selected by ``name`` and run it.
        if name == 'apgd-ce' or name == 'apgd-dlr':
            # apgd on cross-entropy / dlr loss
            self.apgd.loss = 'ce' if name == 'apgd-ce' else 'dlr'
            self.apgd.seed = self.get_seed()
            _, adv = self.apgd.perturb(x, y, cheap=True)
            return adv
        if name == 'apgd-t':
            # targeted apgd
            self.apgd_targeted.seed = self.get_seed()
            _, adv = self.apgd_targeted.perturb(x, y, cheap=True)
            return adv
        if name == 'fab' or name == 'fab-t':
            # fab, untargeted or targeted
            is_targeted = name == 'fab-t'
            self.fab.targeted = is_targeted
            if is_targeted:
                self.fab.n_restarts = 1
            self.fab.seed = self.get_seed()
            return self.fab.perturb(x, y)
        if name == 'square':
            self.square.seed = self.get_seed()
            return self.square.perturb(x, y)
        raise ValueError('Attack not supported')

    n_points = x_orig.shape[0]
    with torch.no_grad():
        # calculate accuracy on the clean inputs
        robust_flags = torch.zeros(n_points, dtype=torch.bool,
                                   device=x_orig.device)
        for lo in range(0, n_points, bs):
            hi = min(lo + bs, n_points)
            x = x_orig[lo:hi, :].clone().to(self.device)
            y = y_orig[lo:hi].clone().to(self.device)
            predictions = self.get_logits(x).max(dim=1)[1]
            robust_flags[lo:hi] = y.eq(predictions).detach().to(
                robust_flags.device)

        robust_accuracy = torch.sum(robust_flags).item() / n_points
        if self.verbose:
            self.logger.log('initial accuracy: {:.2%}'.format(robust_accuracy))

        x_adv = x_orig.clone().detach()
        startt = time.time()
        for attack in self.attacks_to_run:
            # item() is super important as pytorch int division uses floor rounding
            num_robust = torch.sum(robust_flags).item()
            if num_robust == 0:
                break

            n_batches = int(np.ceil(num_robust / bs))
            # flat 1-D list of the indices that are still robust
            robust_lin_idcs = torch.nonzero(robust_flags,
                                            as_tuple=False).squeeze(1)

            for batch_idx in range(n_batches):
                lo = batch_idx * bs
                hi = min(lo + bs, num_robust)
                batch_datapoint_idcs = robust_lin_idcs[lo:hi]

                x = x_orig[batch_datapoint_idcs, :].clone().to(self.device)
                y = y_orig[batch_datapoint_idcs].clone().to(self.device)
                # make sure that x is a 4d tensor even if there is only a single datapoint left
                if x.dim() == 3:
                    x.unsqueeze_(dim=0)

                # run attack
                adv_curr = _perturb(attack, x, y)

                adv_predictions = self.get_logits(adv_curr).max(dim=1)[1]
                false_batch = ~y.eq(adv_predictions).to(robust_flags.device)
                non_robust_lin_idcs = batch_datapoint_idcs[false_batch]
                robust_flags[non_robust_lin_idcs] = False
                x_adv[non_robust_lin_idcs] = adv_curr[false_batch].detach().to(
                    x_adv.device)

                if self.verbose:
                    num_non_robust_batch = torch.sum(false_batch)
                    self.logger.log(
                        '{} - {}/{} - {} out of {} successfully perturbed'.format(
                            attack, batch_idx + 1, n_batches,
                            num_non_robust_batch, x.shape[0]))

            robust_accuracy = torch.sum(robust_flags).item() / n_points
            if self.verbose:
                self.logger.log(
                    'robust accuracy after {}: {:.2%} (total time {:.1f} s)'.format(
                        attack.upper(), robust_accuracy, time.time() - startt))

        # final check
        if self.verbose:
            delta = x_adv - x_orig
            if self.norm == 'Linf':
                res = delta.abs().view(n_points, -1).max(1)[0]
            elif self.norm == 'L2':
                res = (delta ** 2).view(n_points, -1).sum(-1).sqrt()
            elif self.norm == 'dftinf':
                res = norm_f(delta, 'dftinf')
            self.logger.log(
                'max {} perturbation: {:.5f}, nan in tensor: {}, max: {:.5f}, min: {:.5f}'.format(
                    self.norm, res.max(), (x_adv != x_adv).sum(),
                    x_adv.max(), x_adv.min()))
            self.logger.log('robust accuracy: {:.2%}'.format(robust_accuracy))

    return x_adv
def get_adj_nodes(graph, nodes):
    """Return the given nodes together with the nodes they point to, sorted.

    Args:
        graph: 2-D tensor read as an adjacency matrix; a non-zero entry
            ``graph[i, j]`` makes ``j`` adjacent to ``i``.
        nodes: Sequence of row indices into ``graph``.

    Returns:
        Sorted list of unique node ids as plain Python ints: the input nodes
        plus every column holding a non-zero entry in one of their rows.
    """
    # Normalise to built-in ints so the result is not a mix of int and
    # numpy.int64 values.
    seeds = {int(node) for node in nodes}
    # Column 1 of the nonzero coordinates is the column (neighbour) index.
    neighbours = torch.nonzero(graph[nodes])[:, 1].tolist()
    return sorted(seeds.union(neighbours))
def _foo(t): tuple_result = torch.nonzero(t, as_tuple=True) nontuple_result = torch.nonzero(t, as_tuple=False) out = torch.empty_like(nontuple_result) torch.nonzero(t, as_tuple=False, out=out) return tuple_result, nontuple_result, out
def __getitem__(self, index):
    """Load one sample, cropping/padding it to the aspect ratio of its batch.

    In training mode returns ``(padding_data, im_info, gt_boxes_padding,
    num_boxes)``: the image permuted to channel-first order, the flattened
    3-element image info, a ``(self.max_num_box, gt_boxes.size(1))``
    zero-padded box tensor and the number of valid boxes in it.
    Otherwise returns ``(data, im_info, gt_boxes, num_boxes)`` with a
    placeholder box tensor and ``num_boxes == 0``.
    """
    # In training the dataset is traversed through self.ratio_index.
    if self.training:
        index_ratio = int(self.ratio_index[index])
    else:
        index_ratio = index

    # get the anchor index for current sample index
    # here we set the anchor index to the last one
    # sample in this group
    minibatch_db = [self._roidb[index_ratio]]
    blobs = get_minibatch(minibatch_db, self._num_classes)
    data = torch.from_numpy(blobs['data'])
    im_info = torch.from_numpy(blobs['im_info'])
    # we need to random shuffle the bounding box.
    # data is indexed as [image, y, x, channel] below
    data_height, data_width = data.size(1), data.size(2)
    if self.training:
        np.random.shuffle(blobs['gt_boxes'])
        gt_boxes = torch.from_numpy(blobs['gt_boxes'])

        ########################################################
        # padding the input image to fixed size for each group #
        ########################################################

        # NOTE1: need to cope with the case where a group cover both conditions. (done)
        # NOTE2: need to consider the situation for the tail samples. (no worry)
        # NOTE3: need to implement a parallel data loader. (no worry)
        # get the index range

        # if the image need to crop, crop to the target size.
        ratio = self.ratio_list_batch[index]

        if self._roidb[index_ratio]['need_crop']:
            if ratio < 1:
                # this means that data_width << data_height, we need to crop the
                # data_height
                min_y = int(torch.min(gt_boxes[:,1]))
                max_y = int(torch.max(gt_boxes[:,3]))
                trim_size = int(np.floor(data_width / ratio))
                if trim_size > data_height:
                    trim_size = data_height
                # vertical extent covered by all boxes
                box_region = max_y - min_y + 1
                # choose the top row y_s of the crop window
                if min_y == 0:
                    y_s = 0
                else:
                    if (box_region-trim_size) < 0:
                        # the boxes fit into the window: pick a random start
                        # from the range computed below
                        y_s_min = max(max_y-trim_size, 0)
                        y_s_max = min(min_y, data_height-trim_size)
                        if y_s_min == y_s_max:
                            y_s = y_s_min
                        else:
                            y_s = np.random.choice(range(y_s_min, y_s_max))
                    else:
                        # the boxes are taller than the window: start at or
                        # slightly below the top-most box edge
                        y_s_add = int((box_region-trim_size)/2)
                        if y_s_add == 0:
                            y_s = min_y
                        else:
                            y_s = np.random.choice(range(min_y, min_y+y_s_add))
                # crop the image
                data = data[:, y_s:(y_s + trim_size), :, :]

                # shift y coordinate of gt_boxes
                gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
                gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)

                # update gt bounding box according to the trim
                gt_boxes[:, 1].clamp_(0, trim_size - 1)
                gt_boxes[:, 3].clamp_(0, trim_size - 1)

            else:
                # this means that data_width >> data_height, we need to crop the
                # data_width (mirror image of the branch above, along x)
                min_x = int(torch.min(gt_boxes[:,0]))
                max_x = int(torch.max(gt_boxes[:,2]))
                trim_size = int(np.ceil(data_height * ratio))
                if trim_size > data_width:
                    trim_size = data_width
                box_region = max_x - min_x + 1
                if min_x == 0:
                    x_s = 0
                else:
                    if (box_region-trim_size) < 0:
                        x_s_min = max(max_x-trim_size, 0)
                        x_s_max = min(min_x, data_width-trim_size)
                        if x_s_min == x_s_max:
                            x_s = x_s_min
                        else:
                            x_s = np.random.choice(range(x_s_min, x_s_max))
                    else:
                        x_s_add = int((box_region-trim_size)/2)
                        if x_s_add == 0:
                            x_s = min_x
                        else:
                            x_s = np.random.choice(range(min_x, min_x+x_s_add))
                # crop the image
                data = data[:, :, x_s:(x_s + trim_size), :]

                # shift x coordinate of gt_boxes
                gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
                gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
                # update gt bounding box according to the trim
                gt_boxes[:, 0].clamp_(0, trim_size - 1)
                gt_boxes[:, 2].clamp_(0, trim_size - 1)

        # based on the ratio, padding the image.
        # NOTE(review): data_height/data_width still hold the pre-crop sizes
        # here -- confirm the slices below match the cropped data[0].
        if ratio < 1:
            # this means that data_width < data_height
            # NOTE(review): trim_size is assigned but not used in this branch.
            trim_size = int(np.floor(data_width / ratio))

            padding_data = torch.FloatTensor(int(np.ceil(data_width / ratio)), \
                                             data_width, 3).zero_()

            padding_data[:data_height, :, :] = data[0]
            # update im_info
            im_info[0, 0] = padding_data.size(0)
            # print("height %d %d \n" %(index, anchor_idx))
        elif ratio > 1:
            # this means that data_width > data_height
            # if the image need to crop.
            padding_data = torch.FloatTensor(data_height, \
                                             int(np.ceil(data_height * ratio)), 3).zero_()
            padding_data[:, :data_width, :] = data[0]
            im_info[0, 1] = padding_data.size(1)
        else:
            # ratio == 1: cut the image to a square, no padding needed
            trim_size = min(data_height, data_width)
            # NOTE(review): this zero tensor is replaced by the next line.
            padding_data = torch.FloatTensor(trim_size, trim_size, 3).zero_()
            padding_data = data[0][:trim_size, :trim_size, :]
            # gt_boxes.clamp_(0, trim_size)
            gt_boxes[:, :4].clamp_(0, trim_size)
            im_info[0, 0] = trim_size
            im_info[0, 1] = trim_size

        # check the bounding box: drop boxes with zero width or zero height
        not_keep = (gt_boxes[:,0] == gt_boxes[:,2]) | (gt_boxes[:,1] == gt_boxes[:,3])
        keep = torch.nonzero(not_keep == 0).view(-1)

        # fixed-size output buffer; rows past num_boxes stay zero
        gt_boxes_padding = torch.FloatTensor(self.max_num_box, gt_boxes.size(1)).zero_()
        if keep.numel() != 0:
            gt_boxes = gt_boxes[keep]
            num_boxes = min(gt_boxes.size(0), self.max_num_box)
            gt_boxes_padding[:num_boxes,:] = gt_boxes[:num_boxes]
        else:
            num_boxes = 0

        # permute trim_data to adapt to downstream processing
        padding_data = padding_data.permute(2, 0, 1).contiguous()
        im_info = im_info.view(3)

        return padding_data, im_info, gt_boxes_padding, num_boxes
    else:
        # evaluation: no crop/pad, only move channels first
        data = data.permute(0, 3, 1, 2).contiguous().view(3, data_height, data_width)
        im_info = im_info.view(3)

        # placeholder box tensor; num_boxes = 0 marks it as empty
        gt_boxes = torch.FloatTensor([1,1,1,1,1])
        num_boxes = 0

        return data, im_info, gt_boxes, num_boxes
def get_bboxes_single(self,
                      cls_scores,
                      bbox_preds,
                      mlvl_anchors,
                      img_shape,
                      scale_factor,
                      cfg,
                      gt_bboxes,
                      gt_labels,
                      rescale=False,
                      parent_scores=None):
    """Turn the RPN outputs of one image into a set of scored proposals.

    For every feature level the scores are flattened, optionally reduced to
    the ``cfg.nms_pre`` best anchors, decoded with ``delta2bbox``, filtered
    by ``cfg.min_bbox_size`` and passed through NMS (or through one of the
    NMS-resampling variants when ``cfg.nms_resampling`` is set). The
    per-level results are concatenated and reduced to ``cfg.max_num``.

    Args:
        cls_scores: Per-level classification maps, permuted here with
            ``permute(1, 2, 0)``.
        bbox_preds: Per-level box-delta maps with the same spatial size.
        mlvl_anchors: Per-level anchors, one row per flattened location.
        img_shape: Image shape forwarded to ``delta2bbox``.
        scale_factor: Not used in this method.
        cfg: Config read for ``nms_pre``, ``min_bbox_size``,
            ``nms_resampling``, ``nms_thr``, ``nms_post``,
            ``nms_across_levels`` and ``max_num``.
        gt_bboxes: Ground-truth boxes, used only by NMS resampling.
        gt_labels: Ground-truth labels, used only by NMS resampling.
        rescale: Not used in this method.
        parent_scores: Not used in this method.

    Returns:
        Tensor of proposals with the score stored in column 4.
    """
    mlvl_proposals = []
    for idx, rpn_cls_score in enumerate(cls_scores):
        rpn_bbox_pred = bbox_preds[idx]
        assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
        rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
        if self.use_sigmoid_cls:
            rpn_cls_score = rpn_cls_score.reshape(-1)
            scores = rpn_cls_score.sigmoid()
        else:
            # two-way softmax: column 1 is used as the score
            rpn_cls_score = rpn_cls_score.reshape(-1, 2)
            scores = rpn_cls_score.softmax(dim=1)[:, 1]
        rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        anchors = mlvl_anchors[idx]
        # keep only the best-scoring candidates before decoding
        if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
            _, topk_inds = scores.topk(cfg.nms_pre)
            rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
            anchors = anchors[topk_inds, :]
            scores = scores[topk_inds]
        proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means,
                               self.target_stds, img_shape)
        if cfg.min_bbox_size > 0:
            w = proposals[:, 2] - proposals[:, 0] + 1
            h = proposals[:, 3] - proposals[:, 1] + 1
            # Index with the boolean mask directly. nonzero().squeeze()
            # collapses to a 0-d index when exactly one box is valid, which
            # drops the proposal dimension and breaks the cat/NMS below.
            valid_mask = (w >= cfg.min_bbox_size) & (h >= cfg.min_bbox_size)
            proposals = proposals[valid_mask, :]
            scores = scores[valid_mask]
        proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
        if cfg.nms_resampling is not None:  # only used in training
            # NOTE(review): any other mode name falls through without NMS.
            if cfg.nms_resampling[0] == 'discrete':
                a_r = cfg.nms_resampling[1]
                a_c = cfg.nms_resampling[2]
                a_f = cfg.nms_resampling[3]
                proposals = self.nms_resampling_discrete(
                    proposals, gt_bboxes, gt_labels, a_r, a_c, a_f)
            elif cfg.nms_resampling[0] == 'linear':
                thresh = cfg.nms_resampling[1]
                proposals = self.nms_resampling_linear(
                    proposals, gt_bboxes, gt_labels, thresh)
        else:
            proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.nms_post, :]
        mlvl_proposals.append(proposals)
    proposals = torch.cat(mlvl_proposals, 0)
    if cfg.nms_across_levels:
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.max_num, :]
    else:
        # no cross-level NMS: keep the max_num highest-scoring proposals
        scores = proposals[:, 4]
        num = min(cfg.max_num, proposals.shape[0])
        _, topk_inds = scores.topk(num)
        proposals = proposals[topk_inds, :]
    return proposals
def beam_search_decode(self, input_tensor_with_lengths, beam_size=1, **kwargs):
    """Decode a batch with beam search and return the best hypotheses.

    :param input_tensor_with_lengths: pair ``(input_tensor, lengths)``; only
        ``input_tensor`` is used and it is unpacked as
        ``(input_sequence_length, batch_size)``
    :param beam_size: number of hypotheses kept in the beam
    :param kwargs: accepted but not used in this method
    :return: ``(result, None, zeros(1), 1, 1)`` where ``result`` has shape
        ``(target_length, batch_size)`` and holds, per batch column, the
        tokens of the best-scoring hypothesis without its first (bos) token
    """
    input_tensor, _ = input_tensor_with_lengths
    input_sequence_length, batch_size = input_tensor.size()
    # upper bound on the number of decoding steps
    target_length = min(int(cfg.maximum_decoding_length * 1.1),
                        input_sequence_length * 2)
    src_mask = None  # self.backbone.generate_square_subsequent_mask(input_tensor.size(0)).to(device)
    # True at padding positions, transposed to (batch, sequence)
    src_key_padding_mask = (
        input_tensor == self.SRC.vocab.stoi[cfg.pad_token]).transpose(
            0, 1).to(device)
    # every hypothesis starts with the bos token
    init_ys = torch.ones(1, batch_size).fill_(
        self.TGT.vocab.stoi[cfg.bos_token]).type_as(input_tensor.data)
    memory = self.backbone.encoder(
        self.src_embed(input_tensor),
        mask=src_mask,
        src_key_padding_mask=src_key_padding_mask)
    # a node is a triple (tokens so far, accumulated score per batch item,
    # eos-already-produced flag per batch item)
    nodes = [(init_ys, torch.zeros(batch_size, device=device),
              torch.zeros(batch_size, device=device).bool())]
    final_results = []

    for i in range(target_length - 1):
        # the beam shrinks by one for every finished hypothesis
        k = beam_size - len(final_results)
        if k < 1:
            break
        all_predictions = torch.zeros(batch_size, len(nodes) * k,
                                      device=device).long()
        all_lm_scores = torch.zeros(batch_size, len(nodes) * k,
                                    device=device).float()
        # iterating over all the available hypotheses to expand the beams
        for n_id, (ys, lm_scores, eos_predicted) in enumerate(nodes):
            prob = self.extract_output_probabilities(
                ys, memory, src_key_padding_mask)
            k_values, k_indices = torch.topk(prob, dim=1, k=k)
            for beam_index in range(k):
                # candidates of node n_id occupy columns n_id*k .. n_id*k+k-1
                overall_index = n_id * k + beam_index
                all_predictions[:, overall_index] = k_indices[:, beam_index]
                all_lm_scores[:, overall_index] = lm_scores + k_values[:, beam_index]
        # best k candidates over all expanded nodes, per batch item
        k_values, k_indices = torch.topk(all_lm_scores, dim=1, k=k)
        temp_next_nodes = []
        # creating the next k hypotheses
        for beam_index in range(k):
            # column // k recovers the parent node of each chosen candidate
            node_ids = k_indices[:, beam_index] // k
            node_ids = list(
                node_ids.cpu().numpy())  # list of size batch_size
            pred_ids = list(k_indices[:, beam_index].cpu().numpy())
            lm_score = k_values[:, beam_index]
            next_word = torch.zeros((batch_size, ), device=device).long()
            for b in range(batch_size):
                next_word[b] = all_predictions[b, pred_ids[b]]
            # eos flag inherited from each batch item's parent node ...
            eos_p = torch.cat([
                nodes[n_id][2][b_id].unsqueeze(0)
                for b_id, n_id in enumerate(node_ids)
            ],
                              dim=0)
            # ... OR-ed with "the new token is eos"
            eos_predicted = torch.max(
                eos_p, (next_word == self.TGT.vocab.stoi[cfg.eos_token]))
            # gather each batch item's history from its own parent node
            ys = torch.cat([
                nodes[n_id][0][:, b_id].unsqueeze(1)
                for b_id, n_id in enumerate(node_ids)
            ],
                           dim=1)
            ys = torch.cat([ys, next_word.view(1, batch_size)], dim=0)
            next_step_node = (ys, lm_score, eos_predicted)
            # a hypothesis is final once every batch item has produced eos
            if sum(eos_predicted.int()) == batch_size:
                final_results.append(next_step_node)
            else:
                temp_next_nodes.append(next_step_node)
        del nodes[:]
        nodes = temp_next_nodes

    # nothing finished within the step budget: fall back to the open nodes
    if not len(final_results):
        for node in nodes:
            final_results.append(node)

    # creating the final result based on the best scoring hypotheses
    result = torch.zeros(target_length, batch_size, device=device)
    # length penalty as a function of the hypothesis length l
    lp = lambda l: ((5 + l)**self.beam_search_length_norm_factor) / (
        5 + 1)**self.beam_search_length_norm_factor
    for b_ind in range(batch_size):
        best_score = float('-inf')
        best_tokens = None
        for node in final_results:
            tokens = node[0][:, b_ind]
            # hypothesis length = position of the first eos, if any
            eos_ind = torch.nonzero(torch.eq(
                tokens, self.TGT.vocab.stoi[cfg.eos_token]),
                                    as_tuple=False).view(-1)
            if eos_ind.size(0):
                tsize = eos_ind[0].item()
            else:
                tsize = tokens.size(0)
            # based on Google's NMT system paper [https://arxiv.org/pdf/1609.08144.pdf]
            # since coverage is not being tracked here, coverage penalty is not also considered in this formula
            lms = node[1][b_ind].item() / lp(tsize)
            if lms > best_score:
                best_score = lms
                best_tokens = tokens
        # NOTE(review): best_tokens stays None if no score compares greater
        # than -inf (e.g. NaN scores) -- the next line would then fail.
        # store the winner without its leading bos token
        result[:best_tokens[1:].size(0), b_ind] = best_tokens[1:]
    max_attention_indices = None
    return result, max_attention_indices, torch.zeros(1, device=device), 1, 1
def sparse(dense):
    """Convert a dense tensor to a sparse COO tensor holding its non-zeros.

    Works for any number of dimensions (one or more) and keeps the dtype and
    device of ``dense``.

    Args:
        dense: Dense tensor to convert.

    Returns:
        Sparse COO tensor with the same size as ``dense`` whose stored
        entries are exactly the non-zero elements of ``dense``.
    """
    # (ndim, nnz) coordinate matrix, the layout sparse_coo_tensor expects
    indices = torch.nonzero(dense).t()
    # One index tensor per dimension, so the gather works for every rank
    # instead of only the 2-D case.
    values = dense[indices.unbind(0)]
    return torch.sparse_coo_tensor(indices, values, dense.size())
def create_thinning_recipe_filters(sgraph, model, zeros_mask_dict):
    """Create a recipe for removing filters from Convolution layers.

    The 4D weights of the model parameters (i.e. the convolution parameters) are
    examined one by one, to determine which has filters that are all zeros.
    For each weights tensor that has at least one zero-filter, we create a
    "thinning recipe".
    The thinning recipe contains meta-instructions of how the model
    should be changed in order to remove the filters.

    Args:
        sgraph: Graph object queried through ``successors_f`` for the
            Conv/Gemm and BatchNormalization layers following a layer.
        model: Model whose named modules and parameters are inspected.
        zeros_mask_dict: Not used in this function.

    Returns:
        The populated ``ThinningRecipe``.
    """
    msglogger.info("Invoking create_thinning_recipe_filters")

    thinning_recipe = ThinningRecipe(modules={}, parameters={})
    layers = dict(model.named_modules())

    for param_name, param in model.named_parameters():
        # We are only interested in 4D weights
        if param.dim() != 4:
            continue

        # Find the number of zero-valued filters in this weights tensor
        filter_view = param.view(param.size(0), -1)
        num_filters = filter_view.size()[0]
        nonzero_filters = torch.nonzero(filter_view.abs().sum(dim=1))
        num_nonzero = len(nonzero_filters)

        # If no filter of this tensor is all-zero there is nothing to thin:
        # continue to the next tensor
        if num_filters <= num_nonzero:
            msglogger.debug("SKipping {} shape={}".format(param_name_2_layer_name(param_name), param.shape))
            continue

        msglogger.info("In tensor %s found %d/%d zero filters", param_name,
                       num_filters - num_nonzero, num_filters)

        # We are removing filters, so update the number of outgoing channels (OFMs)
        # in the convolutional layer
        layer_name = param_name_2_layer_name(param_name)
        assert isinstance(layers[layer_name], torch.nn.modules.Conv2d)
        append_module_directive(thinning_recipe, layer_name, key='out_channels', val=num_nonzero)

        # Select only the non-zero filters. view(-1) keeps the index tensor
        # 1-D even when a single filter survives; squeeze() would turn it
        # into a 0-d tensor.
        indices = nonzero_filters.data.view(-1)
        append_param_directive(thinning_recipe, param_name, (0, indices))

        if layers[layer_name].bias is not None:
            # This convolution has bias coefficients
            append_param_directive(thinning_recipe, layer_name+'.bias', (0, indices))

        # Find all instances of Convolution or FC (GEMM) layers that immediately follow this layer
        successors = sgraph.successors_f(normalize_module_name(layer_name), ['Conv', 'Gemm'])
        # Convert the layers names to PyTorch's convoluted naming scheme (when DataParallel is used)
        successors = [denormalize_module_name(model, successor) for successor in successors]
        for successor in successors:
            if isinstance(layers[successor], torch.nn.modules.Conv2d):
                # For each of the convolutional layers that follow, we have to reduce the number of input channels.
                append_module_directive(thinning_recipe, successor, key='in_channels', val=num_nonzero)
                msglogger.info("[recipe] {}: setting in_channels = {}".format(successor, num_nonzero))

                # Now remove channels from the weights tensor of the successor conv
                append_param_directive(thinning_recipe, successor+'.weight', (1, indices))

            elif isinstance(layers[successor], torch.nn.modules.Linear):
                # If a Linear (Fully-Connected) layer follows, we need to update it's in_features member
                fm_size = layers[successor].in_features // layers[layer_name].out_channels
                in_features = fm_size * num_nonzero
                append_module_directive(thinning_recipe, successor, key='in_features', val=in_features)
                msglogger.info("[recipe] {}: setting in_features = {}".format(successor, in_features))

                # Now remove channels from the weights tensor of the successor FC layer:
                # This is a bit tricky: the 2D FC weights are viewed as 4D so
                # that whole input channels can be selected, then viewed back.
                # NOTE(review): int(sqrt(fm_size)) assumes a square feature map.
                fm_height = fm_width = int(math.sqrt(fm_size))
                view_4D = (layers[successor].out_features, layers[layer_name].out_channels, fm_height, fm_width)
                view_2D = (layers[successor].out_features, in_features)
                append_param_directive(thinning_recipe, successor+'.weight', (1, indices, view_4D, view_2D))

        # Now handle the BatchNormalization layer that follows the convolution
        bn_layers = sgraph.successors_f(normalize_module_name(layer_name), ['BatchNormalization'])
        if len(bn_layers) > 0:
            assert len(bn_layers) == 1
            # Thinning of the BN layer that follows the convolution
            bn_layer_name = denormalize_module_name(model, bn_layers[0])
            bn_thinning(thinning_recipe, layers, bn_layer_name,
                        len_thin_features=num_nonzero, thin_features=indices)
    return thinning_recipe
def __getitem__(self, index):
    """Load one roidb entry and return it as tensors.

    In training mode the ground-truth boxes are shuffled, the image is
    optionally cropped (when the roidb entry is flagged ``need_crop``) and
    then zero-padded (or trimmed to a square when ``ratio == 1``) according to
    ``self.ratio_list_batch[index]``.  Degenerate boxes (zero width or zero
    height) are dropped and the remaining ones are copied into a zero-filled
    tensor with ``self.max_num_box`` rows.

    Args:
        index: position of the sample.  In training mode it is mapped through
            ``self.ratio_index`` to find the roidb entry.

    Returns:
        Training mode: ``(padding_data, im_info, gt_boxes_padding, num_boxes)``
        where ``padding_data`` is channel-first, ``im_info`` has 3 elements,
        ``gt_boxes_padding`` is ``(self.max_num_box, gt_boxes.size(1))`` and
        ``num_boxes`` is the number of valid rows in it.

        Evaluation mode: ``(data, im_info, gt_boxes, num_boxes)`` with the
        image reshaped to ``(3, height, width)``, a placeholder ``gt_boxes``
        of five ones and ``num_boxes == 0``.
    """

    def _pick_crop_start(box_min, box_max, crop_len, full_len):
        """Choose the start of a ``crop_len`` window along one image axis.

        ``box_min`` / ``box_max`` are the extreme box coordinates on that axis
        and ``full_len`` is the image extent on that axis.
        """
        if box_min == 0:
            return 0
        box_region = box_max - box_min + 1
        if box_region - crop_len < 0:
            # All boxes fit in the window: pick a start that keeps them inside.
            start_min = max(box_max - crop_len, 0)
            start_max = min(box_min, full_len - crop_len)
            if start_min == start_max:
                return start_min
            return np.random.choice(range(start_min, start_max))
        # Boxes span more than the window: start somewhere in the first half
        # of the overshoot.
        slack = int((box_region - crop_len) / 2)
        if slack == 0:
            return box_min
        return np.random.choice(range(box_min, box_min + slack))

    if self.training:
        index_ratio = int(self.ratio_index[index])
    else:
        index_ratio = index

    # A "minibatch" of exactly one roidb entry.
    minibatch_db = [self._roidb[index_ratio]]
    blobs = get_minibatch(minibatch_db, self._num_classes)
    data = torch.from_numpy(blobs['data'])
    im_info = torch.from_numpy(blobs['im_info'])
    # data is indexed as (1, height, width, channels) below.
    data_height, data_width = data.size(1), data.size(2)

    if not self.training:
        data = data.permute(0, 3, 1, 2).contiguous().view(
            3, data_height, data_width)
        im_info = im_info.view(3)
        gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
        num_boxes = 0
        return data, im_info, gt_boxes, num_boxes

    # Randomly shuffle the bounding boxes (in place on the blob).
    np.random.shuffle(blobs['gt_boxes'])
    gt_boxes = torch.from_numpy(blobs['gt_boxes'])

    ratio = self.ratio_list_batch[index]

    # If the image needs cropping, crop it to the target size.
    # NOTE(review): torch.min / torch.max below require at least one box, so
    # an entry flagged need_crop is assumed to have ground truth -- confirm.
    if self._roidb[index_ratio]['need_crop']:
        if ratio < 1:
            # data_width << data_height: crop along the height.
            min_y = int(torch.min(gt_boxes[:, 1]))
            max_y = int(torch.max(gt_boxes[:, 3]))
            trim_size = int(np.floor(data_width / ratio))
            if trim_size > data_height:
                trim_size = data_height
            y_s = _pick_crop_start(min_y, max_y, trim_size, data_height)

            # Crop the image.
            data = data[:, y_s:(y_s + trim_size), :, :]

            # Shift the y coordinates of the boxes into the cropped frame.
            gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
            gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)

            # Clip the boxes to the cropped image.
            gt_boxes[:, 1].clamp_(0, trim_size - 1)
            gt_boxes[:, 3].clamp_(0, trim_size - 1)
        else:
            # data_width >> data_height: crop along the width.
            min_x = int(torch.min(gt_boxes[:, 0]))
            max_x = int(torch.max(gt_boxes[:, 2]))
            trim_size = int(np.ceil(data_height * ratio))
            if trim_size > data_width:
                trim_size = data_width
            x_s = _pick_crop_start(min_x, max_x, trim_size, data_width)

            # Crop the image.
            data = data[:, :, x_s:(x_s + trim_size), :]

            # Shift the x coordinates of the boxes into the cropped frame.
            gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
            gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)

            # Clip the boxes to the cropped image.
            gt_boxes[:, 0].clamp_(0, trim_size - 1)
            gt_boxes[:, 2].clamp_(0, trim_size - 1)

    # Pad (or trim) the image according to the target ratio.
    if ratio < 1:
        # data_width < data_height: pad along the height.
        padding_data = torch.FloatTensor(
            int(np.ceil(data_width / ratio)), data_width, 3).zero_()
        padding_data[:data_height, :, :] = data[0]
        im_info[0, 0] = padding_data.size(0)
    elif ratio > 1:
        # data_width > data_height: pad along the width.
        padding_data = torch.FloatTensor(
            data_height, int(np.ceil(data_height * ratio)), 3).zero_()
        padding_data[:, :data_width, :] = data[0]
        im_info[0, 1] = padding_data.size(1)
    else:
        # Square target: trim to the shorter side.
        trim_size = min(data_height, data_width)
        padding_data = data[0][:trim_size, :trim_size, :]
        gt_boxes[:, :4].clamp_(0, trim_size)
        im_info[0, 0] = trim_size
        im_info[0, 1] = trim_size

    # Drop boxes that collapsed to zero width or zero height.
    if len(gt_boxes) > 0:
        degenerate = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (
            gt_boxes[:, 1] == gt_boxes[:, 3])
        keep = torch.nonzero(degenerate == 0).view(-1)
    else:
        # No ground truth at all: nothing to keep.  Defining `keep` here
        # avoids using an unassigned name below.
        keep = torch.LongTensor()

    gt_boxes_padding = torch.FloatTensor(
        self.max_num_box, gt_boxes.size(1)).zero_()
    if keep.numel() != 0:
        gt_boxes = gt_boxes[keep]
        num_boxes = min(gt_boxes.size(0), self.max_num_box)
        gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]
    else:
        num_boxes = 0

    # Channel-first layout for downstream processing.
    padding_data = padding_data.permute(2, 0, 1).contiguous()
    im_info = im_info.view(3)

    return padding_data, im_info, gt_boxes_padding, num_boxes
def write_results_half(prediction, confidence, num_classes, nms=True, nms_conf=0.4):
    """Threshold, convert and (optionally) NMS-filter half-precision detections.

    Args:
        prediction: tensor indexed as ``(batch, box, attribute)``.  Attributes
            0-3 are read as centre x, centre y, width and height, attribute 4
            as the objectness score and attributes ``5 .. 5 + num_classes`` as
            per-class scores.
        confidence: boxes whose objectness is not above this are zeroed out.
        num_classes: number of class-score columns.
        nms: when true, run class-wise non-maximum suppression.
        nms_conf: IoU threshold; boxes whose IoU with a higher-ranked box of
            the same class is not below this are suppressed.

    Returns:
        ``0`` when no box survives, otherwise a tensor with one row per kept
        detection and columns ``(batch index, x1, y1, x2, y2, objectness,
        best class score, best class index)``.
    """
    conf_mask = (prediction[:, :, 4] > confidence).half().unsqueeze(2)
    prediction = prediction * conf_mask

    # Nothing above the threshold anywhere in the batch.  torch.nonzero does
    # not raise for an all-zero input, so test the result explicitly instead
    # of relying on an exception.
    if torch.nonzero(prediction[:, :, 4]).numel() == 0:
        return 0

    # Convert (centre x, centre y, width, height) to corner coordinates.
    box_a = prediction.new(prediction.shape)
    box_a[:, :, 0] = (prediction[:, :, 0] - prediction[:, :, 2] / 2)
    box_a[:, :, 1] = (prediction[:, :, 1] - prediction[:, :, 3] / 2)
    box_a[:, :, 2] = (prediction[:, :, 0] + prediction[:, :, 2] / 2)
    box_a[:, :, 3] = (prediction[:, :, 1] + prediction[:, :, 3] / 2)
    prediction[:, :, :4] = box_a[:, :, :4]

    batch_size = prediction.size(0)
    detections = []

    for ind in range(batch_size):
        image_pred = prediction[ind]

        # Replace the per-class scores by the best score and its class index.
        best_score, best_class = torch.max(image_pred[:, 5:5 + num_classes], 1)
        best_score = best_score.half().unsqueeze(1)
        best_class = best_class.half().unsqueeze(1)
        image_pred = torch.cat((image_pred[:, :5], best_score, best_class), 1)

        # Drop the rows that were zeroed by the confidence mask.  view(-1)
        # keeps the index 1-D even when exactly one row survives.
        non_zero_ind = torch.nonzero(image_pred[:, 4]).view(-1)
        if non_zero_ind.numel() == 0:
            continue
        image_pred_ = image_pred[non_zero_ind]

        # Classes detected in this image.
        img_classes = unique(image_pred_[:, -1].long()).half()

        # NMS is done class by class.
        for cls in img_classes:
            # Rows of this class whose class score is non-zero.
            cls_mask = image_pred_ * (image_pred_[:, -1] == cls).half().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2]).view(-1)
            image_pred_class = image_pred_[class_mask_ind]

            # Highest objectness first.
            conf_sort_index = torch.sort(image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.size(0)

            if nms:
                for i in range(idx):
                    # IoU of box i with every box ranked after it.  The set
                    # shrinks as boxes are suppressed, so i can run past the
                    # end; that ends the loop.
                    try:
                        ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                        image_pred_class[i + 1:])
                    except (ValueError, IndexError):
                        break

                    # Zero out the detections whose IoU reaches the threshold.
                    iou_mask = (ious < nms_conf).half().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask

                    # Keep only the rows that were not zeroed.
                    non_zero_ind = torch.nonzero(image_pred_class[:, 4]).view(-1)
                    image_pred_class = image_pred_class[non_zero_ind]

            # Prefix every detection with the index of its image in the batch.
            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    if not detections:
        return 0
    return torch.cat(detections)
else: # Simply repeat the boxes, once for each class pred_boxes = np.tile(boxes, (1, scores.shape[1])) pred_boxes /= data[1][0][2] scores = scores.squeeze() pred_boxes = pred_boxes.squeeze() det_toc = time.time() detect_time = det_toc - det_tic misc_tic = time.time() if vis: im = cv2.imread(imdb.image_path_at(i)) im2show = np.copy(im) for j in xrange(1, imdb.num_classes): inds = torch.nonzero(scores[:,j]>thresh).view(-1) # if there is det if inds.numel() > 0: cls_scores = scores[:,j][inds] _, order = torch.sort(cls_scores, 0, True) if args.class_agnostic: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] if vis: im2show = vis_detections(im2show, imdb.classes[j], cls_dets.cpu().numpy(), 0.3)
else: # Simply repeat the boxes, once for each class pred_boxes = np.tile(boxes, (1, scores.shape[1])) pred_boxes /= data[1][0][2] scores = scores.squeeze() pred_boxes = pred_boxes.squeeze() det_toc = time.time() detect_time = det_toc - det_tic misc_tic = time.time() if vis: im = cv2.imread(imdb.image_path_at(i)) im2show = np.copy(im) for j in xrange(1, imdb.num_classes): inds = torch.nonzero(scores[:, j] > thresh).view(-1) # if there is det if inds.numel() > 0: cls_scores = scores[:, j][inds] _, order = torch.sort(cls_scores, 0, True) if args.class_agnostic: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] if vis and j == 7:
def test_nonzero(self):
    """Run ``torch.nonzero`` through ``self.assertONNX`` on a 2x2x2 input."""
    # Float input containing both zero and non-zero entries, with gradient
    # tracking enabled.
    values = [
        [[2., 2.], [1., 0.]],
        [[0., 0.], [1., 1.]],
    ]
    x = torch.tensor(values, requires_grad=True)
    self.assertONNX(lambda t: torch.nonzero(t), x)
def forward(self, input):
    """Assign RPN training labels and regression targets to anchors.

    Algorithm:
        * place ``self._num_anchors`` anchors on every cell of the score map,
        * discard anchors that cross the image border by more than
          ``self._allowed_border``,
        * label the remaining anchors (1 positive, 0 negative, -1 don't care)
          from their overlap with the ground-truth boxes,
        * subsample positives / negatives per image,
        * compute box-regression targets and loss weights,
        * scatter everything back to the full anchor set.

    Args:
        input: sequence whose first three items are ``rpn_cls_score`` (only
            its ``size(2)`` / ``size(3)`` are used), ``gt_boxes`` (viewed as
            rows of 5 values, batch first) and ``im_info``.  Only
            ``im_info[0]`` is read, for every image of the batch
            (presumably ``[height, width, ...]`` -- confirm against caller).
            A fourth item, if present, is ignored.

    Returns:
        list of four tensors:
        ``labels`` ``(batch, 1, A * height, width)``,
        ``bbox_targets`` ``(batch, A * 4, height, width)``,
        ``bbox_inside_weights`` ``(batch, A * 4, height, width)``,
        ``bbox_outside_weights`` ``(batch, A * 4, height, width)``.
    """
    rpn_cls_score = input[0]
    gt_boxes = input[1]
    im_info = input[2]

    # Spatial size of the score map.
    height, width = rpn_cls_score.size(2), rpn_cls_score.size(3)

    batch_size = gt_boxes.size(0)

    # One shift per feature-map cell, in input-image pixels.
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = torch.from_numpy(
        np.vstack((shift_x.ravel(), shift_y.ravel(),
                   shift_x.ravel(), shift_y.ravel())).transpose())
    shifts = shifts.contiguous().type_as(rpn_cls_score).float()

    A = self._num_anchors
    K = shifts.size(0)

    # Move the base anchors to the type / device of gt_boxes.
    self._anchors = self._anchors.type_as(gt_boxes)
    all_anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    all_anchors = all_anchors.view(K * A, 4)

    total_anchors = int(K * A)

    # int() instead of the Python-2-only long(); behaves the same on both.
    keep = ((all_anchors[:, 0] >= -self._allowed_border) &
            (all_anchors[:, 1] >= -self._allowed_border) &
            (all_anchors[:, 2] < int(im_info[0][1]) + self._allowed_border) &
            (all_anchors[:, 3] < int(im_info[0][0]) + self._allowed_border))

    inds_inside = torch.nonzero(keep).view(-1)

    # Keep only the inside anchors.
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is don't care.
    labels = gt_boxes.new(batch_size, inds_inside.size(0)).fill_(-1)
    bbox_inside_weights = gt_boxes.new(batch_size, inds_inside.size(0)).zero_()
    bbox_outside_weights = gt_boxes.new(batch_size, inds_inside.size(0)).zero_()

    overlaps = bbox_overlaps_batch(anchors, gt_boxes)

    max_overlaps, argmax_overlaps = torch.max(overlaps, 2)
    gt_max_overlaps, _ = torch.max(overlaps, 1)

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # Avoid matching anchors to ground-truth columns with zero overlap.
    gt_max_overlaps[gt_max_overlaps == 0] = 1e-5
    keep = torch.sum(
        overlaps.eq(gt_max_overlaps.view(batch_size, 1, -1).expand_as(overlaps)), 2)

    # fg label: the anchor(s) with the highest overlap for each gt box.
    if torch.sum(keep) > 0:
        labels[keep > 0] = 1

    # fg label: above threshold IoU.
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)

    sum_fg = torch.sum((labels == 1).int(), 1)
    sum_bg = torch.sum((labels == 0).int(), 1)

    for i in range(batch_size):
        # Subsample positive labels if we have too many.
        if sum_fg[i] > num_fg:
            fg_inds = torch.nonzero(labels[i] == 1).view(-1)
            # numpy permutation instead of torch.randperm, which has been
            # reported to segfault in multi-GPU settings
            # (https://github.com/pytorch/pytorch/issues/1868).
            rand_num = torch.from_numpy(
                np.random.permutation(fg_inds.size(0))).type_as(gt_boxes).long()
            disable_inds = fg_inds[rand_num[:fg_inds.size(0) - num_fg]]
            labels[i][disable_inds] = -1

        # Positives remaining in this image after subsampling.
        num_bg = cfg.TRAIN.RPN_BATCHSIZE - torch.sum((labels[i] == 1).int())

        # Subsample negative labels if we have too many.
        if sum_bg[i] > num_bg:
            bg_inds = torch.nonzero(labels[i] == 0).view(-1)
            rand_num = torch.from_numpy(
                np.random.permutation(bg_inds.size(0))).type_as(gt_boxes).long()
            disable_inds = bg_inds[rand_num[:bg_inds.size(0) - num_bg]]
            labels[i][disable_inds] = -1

    # Turn per-image gt indices into indices into the flattened gt_boxes.
    offset = torch.arange(0, batch_size) * gt_boxes.size(1)
    argmax_overlaps = argmax_overlaps + offset.view(
        batch_size, 1).type_as(argmax_overlaps)
    bbox_targets = _compute_targets_batch(
        anchors,
        gt_boxes.view(-1, 5)[argmax_overlaps.view(-1), :].view(batch_size, -1, 5))

    # Use a single value instead of 4 values for easy indexing.
    bbox_inside_weights[labels == 1] = cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS[0]

    # The weights are derived from the last image of the batch; the original
    # code did this implicitly through the leaked loop variable.
    reference_labels = labels[batch_size - 1]
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # Uniform weighting of all sampled examples.
        num_examples = torch.sum(reference_labels >= 0)
        positive_weights = 1.0 / num_examples.item()
        negative_weights = 1.0 / num_examples.item()
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        # Split the total weight between positives and negatives.  This
        # branch previously left both weights unassigned.
        num_pos = torch.sum(reference_labels == 1).item()
        num_neg = torch.sum(reference_labels == 0).item()
        positive_weights = cfg.TRAIN.RPN_POSITIVE_WEIGHT / max(num_pos, 1)
        negative_weights = (1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / max(num_neg, 1)

    bbox_outside_weights[labels == 1] = positive_weights
    bbox_outside_weights[labels == 0] = negative_weights

    # Scatter the per-inside-anchor values back to the full anchor set.
    labels = _unmap(labels, total_anchors, inds_inside, batch_size, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, batch_size, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside,
                                 batch_size, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside,
                                  batch_size, fill=0)

    outputs = []

    labels = labels.view(batch_size, height, width, A).permute(0, 3, 1, 2).contiguous()
    labels = labels.view(batch_size, 1, A * height, width)
    outputs.append(labels)

    bbox_targets = bbox_targets.view(
        batch_size, height, width, A * 4).permute(0, 3, 1, 2).contiguous()
    outputs.append(bbox_targets)

    # Expand the single per-anchor weight to the 4 box coordinates.
    anchors_count = bbox_inside_weights.size(1)
    bbox_inside_weights = bbox_inside_weights.view(
        batch_size, anchors_count, 1).expand(batch_size, anchors_count, 4)
    bbox_inside_weights = bbox_inside_weights.contiguous().view(
        batch_size, height, width, 4 * A).permute(0, 3, 1, 2).contiguous()
    outputs.append(bbox_inside_weights)

    bbox_outside_weights = bbox_outside_weights.view(
        batch_size, anchors_count, 1).expand(batch_size, anchors_count, 4)
    bbox_outside_weights = bbox_outside_weights.contiguous().view(
        batch_size, height, width, 4 * A).permute(0, 3, 1, 2).contiguous()
    outputs.append(bbox_outside_weights)

    return outputs
def create_non_correspondences(uv_b_matches, img_b_shape, num_non_matches_per_match=100, img_b_mask=None):
    """
    Takes in pixel matches (uv_b_matches) that correspond to matches in another image,
    and generates non-matches by just sampling in image space.

    Optionally, the non-matches can be sampled from a mask for image b.

    Returns non-matches as pixel positions in image b.

    Non-matches that land within one pixel of their match in BOTH u and v are
    perturbed by a random offset; coordinates pushed outside the image are
    wrapped around.

    Please see 'coordinate_conventions.md' documentation for an explanation of
    pixel coordinate conventions.

    ## Note that arg uv_b_matches are the outputs of batch_find_pixel_correspondences()

    :param uv_b_matches: tuple of torch.FloatTensors, where each FloatTensor is length n, i.e.:
        (torch.FloatTensor, torch.FloatTensor).  If None, None is returned.

    :param img_b_shape: tuple of (H,W) which is the shape of the image

    (optional)
    :param num_non_matches_per_match: int

    (optional)
    :param img_b_mask: torch.FloatTensor
        - masked image, we will select from the non-zero entries
        - shape is H x W

    :return: tuple of torch.FloatTensors, i.e. (torch.FloatTensor, torch.FloatTensor).
        - The first element of the tuple is all "u" pixel positions, and the
          second element of the tuple is all "v" positions
        - Each tensor is of shape (num_matches, num_non_matches_per_match)
        - This shape makes it so that each row of the non-matches corresponds
          to the row for the match in uv_a
    """
    image_width = img_b_shape[1]
    image_height = img_b_shape[0]

    if uv_b_matches is None:
        return None

    num_matches = len(uv_b_matches[0])
    num_samples = num_matches * num_non_matches_per_match

    def get_random_uv_b_non_matches():
        return pytorch_rand_select_pixel(width=image_width, height=image_height,
                                         num_samples=num_samples)

    def wrap_into_range(coords, size):
        """Wrap coordinates that left [0, size - 1] back into the image."""
        lower_bound_vec = torch.ones_like(coords) * 0.0
        upper_bound_vec = torch.ones_like(coords) * (size * 1.0 - 1)
        coords = where(coords > upper_bound_vec, coords - upper_bound_vec, coords)
        coords = where(coords < lower_bound_vec, coords + upper_bound_vec, coords)
        return coords

    if img_b_mask is not None:
        img_b_mask_flat = img_b_mask.view(-1, 1).squeeze(1)
        mask_b_indices_flat = torch.nonzero(img_b_mask_flat)
        if len(mask_b_indices_flat) == 0:
            # Parenthesised form prints the same text under Python 2 and 3.
            print("warning, empty mask b")
            uv_b_non_matches = get_random_uv_b_non_matches()
        else:
            # Sample (with replacement) flat indices of non-zero mask pixels.
            rand_numbers_b = torch.rand(num_samples) * len(mask_b_indices_flat)
            rand_indices_b = torch.floor(rand_numbers_b).long()
            randomized_mask_b_indices_flat = torch.index_select(
                mask_b_indices_flat, 0, rand_indices_b).squeeze(1)
            # Flat index -> (u, v).  Floor division keeps v an integer row
            # index; "/" is true division on current PyTorch.
            uv_b_non_matches = (randomized_mask_b_indices_flat % image_width,
                                randomized_mask_b_indices_flat // image_width)
    else:
        uv_b_non_matches = get_random_uv_b_non_matches()

    # One row of non-matches per match.
    uv_b_non_matches = (uv_b_non_matches[0].view(num_matches, num_non_matches_per_match),
                        uv_b_non_matches[1].view(num_matches, num_non_matches_per_match))

    # uv_b_matches can now be used to make sure no "non_matches" are too close.
    # To preserve tensor size, rather than pruning, we perturb these in pixel space.
    copied_uv_b_matches_0 = torch.t(uv_b_matches[0].repeat(num_non_matches_per_match, 1))
    copied_uv_b_matches_1 = torch.t(uv_b_matches[1].repeat(num_non_matches_per_match, 1))

    diffs_0 = copied_uv_b_matches_0 - uv_b_non_matches[0].type(dtype_float)
    diffs_1 = copied_uv_b_matches_1 - uv_b_non_matches[1].type(dtype_float)

    diffs_0_flattened = torch.abs(diffs_0.view(-1, 1)).squeeze(1)
    diffs_1_flattened = torch.abs(diffs_1.view(-1, 1)).squeeze(1)

    # A non-match is too close to its match when it is within the threshold
    # in both u and v.  The result is a 0/1 float vector.  (Previously the
    # "ones" tensor was built with zeros_like, so nothing was ever flagged.)
    num_pixels_too_close = 1.0
    too_close = ((diffs_0_flattened < num_pixels_too_close) &
                 (diffs_1_flattened < num_pixels_too_close))
    need_to_be_perturbed = too_close.type(dtype_float)

    # Random offset of at least +/- minimal_perturb, plus Gaussian noise.
    minimal_perturb = num_pixels_too_close / 2
    minimal_perturb_vector = ((torch.rand(len(need_to_be_perturbed)) * 2).floor()
                              * (minimal_perturb * 2) - minimal_perturb)
    std_dev = 10
    random_vector = torch.randn(len(need_to_be_perturbed)) * std_dev + minimal_perturb_vector
    perturb_vector = need_to_be_perturbed * random_vector

    uv_b_non_matches_0_flat = uv_b_non_matches[0].view(-1, 1).type(dtype_float).squeeze(1)
    uv_b_non_matches_1_flat = uv_b_non_matches[1].view(-1, 1).type(dtype_float).squeeze(1)

    uv_b_non_matches_0_flat = uv_b_non_matches_0_flat + perturb_vector
    uv_b_non_matches_1_flat = uv_b_non_matches_1_flat + perturb_vector

    # Now just need to wrap around any that went out of bounds.
    uv_b_non_matches_0_flat = wrap_into_range(uv_b_non_matches_0_flat, image_width)
    uv_b_non_matches_1_flat = wrap_into_range(uv_b_non_matches_1_flat, image_height)

    return (uv_b_non_matches_0_flat.view(num_matches, num_non_matches_per_match),
            uv_b_non_matches_1_flat.view(num_matches, num_non_matches_per_match))
def _get_bboxes_single(self,
                       cls_scores,
                       bbox_preds,
                       mlvl_anchors,
                       img_shape,
                       scale_factor,
                       cfg,
                       rescale=False):
    """Transform outputs for a single batch item into bbox predictions.

    Args:
        cls_scores (list[Tensor]): Box scores for each scale level.
            Has shape (num_anchors * num_classes, H, W).
        bbox_preds (list[Tensor]): Box energies / deltas for each scale
            level with shape (num_anchors * 4, H, W).
        mlvl_anchors (list[Tensor]): Box reference for each scale level
            with shape (num_total_anchors, 4).
        img_shape (tuple[int]): Shape of the input image,
            (height, width, 3).
        scale_factor (ndarray): Scale factor of the image arranged as
            (w_scale, h_scale, w_scale, h_scale).  Not used by this method.
        cfg (mmcv.Config): Test / postprocessing configuration,
            if None, ``self.test_cfg`` is used.
        rescale (bool): Not used by this method.

    Returns:
        Tensor: Labeled boxes in shape (n, 5), where the first 4 columns
            are bounding box positions (tl_x, tl_y, br_x, br_y) and the
            5-th column is a score between 0 and 1.  At most
            ``cfg.nms_post`` rows are returned.
    """
    cfg = self.test_cfg if cfg is None else cfg
    # bboxes from different levels should be independent during NMS;
    # level_ids are used as labels for batched NMS to separate them.
    level_ids = []
    mlvl_scores = []
    mlvl_bbox_preds = []
    mlvl_valid_anchors = []
    for idx in range(len(cls_scores)):
        rpn_cls_score = cls_scores[idx]
        rpn_bbox_pred = bbox_preds[idx]
        assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
        rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
        if self.use_sigmoid_cls:
            rpn_cls_score = rpn_cls_score.reshape(-1)
            scores = rpn_cls_score.sigmoid()
        else:
            rpn_cls_score = rpn_cls_score.reshape(-1, 2)
            # We set FG labels to [0, num_class-1] and BG label to
            # num_class in RPN head since mmdet v2.5, which is unified to
            # be consistent with other head since mmdet v2.0. In mmdet v2.0
            # to v2.4 we keep BG label as 0 and FG label as 1 in rpn head.
            scores = rpn_cls_score.softmax(dim=1)[:, 0]
        rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        anchors = mlvl_anchors[idx]
        if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
            # sort is faster than topk
            ranked_scores, rank_inds = scores.sort(descending=True)
            topk_inds = rank_inds[:cfg.nms_pre]
            scores = ranked_scores[:cfg.nms_pre]
            rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
            anchors = anchors[topk_inds, :]
        mlvl_scores.append(scores)
        mlvl_bbox_preds.append(rpn_bbox_pred)
        mlvl_valid_anchors.append(anchors)
        level_ids.append(
            scores.new_full((scores.size(0), ), idx, dtype=torch.long))

    scores = torch.cat(mlvl_scores)
    anchors = torch.cat(mlvl_valid_anchors)
    rpn_bbox_pred = torch.cat(mlvl_bbox_preds)
    proposals = self.bbox_coder.decode(
        anchors, rpn_bbox_pred, max_shape=img_shape)
    ids = torch.cat(level_ids)

    if cfg.min_bbox_size > 0:
        w = proposals[:, 2] - proposals[:, 0]
        h = proposals[:, 3] - proposals[:, 1]
        # Filter with a boolean mask.  The previous code compared the SUM of
        # the surviving indices with the number of proposals (which can skip
        # the filter by coincidence) and squeezed the index tensor (which
        # turns a single survivor into a 0-dim index and drops a dimension).
        valid_mask = (w >= cfg.min_bbox_size) & (h >= cfg.min_bbox_size)
        if not valid_mask.all():
            proposals = proposals[valid_mask]
            scores = scores[valid_mask]
            ids = ids[valid_mask]

    # TODO: remove the hard coded nms type
    nms_cfg = dict(type='nms', iou_threshold=cfg.nms_thr)
    dets, keep = batched_nms(proposals, scores, ids, nms_cfg)
    return dets[:cfg.nms_post]
def cs_fft(m, n, f, mask, mu, beta, n_iter):
    """
    Recovers an image from a subset of its frequencies using FFTs.

    Runs n_iter iterations of ADMM (with regularization parameter mu and
    coupling parameter beta) to reconstruct an m x n image from the rows f of
    its Fourier transform selected by mask. The linear solve in every
    iteration is carried out in the Fourier domain. The numpy code path is
    taken when f is a numpy.ndarray; the ctorch code path (which moves the
    Fourier multipliers to the GPU) is taken when f is a
    ctorch.ComplexTensor.

    _N.B._: mask[0] must be True in order to make the optimization
    well-posed; this is asserted.

    Parameters
    ----------
    m : int
        number of rows in the image being reconstructed
    n : int
        number of columns in the image being reconstructed
    f : numpy.ndarray or ctorch.ComplexTensor
        potentially nonzero rows (prior to the inverse Fourier transform);
        f.shape[1] must equal n
    mask : numpy.ndarray
        boolean indicators of the positions of the rows in the full m x n
        array -- the zero frequency entry must be True
    mu : float
        regularization parameter
    beta : float
        coupling parameter for the ADMM iterations
    n_iter : int
        number of ADMM iterations to conduct

    Returns
    -------
    numpy.ndarray or ctorch.ComplexTensor
        real part of the reconstructed m x n image
    float
        objective value at the end of the ADMM iterations

    Raises
    ------
    TypeError
        if f is neither a numpy.ndarray nor a ctorch.ComplexTensor
    """

    def image_gradient(x):
        """
        Periodic first-order finite differences of a two-dimensional array.

        Parameters
        ----------
        x : numpy.ndarray or ctorch.ComplexTensor
            image (that is, two-dimensional array)

        Returns
        -------
        numpy.ndarray or ctorch.ComplexTensor
            flattened horizontal differences in row 0 and flattened vertical
            differences in row 1
        """
        if isinstance(x, np.ndarray):
            # Prepend the last column / last row so that the differences
            # wrap around the boundary.
            cols_wrapped = np.hstack((x[:, -1:], x))
            rows_wrapped = np.vstack((x[-1:], x))
            horiz = cols_wrapped[:, 1:] - cols_wrapped[:, :-1]
            vert = rows_wrapped[1:] - rows_wrapped[:-1]
            return np.vstack((horiz.ravel(), vert.ravel()))
        if isinstance(x, ctorch.ComplexTensor):
            cols_wrapped = ctorch.cat((x[:, -1:], x), dim=1)
            rows_wrapped = ctorch.cat((x[-1:], x), dim=0)
            horiz = cols_wrapped[:, 1:] - cols_wrapped[:, :-1]
            vert = rows_wrapped[1:] - rows_wrapped[:-1]
            return ctorch.cat((horiz, vert)).view(2, -1)
        raise TypeError('Input must be a numpy.ndarray ' +
                        'or a ctorch.ComplexTensor.')

    def image_gradient_T(x):
        """
        Transpose of the operator that function image_gradient implements.

        Parameters
        ----------
        x : numpy.ndarray or ctorch.ComplexTensor
            stack of two identically shaped arrays

        Returns
        -------
        numpy.ndarray or ctorch.ComplexTensor
            result of applying to x the transpose of function image_gradient
        """
        if isinstance(x, np.ndarray):
            horiz_part = x[0]
            vert_part = x[1]
            # Append the first column / first row so that the differences
            # wrap around the boundary.
            horiz_ext = np.hstack((horiz_part, horiz_part[:, :1]))
            vert_ext = np.vstack((vert_part, vert_part[:1]))
            return ((horiz_ext[:, :-1] - horiz_ext[:, 1:])
                    + (vert_ext[:-1] - vert_ext[1:]))
        if isinstance(x, ctorch.ComplexTensor):
            horiz_part = x[0]
            vert_part = x[1]
            horiz_ext = ctorch.cat((horiz_part, horiz_part[:, :1]), dim=1)
            vert_ext = ctorch.cat((vert_part, vert_part[:1]), dim=0)
            return ((horiz_ext[:, :-1] - horiz_ext[:, 1:])
                    + (vert_ext[:-1] - vert_ext[1:]))
        raise TypeError('Input must be a numpy.ndarray ' +
                        'or a ctorch.ComplexTensor.')

    if isinstance(f, np.ndarray):
        assert f.shape[1] == n
        assert mask[0]
        # Rescaled data, zero-padded between the mask samples.
        scaled_data = (mu / beta) * zero_padded(m, n, f, mask)
        # Squared magnitudes of the Fourier transforms of the
        # finite-difference kernels.
        spectrum_rows = np.abs(np.fft.fft([1, -1] + [0] * (m - 2)))**2
        spectrum_cols = np.abs(np.fft.fft([1, -1] + [0] * (n - 2)))**2
        # Fourier-domain multipliers for the linear solve (formula (2.8) from
        # Tao-Yang); the row spectrum and the mask are broadcast across
        # columns, the column spectrum across rows.
        multipliers = 1. / (spectrum_cols + spectrum_rows[:, None]
                            + (mu / beta) * mask[:, None])
        # Primal and dual iterates start at zero.
        primal = np.zeros((m, n))
        dual = np.zeros((2, m * n))
        for step in range(n_iter):
            scaled_dual = dual / beta
            # Shrinkage (formula (2.7) from Tao-Yang); skipped on the first
            # pass, where the auxiliary variable is simply zero.
            shifted = image_gradient(primal) + scaled_dual
            magnitudes = scipy.linalg.norm(shifted, axis=0, keepdims=True)
            shrunk = (shifted * np.maximum(1 - 1 / (beta * magnitudes), 0)
                      if step > 0 else np.zeros((2, m * n)))
            # Linear solve in the Fourier domain (formula (2.8)).
            rhs = image_gradient_T(
                (shrunk - scaled_dual).reshape((2, m, n))) + scaled_data
            primal = np.fft.ifft2(np.fft.fft2(rhs) * multipliers)
            # Lagrange multiplier update (formula (2.9)).
            dual = dual - beta * (shrunk - image_gradient(primal))
        # Objective of formula (1.4): total variation ...
        loss = np.linalg.norm(image_gradient(primal), axis=0).sum()
        # ... plus the data-fidelity term.
        loss += np.linalg.norm(
            np.fft.fft2(primal)[mask] / np.sqrt(m * n) - f)**2 * (mu / 2)
        # Only the real part of the primal solution is returned.
        return primal.real, loss

    if not isinstance(f, ctorch.ComplexTensor):
        raise TypeError('Input must be a numpy.ndarray ' +
                        'or a ctorch.ComplexTensor.')

    assert f.shape[1] == n
    assert mask[0]
    # Positions of the True entries of the mask, as long integers.
    mask_nnz = torch.nonzero(mask).squeeze(1)
    # Rescaled data, zero-padded between the mask samples.
    scaled_data = zero_padded(m, n, f, mask_nnz) * (mu / beta)
    # Squared magnitudes of the Fourier transforms of the finite-difference
    # kernels (computed in numpy).
    spectrum_rows = np.abs(np.fft.fft([1, -1] + [0] * (m - 2)))**2
    spectrum_cols = np.abs(np.fft.fft([1, -1] + [0] * (n - 2)))**2
    # Fourier-domain multipliers for the linear solve, built in numpy and
    # then moved to the GPU.
    multipliers = 1. / (spectrum_cols + spectrum_rows[:, None]
                        + mask.cpu().numpy()[:, None] * (mu / beta))
    multipliers = ctorch.from_numpy(multipliers).cuda()
    # Primal and dual iterates start at zero, with the same type as f.
    primal = f.new(m, n).zero_()
    dual = f.new(2, m * n).zero_()
    for step in range(n_iter):
        # Shrinkage (formula (2.7) from Tao-Yang); skipped on the first pass.
        shifted = image_gradient(primal) + dual / beta
        magnitudes = ctorch.norm(shifted, p=2, dim=0, keepdim=True)
        if step > 0:
            shrunk = shifted * torch.clamp(1 - 1 / (beta * magnitudes), min=0)
        else:
            shrunk = f.new(2, m * n).zero_()
        # Linear solve in the Fourier domain (formula (2.8)).
        rhs = image_gradient_T((shrunk - dual / beta).view(2, m, n)) + scaled_data
        primal = ctorch.ifft2(ctorch.fft2(rhs) * multipliers)
        # Lagrange multiplier update (formula (2.9)).
        dual = dual - (shrunk - image_gradient(primal)) * beta
    # Objective of formula (1.4): total variation ...
    loss = ctorch.norm(image_gradient(primal), p=2, dim=0).sum()
    # ... plus the data-fidelity term.
    loss += ctorch.norm(
        ctorch.fft2(primal)[mask_nnz] / math.sqrt(m * n) - f)**2 * (mu / 2)
    # Only the real part of the primal solution is returned.
    return primal.real, loss.item()
def getProposals(obj_pc, grids, center, index, scores, data_index,
                 radius=0.022 * np.sqrt(3), local_th=0.011, local_pn=100):
    """Pair every contact point with every grid cell and build proposal targets.

    The leading dimension of ``obj_pc``, ``grids``, ``center``, ``index`` and
    ``scores`` is squeezed away first (presumably a batch dimension of size
    one -- TODO confirm against the caller). ``obj_pc`` is additionally scaled
    per axis by ``(0.11, 0.11, 0.22)``.

    Args:
        obj_pc: object point cloud; rows are 3-D points after the squeeze.
        grids: grid positions, used as ``(grid_num, 3)``.
        center: reference centres, viewed as ``(-1, 1, 3)`` and broadcast
            against the ``(con_num, grid_num, 3)`` grid expansion.
        index: indices into ``obj_pc`` that select the contact points.
        scores: scores viewed as a column and broadcast over the grid axis
            (presumably one score per contact -- verify against caller).
        data_index: identifier viewed as a column and repeated for every
            grid cell.
        radius: a pair counts as a positive proposal when the centre-to-grid
            distance from ``dist_matrix_torch`` is strictly below this value.
        local_th: forwarded unchanged to ``pu.matrix_k_min``.
        local_pn: forwarded to ``pu.matrix_k_min``; also the size of the last
            axis of the returned local-point indices.

    Returns:
        A 10-tuple, in this order:
        ``pairs`` (1, con_num * grid_num, 2, 3) contact/grid coordinates (CPU),
        ``scores_all`` (1, con_num * grid_num) scores masked by the radius
        test (CPU),
        ``offsets`` (1, con_num * grid_num, 3) grid-minus-centre offsets
        divided by ``radius`` and masked by the radius test (CPU),
        ``local_points`` (1, con_num * grid_num, local_pn) long indices
        produced by ``pu.matrix_k_min``,
        ``data_index`` (1, con_num * grid_num),
        ``anti_label`` (1, con_num * grid_num) the radius mask as floats (CPU),
        ``posi_prop_idx`` / ``nega_prop_idx`` flat indices of pairs inside /
        outside the radius (CPU),
        ``posi_idx`` / ``nega_idx`` positions *within the positive proposals*
        whose masked score is non-zero / zero (CPU).
    """
    # Drop the leading dimension of every input.
    center, index, scores, grids = (
        t.squeeze(0) for t in (center, index, scores, grids)
    )
    axis_scale = torch.FloatTensor([0.22 / 2, 0.22 / 2, 0.22]).to(obj_pc.device)
    obj_pc = obj_pc.squeeze(0) * axis_scale

    contact = obj_pc[index]
    center_to_grid = dist_matrix_torch(center, grids)
    contact_to_point = dist_matrix_torch(contact, obj_pc)

    con_num = index.size(0)
    grid_num = grids.size(0)
    pn_num = obj_pc.size(0)

    # ---- contact/grid pairs: full cartesian product ----
    contact_exp = contact.view(-1, 1, 3).expand(-1, grid_num, -1)
    grids_exp = grids.view(1, -1, 3).expand(con_num, -1, -1)
    stacked_pairs = torch.stack([contact_exp, grids_exp], 2)
    stacked_pairs = stacked_pairs.view(-1, 2, 3).unsqueeze(0)
    pairs = stacked_pairs.cpu()
    del stacked_pairs

    # ---- positive / negative proposals from the radius test ----
    select = (center_to_grid < radius).float()
    flat_select = select.view(-1)
    posi_prop_src = torch.nonzero(flat_select).view(-1)
    nega_prop_src = torch.nonzero(select.view(-1) == 0).view(-1)
    posi_prop_idx = posi_prop_src.cpu()
    nega_prop_idx = nega_prop_src.cpu()

    raw_offsets = (grids_exp - center.view(-1, 1, 3)).view(1, -1, 3) / radius
    offsets = (raw_offsets * select.view(1, -1, 1)).cpu()
    del raw_offsets

    # ---- proposal scores (the mask is applied twice, as in the original) ----
    masked_scores = select * (scores.view(-1, 1) * select)
    scores_all = masked_scores.view(1, -1).cpu()
    posi_prop_scores = masked_scores.view(-1)[posi_prop_src]
    # positive proposals associated to positive grasps
    posi_src = torch.nonzero(posi_prop_scores).view(-1)
    # positive proposals associated to negative grasps
    nega_src = torch.nonzero(posi_prop_scores == 0).view(-1)
    posi_idx = posi_src.cpu()
    nega_idx = nega_src.cpu()
    anti_label = select.view(1, -1).cpu()  # proposals labels
    del (masked_scores, posi_prop_src, nega_prop_src, posi_src, nega_src)

    # ---- local points ----
    # Unit vector of (contact - grid) for every pair.
    pg_vec = contact_exp - grids_exp
    pg_vec = pg_vec / torch.sqrt(torch.sum(pg_vec ** 2, -1, keepdim=True))
    # Unit vector of (point - contact) for every contact/point combination.
    point_dist_view = contact_to_point.view(con_num, -1, 1)
    pp_vec = (obj_pc.view(1, -1, 3) - contact.view(-1, 1, 3)) / point_dist_view
    del (contact_exp, grids_exp, center_to_grid, contact_to_point)

    # Process the contacts in slices so the (contact, grid, point) tensor
    # never has to exist all at once (guards against running out of memory).
    data_num = grid_num * con_num * pn_num
    num_chunks = int(data_num // 5e8 + 1)
    chunk_len = (con_num + num_chunks - 1) // num_chunks
    local_points_list = []
    for chunk_id in range(num_chunks):
        start = chunk_len * chunk_id
        stop = chunk_len * (chunk_id + 1)
        if chunk_id + 1 == num_chunks:
            stop = max(stop, con_num)
        # Point distance inflated by |dot product| of the two unit vectors.
        alignment = torch.abs(
            pg_vec[start:stop].matmul(pp_vec[start:stop].transpose(1, 2))
        )
        weighted = point_dist_view[start:stop].transpose(1, 2) * (1.0 + alignment)
        weighted_cuda = weighted.to('cuda:0')
        local_points_list.append(
            pu.matrix_k_min(local_th, local_pn, weighted_cuda).long()
        )
        del (weighted, weighted_cuda)

    local_points = torch.cat(local_points_list, 0).view(1, -1, local_pn).long()
    assert local_points.size(1) == con_num * grid_num, local_points.size(1)

    # Repeat each data index once per grid cell, then flatten.
    data_index = data_index.new(con_num, grid_num).zero_() + data_index.view(-1, 1)
    data_index = data_index.view(1, -1)
    del (select, pg_vec, pp_vec)

    return (pairs, scores_all, offsets, local_points, data_index, anti_label,
            posi_prop_idx, nega_prop_idx, posi_idx, nega_idx)
def _sample_rois_pytorch(self, all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: candidate RoIs indexed as (batch, roi, 5); column 0 is
            overwritten with the batch index and columns 1:5 are used as the
            box coordinates.
        gt_boxes: ground-truth boxes indexed as (batch, box, 5); columns 0:4
            are the coordinates and column 4 is the class label.
        fg_rois_per_image: upper bound on foreground RoIs sampled per image.
        rois_per_image: total number of RoIs sampled per image.
        num_classes: forwarded to ``_get_bbox_regression_labels_pytorch``.

    Returns:
        ``(labels_batch, rois_batch, bbox_targets, bbox_inside_weights)``
        where ``labels_batch`` is (batch, rois_per_image), ``rois_batch`` is
        (batch, rois_per_image, 5) and the last two come from
        ``_get_bbox_regression_labels_pytorch``.

    Raises:
        ValueError: if an image has neither foreground nor background RoIs.
    """
    # overlaps: (batch, rois, gt_boxes)
    overlaps = bbox_overlaps_batch(all_rois, gt_boxes)
    max_overlaps, gt_assignment = torch.max(overlaps, 2)

    batch_size = overlaps.size(0)

    # Turn the per-image gt index into an index into the flattened
    # (batch * boxes) label vector.
    offset = torch.arange(0, batch_size) * gt_boxes.size(1)
    offset = offset.view(-1, 1).type_as(gt_assignment) + gt_assignment

    # Plain index syntax instead of the legacy ``Tensor.index`` method, which
    # is not part of the Python tensor API in current PyTorch releases.
    flat_labels = gt_boxes[:, :, 4].contiguous().view(-1)
    labels = flat_labels[offset.view(-1)].view(batch_size, -1)

    labels_batch = labels.new(batch_size, rois_per_image).zero_()
    rois_batch = all_rois.new(batch_size, rois_per_image, 5).zero_()
    gt_rois_batch = all_rois.new(batch_size, rois_per_image, 5).zero_()

    def _draw_with_replacement(pool, count):
        # numpy is used instead of torch.rand, which was observed to generate
        # very large numbers and cause an indexing error.
        rand_num = np.floor(np.random.rand(count) * pool.numel())
        rand_num = torch.from_numpy(rand_num).type_as(gt_boxes).long()
        return pool[rand_num]

    # Guard against the case when an image has fewer than
    # fg_rois_per_image foreground RoIs.
    for i in range(batch_size):
        fg_inds = torch.nonzero(max_overlaps[i] >= cfg.TRAIN.FG_THRESH).view(-1)
        fg_num_rois = fg_inds.numel()

        # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
        bg_inds = torch.nonzero((max_overlaps[i] < cfg.TRAIN.BG_THRESH_HI) &
                                (max_overlaps[i] >= cfg.TRAIN.BG_THRESH_LO)).view(-1)
        bg_num_rois = bg_inds.numel()

        if fg_num_rois > 0 and bg_num_rois > 0:
            # Foreground: sample without replacement.
            fg_rois_per_this_image = min(fg_rois_per_image, fg_num_rois)
            # torch.randperm has been reported to segfault in multi-GPU
            # settings (https://github.com/pytorch/pytorch/issues/1868),
            # so numpy is used instead.
            rand_num = torch.from_numpy(
                np.random.permutation(fg_num_rois)).type_as(gt_boxes).long()
            fg_inds = fg_inds[rand_num[:fg_rois_per_this_image]]

            # Background: fill the remainder, sampling with replacement.
            bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
            bg_inds = _draw_with_replacement(bg_inds, bg_rois_per_this_image)
        elif fg_num_rois > 0 and bg_num_rois == 0:
            # Only foreground available: fill everything with foreground.
            fg_inds = _draw_with_replacement(fg_inds, rois_per_image)
            fg_rois_per_this_image = rois_per_image
            bg_rois_per_this_image = 0
        elif bg_num_rois > 0 and fg_num_rois == 0:
            # Only background available: fill everything with background.
            bg_inds = _draw_with_replacement(bg_inds, rois_per_image)
            bg_rois_per_this_image = rois_per_image
            fg_rois_per_this_image = 0
        else:
            raise ValueError("bg_num_rois = 0 and fg_num_rois = 0, this should not happen!")

        # The indices that we're selecting (both fg and bg)
        keep_inds = torch.cat([fg_inds, bg_inds], 0)

        # Select sampled values from various arrays:
        labels_batch[i].copy_(labels[i][keep_inds])

        # Clamp labels for the background RoIs to 0
        if fg_rois_per_this_image < rois_per_image:
            labels_batch[i][fg_rois_per_this_image:] = 0

        rois_batch[i] = all_rois[i][keep_inds]
        rois_batch[i, :, 0] = i

        gt_rois_batch[i] = gt_boxes[i][gt_assignment[i][keep_inds]]

    bbox_target_data = self._compute_targets_pytorch(
        rois_batch[:, :, 1:5], gt_rois_batch[:, :, :4])

    bbox_targets, bbox_inside_weights = \
        self._get_bbox_regression_labels_pytorch(bbox_target_data, labels_batch, num_classes)

    return labels_batch, rois_batch, bbox_targets, bbox_inside_weights
def interpolate(self, x_grid, x_target, interp_points=range(-2, 2)):
    """Compute interpolation indices and weights of targets on a regular grid.

    Args:
        x_grid: grid locations, indexed as (num_grid_points, num_dim). The
            spacing of each dimension is taken from its first two rows.
        x_target: points to interpolate, indexed as
            (num_target_points, num_dim).
        interp_points: integer offsets of the grid points that contribute to
            each target.

    Returns:
        ``(interp_indices, interp_values)``, both of shape
        (num_target_points, len(interp_points) ** num_dim). The indices are
        flattened over the grid dimensions; the values are the products of
        the per-dimension weights from ``_cubic_interpolation_kernel``.

    Raises:
        RuntimeError: if any target lies outside the grid bounds in some
            dimension (with a tolerance of 1e-7).
    """
    # Do some boundary checking
    grid_mins = x_grid.min(0)[0]
    grid_maxs = x_grid.max(0)[0]
    x_target_min = x_target.min(0)[0]
    # The upper check must use the per-dimension maximum of the targets.
    x_target_max = x_target.max(0)[0]
    lt_min_mask = (x_target_min - grid_mins).lt(-1e-7)
    gt_max_mask = (x_target_max - grid_maxs).gt(1e-7)
    for out_of_range_mask in (lt_min_mask, gt_max_mask):
        if out_of_range_mask.sum().item():
            first_out_of_range = out_of_range_mask.nonzero().squeeze(1)[0].item()
            # Each placeholder has its own positional index so the message
            # reports four distinct numbers.
            raise RuntimeError(
                (
                    "Received data that was out of bounds for the specified grid. "
                    "Grid bounds were ({0:.3f}, {1:.3f}), but min = {2:.3f}, "
                    "max = {3:.3f}"
                ).format(
                    grid_mins[first_out_of_range].item(),
                    grid_maxs[first_out_of_range].item(),
                    x_target_min[first_out_of_range].item(),
                    x_target_max[first_out_of_range].item(),
                )
            )

    # Now do interpolation
    interp_points = torch.tensor(interp_points, dtype=x_grid.dtype, device=x_grid.device)
    interp_points_flip = interp_points.flip(0)

    num_grid_points = x_grid.size(0)
    num_target_points = x_target.size(0)
    num_dim = x_target.size(-1)
    num_coefficients = len(interp_points)

    interp_values = torch.ones(
        num_target_points, num_coefficients ** num_dim, dtype=x_grid.dtype, device=x_grid.device
    )
    interp_indices = torch.zeros(
        num_target_points, num_coefficients ** num_dim, dtype=torch.long, device=x_grid.device
    )

    for i in range(num_dim):
        grid_delta = x_grid[1, i] - x_grid[0, i]
        lower_grid_pt_idxs = torch.floor((x_target[:, i] - x_grid[0, i]) / grid_delta).squeeze()
        lower_pt_rel_dists = (x_target[:, i] - x_grid[0, i]) / grid_delta - lower_grid_pt_idxs
        lower_grid_pt_idxs = lower_grid_pt_idxs - interp_points.max()
        lower_grid_pt_idxs.detach_()

        # A single target point squeezes down to a 0-dim tensor; restore the axis.
        if len(lower_grid_pt_idxs.shape) == 0:
            lower_grid_pt_idxs = lower_grid_pt_idxs.unsqueeze(0)

        scaled_dist = lower_pt_rel_dists.unsqueeze(-1) + interp_points_flip.unsqueeze(-2)
        dim_interp_values = self._cubic_interpolation_kernel(scaled_dist)

        # Find points whose closest lower grid point is the first grid point.
        # This corresponds to a boundary condition that we must fix manually:
        # such points put all their weight on the nearest of the first
        # `num_coefficients` grid points.
        left_boundary_pts = torch.nonzero(lower_grid_pt_idxs < 1)
        num_left = len(left_boundary_pts)

        if num_left > 0:
            left_boundary_pts.squeeze_(1)
            x_grid_first = x_grid[:num_coefficients, i].unsqueeze(1).t().expand(num_left, num_coefficients)

            grid_targets = x_target.select(1, i)[left_boundary_pts].unsqueeze(1).expand(num_left, num_coefficients)
            dists = torch.abs(x_grid_first - grid_targets)
            closest_from_first = torch.min(dists, 1)[1]

            for j in range(num_left):
                dim_interp_values[left_boundary_pts[j], :] = 0
                dim_interp_values[left_boundary_pts[j], closest_from_first[j]] = 1
                lower_grid_pt_idxs[left_boundary_pts[j]] = 0

        # Same treatment for points whose stencil would run past the last grid point.
        right_boundary_pts = torch.nonzero(lower_grid_pt_idxs > num_grid_points - num_coefficients)
        num_right = len(right_boundary_pts)

        if num_right > 0:
            right_boundary_pts.squeeze_(1)
            x_grid_last = x_grid[-num_coefficients:, i].unsqueeze(1).t().expand(num_right, num_coefficients)

            grid_targets = x_target.select(1, i)[right_boundary_pts].unsqueeze(1)
            grid_targets = grid_targets.expand(num_right, num_coefficients)
            dists = torch.abs(x_grid_last - grid_targets)
            closest_from_last = torch.min(dists, 1)[1]

            for j in range(num_right):
                dim_interp_values[right_boundary_pts[j], :] = 0
                dim_interp_values[right_boundary_pts[j], closest_from_last[j]] = 1
                lower_grid_pt_idxs[right_boundary_pts[j]] = num_grid_points - num_coefficients

        offset = (interp_points - interp_points.min()).long().unsqueeze(-2)
        dim_interp_indices = lower_grid_pt_idxs.long().unsqueeze(-1) + offset

        # Tile this dimension's indices/weights so that, multiplied across
        # dimensions, they enumerate the full tensor-product stencil.
        n_inner_repeat = num_coefficients ** i
        n_outer_repeat = num_coefficients ** (num_dim - i - 1)
        index_coeff = num_grid_points ** (num_dim - i - 1)
        dim_interp_indices = dim_interp_indices.unsqueeze(-1).repeat(1, n_inner_repeat, n_outer_repeat)
        dim_interp_values = dim_interp_values.unsqueeze(-1).repeat(1, n_inner_repeat, n_outer_repeat)
        interp_indices = interp_indices.add(dim_interp_indices.view(num_target_points, -1).mul(index_coeff))
        interp_values = interp_values.mul(dim_interp_values.view(num_target_points, -1))

    return interp_indices, interp_values
def forward(self, input):
    """Assign RPN training labels and box-regression targets to anchors.

    Algorithm:
        for each (H, W) location i
            generate A anchor boxes centered on cell i
        filter out-of-image anchors
        label the remaining anchors from their overlap with the gt boxes,
        subsample them, and compute regression targets and loss weights.

    Args:
        input: sequence whose first three items are
            ``rpn_cls_score`` (only its sizes at dims 2 and 3, H and W, are
            used), ``gt_boxes`` indexed as (batch, box, 5), and ``im_info``
            whose first row provides (height, width) for the border test.
            Any further items are ignored.

    Returns:
        list of four tensors:
        labels (batch, 1, A * H, W) with 1 = positive, 0 = negative,
        -1 = don't care; bbox_targets (batch, 4 * A, H, W);
        bbox_inside_weights (batch, 4 * A, H, W);
        bbox_outside_weights (batch, 4 * A, H, W).
    """
    rpn_cls_score = input[0]
    gt_boxes = input[1]
    im_info = input[2]

    # map of shape (..., H, W)
    height, width = rpn_cls_score.size(2), rpn_cls_score.size(3)
    batch_size = gt_boxes.size(0)

    # One (x1, y1, x2, y2) shift per feature-map cell.
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
                                         shift_x.ravel(), shift_y.ravel())).transpose())
    shifts = shifts.contiguous().type_as(rpn_cls_score).float()

    A = self._num_anchors
    K = shifts.size(0)

    self._anchors = self._anchors.type_as(gt_boxes)  # move to specific gpu.
    all_anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    all_anchors = all_anchors.view(K * A, 4)

    total_anchors = int(K * A)

    # `int` replaces the Python-2-only `long` builtin.
    # NOTE(review): only the first image's im_info is used for the whole batch.
    keep = ((all_anchors[:, 0] >= -self._allowed_border) &
            (all_anchors[:, 1] >= -self._allowed_border) &
            (all_anchors[:, 2] < int(im_info[0][1]) + self._allowed_border) &
            (all_anchors[:, 3] < int(im_info[0][0]) + self._allowed_border))

    inds_inside = torch.nonzero(keep).view(-1)

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = gt_boxes.new(batch_size, inds_inside.size(0)).fill_(-1)
    bbox_inside_weights = gt_boxes.new(batch_size, inds_inside.size(0)).zero_()
    bbox_outside_weights = gt_boxes.new(batch_size, inds_inside.size(0)).zero_()

    overlaps = bbox_overlaps_batch(anchors, gt_boxes)

    max_overlaps, argmax_overlaps = torch.max(overlaps, 2)
    gt_max_overlaps, _ = torch.max(overlaps, 1)

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # Replace zero maxima so that anchors with zero overlap never "match"
    # a gt box in the equality test below.
    gt_max_overlaps[gt_max_overlaps == 0] = 1e-5
    keep = torch.sum(overlaps.eq(gt_max_overlaps.view(batch_size, 1, -1).expand_as(overlaps)), 2)

    # fg label: for each gt, anchor(s) with highest overlap
    if torch.sum(keep) > 0:
        labels[keep > 0] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)

    sum_fg = torch.sum((labels == 1).int(), 1)
    sum_bg = torch.sum((labels == 0).int(), 1)

    for i in range(batch_size):
        # subsample positive labels if we have too many
        if sum_fg[i] > num_fg:
            fg_inds = torch.nonzero(labels[i] == 1).view(-1)
            # torch.randperm has been reported to segfault in multi-GPU
            # settings (https://github.com/pytorch/pytorch/issues/1868),
            # so numpy is used instead.
            rand_num = torch.from_numpy(np.random.permutation(fg_inds.size(0))).type_as(gt_boxes).long()
            disable_inds = fg_inds[rand_num[:fg_inds.size(0) - num_fg]]
            labels[i][disable_inds] = -1

        # Recount the positives AFTER subsampling. Using the stale pre-sampling
        # count would shrink the background budget (possibly below zero, which
        # disables every background anchor).
        num_bg = cfg.TRAIN.RPN_BATCHSIZE - int(torch.sum((labels[i] == 1).int()))

        # subsample negative labels if we have too many
        if sum_bg[i] > num_bg:
            bg_inds = torch.nonzero(labels[i] == 0).view(-1)
            rand_num = torch.from_numpy(np.random.permutation(bg_inds.size(0))).type_as(gt_boxes).long()
            disable_inds = bg_inds[rand_num[:bg_inds.size(0) - num_bg]]
            labels[i][disable_inds] = -1

    # Turn per-image gt indices into indices into the flattened gt boxes.
    offset = torch.arange(0, batch_size) * gt_boxes.size(1)
    argmax_overlaps = argmax_overlaps + offset.view(batch_size, 1).type_as(argmax_overlaps)
    bbox_targets = _compute_targets_batch(
        anchors, gt_boxes.view(-1, 5)[argmax_overlaps.view(-1), :].view(batch_size, -1, 5))

    # use a single value instead of 4 values for easy index.
    bbox_inside_weights[labels == 1] = cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS[0]

    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # Uniform weighting of examples.
        # NOTE(review): the normaliser is the labelled-anchor count of the
        # LAST image only (the original relied on the loop variable leaking
        # out of the `for` above); kept for behavioural compatibility.
        num_examples = int(torch.sum((labels[batch_size - 1] >= 0).int()))
        # Plain Python float division; clamp to avoid dividing by zero.
        uniform_weight = 1.0 / max(num_examples, 1)
        bbox_outside_weights[labels == 1] = uniform_weight
        bbox_outside_weights[labels == 0] = uniform_weight
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        # Previously this branch left the weights undefined (NameError).
        # Positives share RPN_POSITIVE_WEIGHT and negatives share the rest,
        # normalised per image by the respective anchor counts.
        for b in range(batch_size):
            pos_mask = labels[b] == 1
            neg_mask = labels[b] == 0
            num_pos = max(int(torch.sum(pos_mask.int())), 1)
            num_neg = max(int(torch.sum(neg_mask.int())), 1)
            bbox_outside_weights[b][pos_mask] = cfg.TRAIN.RPN_POSITIVE_WEIGHT / num_pos
            bbox_outside_weights[b][neg_mask] = (1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / num_neg

    # Scatter the results for inside anchors back to the full anchor set.
    labels = _unmap(labels, total_anchors, inds_inside, batch_size, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, batch_size, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, batch_size, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, batch_size, fill=0)

    outputs = []

    labels = labels.view(batch_size, height, width, A).permute(0, 3, 1, 2).contiguous()
    labels = labels.view(batch_size, 1, A * height, width)
    outputs.append(labels)

    bbox_targets = bbox_targets.view(batch_size, height, width, A * 4).permute(0, 3, 1, 2).contiguous()
    outputs.append(bbox_targets)

    # Expand the single per-anchor weight to one weight per box coordinate.
    anchors_count = bbox_inside_weights.size(1)
    bbox_inside_weights = bbox_inside_weights.view(batch_size, anchors_count, 1).expand(batch_size, anchors_count, 4)
    bbox_inside_weights = bbox_inside_weights.contiguous().view(batch_size, height, width, 4 * A)\
        .permute(0, 3, 1, 2).contiguous()
    outputs.append(bbox_inside_weights)

    bbox_outside_weights = bbox_outside_weights.view(batch_size, anchors_count, 1).expand(batch_size, anchors_count, 4)
    bbox_outside_weights = bbox_outside_weights.contiguous().view(batch_size, height, width, 4 * A)\
        .permute(0, 3, 1, 2).contiguous()
    outputs.append(bbox_outside_weights)

    return outputs
def forward(self, input, input_map, coords, batch_idxs, batch_offsets, epoch):
    '''
    Run the backbone, predict per-point semantics and offsets, cluster the
    foreground points into proposals and score every proposal.

    :param input: backbone input; its ``batch_size`` attribute is read
    :param input_map: (N), int, cuda
    :param coords: (N, 3), float, cuda
    :param batch_idxs: (N), int, cuda
    :param batch_offsets: (B + 1), int, cuda (not used in this method)
    :param epoch: not used in this method (the epoch guard is disabled)
    :return: dict with keys ``semantic_scores``, ``pt_offsets`` and
        ``proposal_scores``; the latter is the tuple
        (scores, score_feats, proposals_idx, proposals_offset)
    '''
    ret = {}

    backbone_out = self.output_layer(self.unet(self.input_conv(input)))
    output_feats = backbone_out.features[input_map.long()]

    #### semantic segmentation
    semantic_scores = self.linear(output_feats)   # (N, nClass), float
    semantic_preds = semantic_scores.max(1)[1]    # (N), long
    ret['semantic_scores'] = semantic_scores

    #### offset
    pt_offsets = self.offset_linear(self.offset(output_feats))   # (N, 3), float32
    ret['pt_offsets'] = pt_offsets

    #### get proposal clusters (always executed; no epoch guard)
    # Only points whose predicted class id is > 1 take part in clustering.
    object_idxs = torch.nonzero(semantic_preds > 1).view(-1)
    fg_batch_idxs = batch_idxs[object_idxs]
    fg_batch_offsets = utils.get_batch_offsets(fg_batch_idxs, input.batch_size)
    fg_coords = coords[object_idxs]
    fg_pt_offsets = pt_offsets[object_idxs]
    fg_semantic_cpu = semantic_preds[object_idxs].int().cpu()

    def _cluster(query_coords, mean_active):
        # Ball query + BFS clustering on the foreground points, then map
        # the point ids back to indices into the full N points.
        nbr_idx, nbr_start_len = pointgroup_ops.ballquery_batch_p(
            query_coords, fg_batch_idxs, fg_batch_offsets, self.cluster_radius, mean_active)
        cluster_idx, cluster_offset = pointgroup_ops.bfs_cluster(
            fg_semantic_cpu, nbr_idx.cpu(), nbr_start_len.cpu(), self.cluster_npoint_thre)
        cluster_idx[:, 1] = object_idxs[cluster_idx[:, 1].long()].int()
        # cluster_idx: (sumNPoint, 2), int, dim 0 for cluster_id, dim 1 for corresponding point idxs in N
        # cluster_offset: (nProposal + 1), int
        return cluster_idx, cluster_offset

    # Clusters on the offset-shifted coordinates first, then on the raw ones.
    proposals_idx_shift, proposals_offset_shift = _cluster(
        fg_coords + fg_pt_offsets, self.cluster_shift_meanActive)
    proposals_idx, proposals_offset = _cluster(fg_coords, self.cluster_meanActive)

    # Append the shifted clusters after the raw ones: renumber their
    # cluster ids and move their offsets past the raw clusters' points.
    proposals_idx_shift[:, 0] += (proposals_offset.size(0) - 1)
    proposals_offset_shift += proposals_offset[-1]
    proposals_idx = torch.cat((proposals_idx, proposals_idx_shift), dim=0)
    proposals_offset = torch.cat((proposals_offset, proposals_offset_shift[1:]))

    #### proposals voxelization again
    input_feats, inp_map = self.clusters_voxelization(
        proposals_idx, proposals_offset, output_feats, coords,
        self.score_fullscale, self.score_scale, self.mode)

    #### score
    score = self.score_outputlayer(self.score_unet(input_feats))
    score_feats = score.features[inp_map.long()]   # (sumNPoint, C)
    score_feats = pointgroup_ops.roipool(score_feats, proposals_offset.cuda())   # (nProposal, C)
    scores = self.score_linear(score_feats)   # (nProposal, 1)

    ret['proposal_scores'] = (scores, score_feats, proposals_idx, proposals_offset)

    return ret