def get_loss(self, image_a_pred, image_b_pred, mask_a, mask_b):
        loss = 0

        # get the nonzero indices
        mask_a_indices_flat = torch.nonzero(mask_a)
        mask_b_indices_flat = torch.nonzero(mask_b)
        if len(mask_a_indices_flat) == 0:
            return Variable(torch.cuda.LongTensor([0]), requires_grad=True)
        if len(mask_b_indices_flat) == 0:
            return Variable(torch.cuda.LongTensor([0]), requires_grad=True)

        # take num_samples random pixel samples of the object, using the mask
        num_samples = 10000

        rand_numbers_a = (torch.rand(num_samples)*len(mask_a_indices_flat)).cuda()
        rand_indices_a = Variable(torch.floor(rand_numbers_a).type(torch.cuda.LongTensor), requires_grad=False)
        randomized_mask_a_indices_flat = torch.index_select(mask_a_indices_flat, 0, rand_indices_a).squeeze(1)

        rand_numbers_b = (torch.rand(num_samples)*len(mask_b_indices_flat)).cuda()
        rand_indices_b = Variable(torch.floor(rand_numbers_b).type(torch.cuda.LongTensor), requires_grad=False)
        randomized_mask_b_indices_flat = torch.index_select(mask_b_indices_flat, 0, rand_indices_b).squeeze(1)

        # index into the image and get descriptors
        M_margin = 0.5 # margin parameter
        random_img_a_object_descriptors = torch.index_select(image_a_pred, 1, randomized_mask_a_indices_flat)
        random_img_b_object_descriptors = torch.index_select(image_b_pred, 1, randomized_mask_b_indices_flat)
        pixel_wise_loss = (random_img_a_object_descriptors - random_img_b_object_descriptors).pow(2).sum(dim=2)
        pixel_wise_loss = torch.add(pixel_wise_loss, -2*M_margin)
        zeros_vec = torch.zeros_like(pixel_wise_loss)
        loss += torch.max(zeros_vec, pixel_wise_loss).sum()

        return loss
Code example #2
File: loss.py  Project: laycoding/maskrcnn-benchmark
    def __call__(self, anchors, objectness, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
        """
        anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
        labels, regression_targets = self.prepare_targets(anchors, targets)
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)

        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        objectness_flattened = []
        box_regression_flattened = []
        # for each feature level, permute the outputs to make them be in the
        # same format as the labels. Note that the labels are computed for
        # all feature levels concatenated, so we keep the same representation
        # for the objectness and the box_regression
        for objectness_per_level, box_regression_per_level in zip(
            objectness, box_regression
        ):
            N, A, H, W = objectness_per_level.shape
            objectness_per_level = objectness_per_level.permute(0, 2, 3, 1).reshape(
                N, -1
            )
            box_regression_per_level = box_regression_per_level.view(N, -1, 4, H, W)
            box_regression_per_level = box_regression_per_level.permute(0, 3, 4, 1, 2)
            box_regression_per_level = box_regression_per_level.reshape(N, -1, 4)
            objectness_flattened.append(objectness_per_level)
            box_regression_flattened.append(box_regression_per_level)
        # concatenate on the first dimension (representing the feature levels), to
        # take into account the way the labels were generated (with all feature maps
        # being concatenated as well)
        objectness = cat(objectness_flattened, dim=1).reshape(-1)
        box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4)

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)

        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            beta=1.0 / 9,
            size_average=False,
        ) / (sampled_inds.numel())

        objectness_loss = F.binary_cross_entropy_with_logits(
            objectness[sampled_inds], labels[sampled_inds]
        )

        return objectness_loss, box_loss
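As a quick shape check for the per-level flattening above, this sketch (toy sizes N=2, A=3, H=W=4 assumed; not part of the benchmark code) applies the same permute/reshape steps to dummy tensors:

import torch

N, A, H, W = 2, 3, 4, 4
objectness_per_level = torch.randn(N, A, H, W)
box_regression_per_level = torch.randn(N, A * 4, H, W)

# same permute/reshape as in __call__ above
obj = objectness_per_level.permute(0, 2, 3, 1).reshape(N, -1)
box = box_regression_per_level.view(N, -1, 4, H, W).permute(0, 3, 4, 1, 2).reshape(N, -1, 4)

print(obj.shape)   # torch.Size([2, 48])  -> N x (H*W*A)
print(box.shape)   # torch.Size([2, 48, 4])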
Code example #3
    def _get_bbox_regression_labels_pytorch(self, bbox_target_data, labels_batch, num_classes):
        """Bounding-box regression targets (bbox_target_data) are stored in a
        compact form b x N x (class, tx, ty, tw, th)

        This function copies those targets into the b x N x 4 blob used by the
        network; only boxes with a positive class label receive non-zero targets.

        Returns:
            bbox_target (ndarray): b x N x 4 blob of regression targets
            bbox_inside_weights (ndarray): b x N x 4 blob of loss weights
        """
        batch_size = labels_batch.size(0)
        rois_per_image = labels_batch.size(1)
        clss = labels_batch
        bbox_targets = bbox_target_data.new(batch_size, rois_per_image, 4).zero_()
        bbox_inside_weights = bbox_target_data.new(bbox_targets.size()).zero_()

        for b in range(batch_size):
            # assert clss[b].sum() > 0
            if clss[b].sum() == 0:
                continue
            inds = torch.nonzero(clss[b] > 0).view(-1)
            for i in range(inds.numel()):
                ind = inds[i]
                bbox_targets[b, ind, :] = bbox_target_data[b, ind, :]
                bbox_inside_weights[b, ind, :] = self.BBOX_INSIDE_WEIGHTS

        return bbox_targets, bbox_inside_weights
Code example #4
    def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix):
        """
        Produce additional matches for predictions that have only low-quality matches.
        Specifically, for each ground-truth find the set of predictions that have
        maximum overlap with it (including ties); for each prediction in that set, if
        it is unmatched, then match it to the ground-truth with which it has the highest
        quality value.
        """
        # For each gt, find the prediction with which it has highest quality
        highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1)
        # Find highest quality match available, even if it is low, including ties
        gt_pred_pairs_of_highest_quality = torch.nonzero(
            match_quality_matrix == highest_quality_foreach_gt[:, None]
        )
        # Example gt_pred_pairs_of_highest_quality:
        #   tensor([[    0, 39796],
        #           [    1, 32055],
        #           [    1, 32070],
        #           [    2, 39190],
        #           [    2, 40255],
        #           [    3, 40390],
        #           [    3, 41455],
        #           [    4, 45470],
        #           [    5, 45325],
        #           [    5, 46390]])
        # Each row is a (gt index, prediction index)
        # Note how gt items 1, 2, 3, and 5 each have two ties

        pred_inds_to_update = gt_pred_pairs_of_highest_quality[:, 1]
        matches[pred_inds_to_update] = all_matches[pred_inds_to_update]
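A minimal, self-contained illustration (toy numbers, not from the benchmark) of the tie-aware lookup above: torch.nonzero on the equality mask returns every (gt index, prediction index) pair that attains the per-gt maximum.

import torch

match_quality_matrix = torch.tensor([
    [0.1, 0.7, 0.7, 0.2],   # gt 0: predictions 1 and 2 tie for the best IoU
    [0.3, 0.0, 0.1, 0.9],   # gt 1: prediction 3 is the unique best
])
highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1)
pairs = torch.nonzero(match_quality_matrix == highest_quality_foreach_gt[:, None])
print(pairs)   # tensor([[0, 1], [0, 2], [1, 3]])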
Code example #5
ファイル: loss.py プロジェクト: laycoding/maskrcnn-benchmark
    def subsample(self, proposals, targets):
        """
        This method performs the positive/negative sampling, and return
        the sampled proposals.
        Note: this function keeps a state.

        Arguments:
            proposals (list[BoxList])
            targets (list[BoxList])
        """

        labels, regression_targets = self.prepare_targets(proposals, targets)
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)

        proposals = list(proposals)
        # add corresponding label and regression_targets information to the bounding boxes
        for labels_per_image, regression_targets_per_image, proposals_per_image in zip(
            labels, regression_targets, proposals
        ):
            proposals_per_image.add_field("labels", labels_per_image)
            proposals_per_image.add_field(
                "regression_targets", regression_targets_per_image
            )

        # distributed sampled proposals, that were obtained on all feature maps
        # concatenated via the fg_bg_sampler, into individual feature map levels
        for img_idx, (pos_inds_img, neg_inds_img) in enumerate(
            zip(sampled_pos_inds, sampled_neg_inds)
        ):
            img_sampled_inds = torch.nonzero(pos_inds_img | neg_inds_img).squeeze(1)
            proposals_per_image = proposals[img_idx][img_sampled_inds]
            proposals[img_idx] = proposals_per_image

        self._proposals = proposals
        return proposals
Code example #6
File: dan.py  Project: Pinafore/cl1-hw
def evaluate(data_loader, model, device):
    """
    evaluate the current model, get the accuracy for dev/test set

    Keyword arguments:
    data_loader: pytorch build-in data loader output
    model: model to be evaluated
    device: cpu or gpu
    """

    model.eval()
    num_examples = 0
    error = 0
    for idx, batch in enumerate(data_loader):
        question_text = batch['text'].to(device)
        question_len = batch['len']
        labels = batch['labels']
        ####Your code here

        top_n, top_i = logits.topk(1)
        num_examples += question_text.size(0)
        error += torch.nonzero(top_i.squeeze() - torch.LongTensor(labels)).size(0)
    accuracy = 1 - error / num_examples
    print('accuracy', accuracy)
    return accuracy
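The error-counting idiom used above, in isolation with toy values: torch.nonzero of the difference counts the positions where prediction and label disagree.

import torch

preds = torch.tensor([3, 1, 4, 1])
labels = torch.tensor([3, 2, 4, 0])
errors = torch.nonzero(preds - labels).size(0)
print(errors)   # 2 mismatches out of 4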
Code example #7
    def forward(self, x, boxes):
        """
        Arguments:
            x (list[Tensor]): feature maps for each level
            boxes (list[BoxList]): boxes to be used to perform the pooling operation.
        Returns:
            result (Tensor)
        """
        num_levels = len(self.poolers)
        rois = self.convert_to_roi_format(boxes)
        if num_levels == 1:
            return self.poolers[0](x[0], rois)

        levels = self.map_levels(boxes)

        num_rois = len(rois)
        num_channels = x[0].shape[1]
        output_size = self.output_size[0]

        dtype, device = x[0].dtype, x[0].device
        result = torch.zeros(
            (num_rois, num_channels, output_size, output_size),
            dtype=dtype,
            device=device,
        )
        for level, (per_level_feature, pooler) in enumerate(zip(x, self.poolers)):
            idx_in_level = torch.nonzero(levels == level).squeeze(1)
            rois_per_level = rois[idx_in_level]
            result[idx_in_level] = pooler(per_level_feature, rois_per_level)

        return result
Code example #8
File: Loss.py  Project: xiamengzhou/OpenNMT-py
    def _compute_loss(self, batch, output, target):
        scores = self.generator(self._bottle(output))

        gtruth = target.view(-1)
        if self.confidence < 1:
            tdata = gtruth.data
            mask = torch.nonzero(tdata.eq(self.padding_idx)).squeeze()
            log_likelihood = torch.gather(scores.data, 1, tdata.unsqueeze(1))
            tmp_ = self.one_hot.repeat(gtruth.size(0), 1)
            tmp_.scatter_(1, tdata.unsqueeze(1), self.confidence)
            if mask.dim() > 0:
                log_likelihood.index_fill_(0, mask, 0)
                tmp_.index_fill_(0, mask, 0)
            gtruth = Variable(tmp_, requires_grad=False)
        loss = self.criterion(scores, gtruth)
        if self.confidence < 1:
            # Default: report smoothed ppl.
            # loss_data = -log_likelihood.sum(0)
            loss_data = loss.data.clone()
        else:
            loss_data = loss.data.clone()

        stats = self._stats(loss_data, scores.data, target.view(-1).data)

        return loss, stats
Code example #9
    def predict(self, wm, s, a, ls):
        with torch.no_grad():
            self.embedding, _ = create_emb_layer(wm)
            s_embedded = self.embedding(s)
            a_embedded = self.embedding(a)

            # Average the aspect embedding
            a_new_embedded = torch.zeros(len(s),1,100)
            for i in range(len(a_embedded)):
                if len(torch.nonzero(a_embedded[i])):
                    a_new_embedded[i] = torch.unsqueeze(torch.sum(a_embedded[i], 0)/len(torch.nonzero(a_embedded[i])),0)

            a_embedded = a_new_embedded
            embedded = torch.zeros(len(s),40,200)

            # Concatenate each word in sentence with aspect vector
            zero_tag = torch.zeros(100).cuda()
            for i in range(len(s_embedded)):
                for j in range(40):
                    if j<(ls[i]-1):
                        embedded[i][j] = torch.unsqueeze(torch.cat((s_embedded[i][j].cuda(),torch.squeeze(a_embedded[i].cuda(),0)),0),0)
                    else:
                        embedded[i][j] = torch.unsqueeze(torch.cat((s_embedded[i][j].cuda(),zero_tag),0),0)
            
        out, (h, c) = self.lstm(embedded.cuda())
        hidden = self.dropout(torch.cat((h[-2,:,:], h[-1,:,:]), dim=1))
        hidden2pred = self.fc(hidden)
        pred =  self.softmax(hidden2pred)
                   
        return pred
Code example #10
def random_sample_from_masked_image_torch(img_mask, num_samples):
    """

    :param img_mask: Numpy array [H,W] or torch.Tensor with shape [H,W]
    :type img_mask:
    :param num_samples: an integer
    :type num_samples:
    :return: tuple of torch.LongTensor in (u,v) format. Each torch.LongTensor has shape
    [num_samples]
    :rtype:
    """

    image_height, image_width = img_mask.shape

    if isinstance(img_mask, np.ndarray):
        img_mask_torch = torch.from_numpy(img_mask).float()
    else:
        img_mask_torch = img_mask

    # This code would randomly subsample from the mask
    mask = img_mask_torch.view(image_width*image_height,1).squeeze(1)
    mask_indices_flat = torch.nonzero(mask)
    if len(mask_indices_flat) == 0:
        return (None, None)

    rand_numbers = torch.rand(num_samples)*len(mask_indices_flat)
    rand_indices = torch.floor(rand_numbers).long()
    uv_vec_flattened = torch.index_select(mask_indices_flat, 0, rand_indices).squeeze(1)
    uv_vec = utils.flattened_pixel_locations_to_u_v(uv_vec_flattened, image_width)
    return uv_vec
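A self-contained sketch of the same masked-sampling pattern, with the flattened-index-to-(u, v) conversion inlined so it runs without the project's utils helper (assumed to compute u = idx % width, v = idx // width):

import torch

mask = torch.zeros(4, 6)
mask[1:3, 2:5] = 1                             # a 2x3 foreground region
H, W = mask.shape
idx = torch.nonzero(mask.view(-1)).squeeze(1)  # flattened foreground indices
pick = idx[torch.randint(len(idx), (8,))]      # 8 samples, with replacement
u, v = pick % W, pick // W                     # (u, v) = (column, row)
assert bool((mask[v, u] > 0).all())            # every sample lies on the mask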
Code example #11
File: lossfn.py  Project: Fresh-Z/mtcnn_pytorch
    def landmark_loss(self,gt_label,gt_landmark,pred_landmark):
        mask = torch.eq(gt_label,-2)

        chose_index = torch.nonzero(mask.data)
        chose_index = torch.squeeze(chose_index)

        valid_gt_landmark = gt_landmark[chose_index, :]
        valid_pred_landmark = pred_landmark[chose_index, :]
        return self.loss_landmark(valid_pred_landmark, valid_gt_landmark)
Code example #12
def train(train_X, train_Y):
    model.train()
    total_loss = 0.

    for batch, i in enumerate(
            xrange(0, len(train_X.data) - BATCH_SIZE + 1, BATCH_SIZE)):

        digits_correct = 0
        digits_total = 0
        batch_loss = 0.

        X, Y = train_X[i:i + BATCH_SIZE, :, :], train_Y[i:i + BATCH_SIZE, :]

        # # Buffered model
        zero = Variable(torch.zeros(BATCH_SIZE, 3))
        num_iterations = TIME_FN(2 * MAX_LENGTH)
        model.init_model(BATCH_SIZE, X)
        for j in xrange(num_iterations):
            model.forward()
        for j in xrange(MAX_LENGTH):
            model._buffer_out.pop(1.)
            a = model._buffer_out.read(1.)

            # # Normal seq2seq
            # model.init_stack(BATCH_SIZE)
            # for j in xrange(2 * MAX_LENGTH):
            # 	a = model.forward(X[:,j,:])

            indices = Y[:, j] != 2
            valid_a = a[indices.view(-1, 1)].view(-1, 3)
            valid_Y = Y[:, j][indices]

            if len(valid_a) == 0: continue

            _, valid_y_ = torch.max(valid_a, 1)
            digits_total += len(valid_a)
            digits_correct += len(torch.nonzero((valid_y_ == valid_Y).data))
            batch_loss += criterion(valid_a, valid_Y)

        # Add regularization loss and reset the tracker.
        batch_loss += model.get_and_reset_reg_loss()

        # update the weights
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        total_loss += batch_loss.data
        if batch % 10 == 9:
            mean_loss = sum(batch_loss.data)
            print "batches {}-{}: loss={:.4f}, acc={:.2f}".format(batch - 9,
                                                                  batch,
                                                                  mean_loss,
                                                                  digits_correct
                                                                  / digits_total)
Code example #13
File: lossfn.py  Project: Fresh-Z/mtcnn_pytorch
 def box_loss(self,gt_label,gt_offset,pred_offset):
     #get the mask element which != 0
     mask = torch.ne(gt_label,0)
     #convert mask to dim index
     chose_index = torch.nonzero(mask)
     chose_index = torch.squeeze(chose_index)
     #only valid element can effect the loss
     valid_gt_offset = gt_offset[chose_index,:]
     valid_pred_offset = pred_offset[chose_index,:]
     valid_pred_offset = torch.squeeze(valid_pred_offset)
     return self.loss_box(valid_pred_offset,valid_gt_offset)
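The mask-to-row-index idiom above, in isolation with toy labels (label 0 marks samples that should not contribute to the box loss):

import torch

gt_label = torch.tensor([0, 1, -1, 1])
pred_offset = torch.randn(4, 4)
mask = torch.ne(gt_label, 0)
chose_index = torch.squeeze(torch.nonzero(mask))
print(chose_index)                           # tensor([1, 2, 3])
print(pred_offset[chose_index, :].shape)     # torch.Size([3, 4])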
Code example #14
    def forward(self, s, a, ls):
        with torch.no_grad():
            embedded = self.embedding(s.cuda())
            a_embedded = self.embedding(a.cuda())

            # Average the aspect embedding
            a_new_embedded = torch.zeros(len(s),100)
            for i in range(len(a_embedded)):
                if len(torch.nonzero(a_embedded[i])):
                    a_new_embedded[i] = torch.sum(a_embedded[i], 0)/len(torch.nonzero(a_embedded[i]))

            a_embedded = a_new_embedded
            
        out, (h, c) = self.lstm(embedded)
        hidden = self.dropout(torch.cat((h[-2,:,:], h[-1,:,:]), dim=1))
        with torch.no_grad():
            new_embedded = torch.cat((hidden.cuda(), a_embedded.cuda()),1)
        hidden2pred = self.fc(new_embedded)
        pred =  self.softmax(hidden2pred)
                   
        return pred
Code example #15
 def forward(self, x, target):
     assert x.size(1) == self.size
     true_dist = x.data.clone()
     true_dist.fill_(self.smoothing / (self.size - 2))
     true_dist.scatter_(1, target.data.unsqueeze(1), self.confidence)
     true_dist[:, self.padding_idx] = 0
     mask = torch.nonzero(target.data == self.padding_idx)
     if mask.dim() > 0:
         true_dist.index_fill_(0, mask.squeeze(), 0.0)
     self.true_dist = true_dist
     loss = self.criterion(x, Variable(true_dist, requires_grad=False))
     return loss
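A self-contained sketch of the smoothed-target construction above, with size=5, padding_idx=0, smoothing=0.4 and dummy inputs assumed (the criterion call is omitted):

import torch

size, padding_idx, smoothing = 5, 0, 0.4
confidence = 1.0 - smoothing
target = torch.tensor([2, 1, 0, 0])        # the last two tokens are padding
x = torch.zeros(4, size)                   # stand-in for the model's log-probabilities

true_dist = x.clone()
true_dist.fill_(smoothing / (size - 2))    # spread the smoothing mass over the "other" classes
true_dist.scatter_(1, target.unsqueeze(1), confidence)
true_dist[:, padding_idx] = 0
mask = torch.nonzero(target == padding_idx)
if mask.dim() > 0:
    true_dist.index_fill_(0, mask.squeeze(), 0.0)   # zero out the padding rows entirely
print(true_dist)
# real-token rows: 0.6 on the target class, ~0.1333 on the remaining non-padding classes;
# padding rows: all zeros.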
Code example #16
def _nonzero_counter_hook(module, inputs, output):
  """
  Module hook used to count the number of nonzero floating point values from
  all the tensors used by the given network during inference. This hook will be
  called every time :func:`forward` computes an output.

  See :func:`torch.nn.Module.register_forward_hook`
  """
  if not hasattr(module, "__counter_nonzero__"):
    raise ValueError("register_counter_nonzero was not called for this network")

  if module.training:
    return

  size = module.__counter_nonzero__.get("input", 0)
  size += sum([torch.nonzero(i).size(0) for i in inputs])
  module.__counter_nonzero__["input"] = size

  size = module.__counter_nonzero__.get("output", 0)
  size += torch.nonzero(output).size(0)
  module.__counter_nonzero__["output"] = size

  for name, param in module._parameters.items():
    if param is None:
      continue

    size = module.__counter_nonzero__.get(name, 0)
    size += torch.nonzero(param.data).size(0)
    module.__counter_nonzero__[name] = size

  for name, buffer in module._buffers.items():
    if buffer is None:
      continue

    size = module.__counter_nonzero__.get(name, 0)
    size += torch.nonzero(buffer).size(0)
    module.__counter_nonzero__[name] = size
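A hedged usage sketch for the hook above (register_counter_nonzero normally initializes the __counter_nonzero__ dict; here it is set by hand): attach it with register_forward_hook, run inference in eval mode, then read the counts.

import torch
import torch.nn as nn

model = nn.Linear(8, 4)
model.__counter_nonzero__ = {}      # normally done by register_counter_nonzero
handle = model.register_forward_hook(_nonzero_counter_hook)

model.eval()
with torch.no_grad():
    model(torch.randn(2, 8))

print(model.__counter_nonzero__)    # nonzero counts for "input", "output", "weight", "bias"
handle.remove()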
Code example #17
File: loss.py  Project: laycoding/maskrcnn-benchmark
    def __call__(self, class_logits, box_regression):
        """
        Computes the loss for Faster R-CNN.
        This requires that the subsample method has been called beforehand.

        Arguments:
            class_logits (list[Tensor])
            box_regression (list[Tensor])

        Returns:
            classification_loss (Tensor)
            box_loss (Tensor)
        """

        class_logits = cat(class_logits, dim=0)
        box_regression = cat(box_regression, dim=0)
        device = class_logits.device

        if not hasattr(self, "_proposals"):
            raise RuntimeError("subsample needs to be called before")

        proposals = self._proposals

        labels = cat([proposal.get_field("labels") for proposal in proposals], dim=0)
        regression_targets = cat(
            [proposal.get_field("regression_targets") for proposal in proposals], dim=0
        )

        classification_loss = F.cross_entropy(class_logits, labels)

        # get indices that correspond to the regression targets for
        # the corresponding ground truth labels, to be used with
        # advanced indexing
        sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
        labels_pos = labels[sampled_pos_inds_subset]
        map_inds = 4 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3], device=device)

        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds_subset[:, None], map_inds],
            regression_targets[sampled_pos_inds_subset],
            size_average=False,
            beta=1,
        )
        box_loss = box_loss / labels.numel()

        return classification_loss, box_loss
Code example #18
File: reverse.py  Project: simonjmendelsohn/StackNN
    def _evaluate_step(self, x, y, a, j):
        """
        Computes the loss, number of guesses correct, and total number
        of guesses at the jth time step. The loss for a string is
        considered to be 0 if the neural network is still reading the
        input string.

        :type x: Variable
        :param x: The input data, represented as a 3D tensor. Each
            example consists of a string of 0s and 1s, followed by
            "null"s. All symbols are in one-hot representation

        :type y: Variable
        :param y: The output data, represented as a 2D tensor. Each
            example consists of a sequence of "null"s, followed by a
            string backwards. All symbols are represented numerically

        :type a: Variable
        :param a: The output of the neural network at the jth time step,
            represented as a 2D vector. For each i, a[i, :] is the
            output of the neural network at the jth time step, in one-
            hot representation

        :type j: int
        :param j: This function is called during the jth time step of
            the neural network's computation

        :rtype: tuple
        :return: The loss, number of correct guesses, and number of
            total guesses at the jth time step
        """
        indices = (y[:, j] != self.alphabet[self.null])
        # Indexing semantics in the line below were changed in different versions of pytorch.
        valid_a = a[indices.view(-1)].view(-1, self.alphabet_size)
        valid_y = y[:, j][indices]
        if len(valid_a) == 0:
            return None, None, None

        _, valid_y_ = torch.max(valid_a, 1)

        total = len(valid_a)
        correct = len(torch.nonzero((valid_y_ == valid_y).data))
        loss = self.criterion(valid_a, valid_y)
        return loss, correct, total
Code example #19
    def non_match_descriptor_loss(image_a_pred, image_b_pred, non_matches_a, non_matches_b, M=0.5, invert=False):
        """
        Computes the max(0, M - D(I_a,I_b,u_a,u_b))^2 term

        This is effectively:       "a and b should be AT LEAST M away from each other"
        With invert=True, this is: "a and b should be AT MOST  M away from each other" 

         :param image_a_pred: Output of DCN network on image A.
        :type image_a_pred: torch.Variable(torch.FloatTensor) shape [1, W * H, D]
        :param image_b_pred: same as image_a_pred
        :type image_b_pred:
        :param non_matches_a: torch.Variable(torch.FloatTensor) has shape [num_non_matches,],  a (u,v) pair is mapped
        to (u,v) ---> image_width * v + u, this matches the shape of image_a_pred
        :type non_matches_a: torch.Variable(torch.FloatTensor)
        :param non_matches_b: same as non_matches_a
        :param M: the margin
        :type M: float
        :return: torch.FloatTensor with shape torch.Shape([num_non_matches])
        :rtype:
        """

        non_matches_a_descriptors = torch.index_select(image_a_pred, 1, non_matches_a).squeeze()
        non_matches_b_descriptors = torch.index_select(image_b_pred, 1, non_matches_b).squeeze()

        # crazily enough, if there is only one element to index_select into
        # above, then the first dimension is collapsed down, and we end up 
        # with shape [D,], where we want [1,D]
        # this unsqueeze fixes that case
        if len(non_matches_a) == 1:
            non_matches_a_descriptors = non_matches_a_descriptors.unsqueeze(0)
            non_matches_b_descriptors = non_matches_b_descriptors.unsqueeze(0)

        norm_degree = 2
        non_match_loss = (non_matches_a_descriptors - non_matches_b_descriptors).norm(norm_degree, 1)
        if not invert:
            non_match_loss = torch.clamp(M - non_match_loss, min=0).pow(2)
        else:
            non_match_loss = torch.clamp(non_match_loss - M, min=0).pow(2)

        hard_negative_idxs = torch.nonzero(non_match_loss)
        num_hard_negatives = len(hard_negative_idxs)

        return non_match_loss, num_hard_negatives, non_matches_a_descriptors, non_matches_b_descriptors
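A hedged usage sketch (assuming the function is callable as written, e.g. as a static method): toy descriptor images of shape [1, W*H, D] and a few flattened non-match indices.

import torch

D, WH = 3, 16
image_a_pred = torch.randn(1, WH, D)
image_b_pred = torch.randn(1, WH, D)
non_matches_a = torch.tensor([0, 5, 9])
non_matches_b = torch.tensor([2, 7, 11])

loss, num_hard, _, _ = non_match_descriptor_loss(
    image_a_pred, image_b_pred, non_matches_a, non_matches_b, M=0.5)
# loss has shape [3]; num_hard counts the pairs whose descriptors lie closer than M.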
Code example #20
    def forward(self, s, a, ls):
        with torch.no_grad():
            embedded = self.embedding(s.cuda())
            a_embedded = self.embedding(a.cuda())

            # Average the aspect embedding
            a_new_embedded = torch.zeros(len(s),1,100)
            for i in range(len(a_embedded)):
                if len(torch.nonzero(a_embedded[i])):
                    a_new_embedded[i] = torch.unsqueeze(torch.sum(a_embedded[i].cuda(), 0)/len(torch.nonzero(a_embedded[i].cuda())),0)

            a_embedded = a_new_embedded
            """
            embedded = torch.zeros(len(s),20,200)

            # Concatenate each word in sentence with aspect vector
            zero_tag = torch.zeros(100)
            for i in range(len(s_embedded)):
                for j in range(20):
                    if j<(ls[i]-1):
                        embedded[i][j] = torch.unsqueeze(torch.cat((s_embedded[i][j],torch.squeeze(a_embedded[i],0)),0),0)
                    else:
                        embedded[i][j] = torch.unsqueeze(torch.cat((s_embedded[i][j],zero_tag),0),0)
            """
            
        out, (h, c) = self.lstm1(embedded)
        with torch.no_grad():
            new_embedded = torch.zeros(len(s), 20, 612)
            zero_tag = torch.zeros(100).cuda()
            for i in range(len(out)):
                for j in range(20):
                    if j<(ls[i]-1):
                        new_embedded[i][j] = torch.unsqueeze(torch.cat((out[i][j].cuda(),torch.squeeze(a_embedded[i].cuda(),0)),0),0)
                    else:
                        new_embedded[i][j] = torch.unsqueeze(torch.cat((out[i][j].cuda(),zero_tag),0),0)

        out2, (h2, c2) = self.lstm2(new_embedded.cuda())
        hidden = self.dropout(torch.cat((h2[-2,:,:], h2[-1,:,:]), dim=1))
        hidden2pred = self.fc(hidden)
        pred =  self.softmax(hidden2pred)
                   
        return pred
Code example #21
    def select_top_predictions(self, predictions):
        """
        Select only predictions which have a `score` > self.confidence_threshold,
        and return the predictions in descending order of score

        Arguments:
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `scores`.

        Returns:
            prediction (BoxList): the detected objects. Additional information
                of the detection properties can be found in the fields of
                the BoxList via `prediction.fields()`
        """
        scores = predictions.get_field("scores")
        keep = torch.nonzero(scores > self.confidence_threshold).squeeze(1)
        predictions = predictions[keep]
        scores = predictions.get_field("scores")
        _, idx = scores.sort(0, descending=True)
        return predictions[idx]
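The same filter-then-sort pattern with plain tensors, for reference:

import torch

scores = torch.tensor([0.9, 0.2, 0.75, 0.5])
threshold = 0.6
keep = torch.nonzero(scores > threshold).squeeze(1)   # tensor([0, 2])
kept_scores = scores[keep]
_, order = kept_scores.sort(0, descending=True)
print(keep[order])                                    # tensor([0, 2]): highest score first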
Code example #22
File: box_utils.py  Project: g0josh/mtcnn
def _nms(boxes, overlap_threshold=0.5, mode='union'):
    # This native torch implementation is slow
    # on cuda for cuda tensors
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    scores = boxes[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    _, order = scores.sort(dim=0, descending=True)
    ind_buffer = torch.zeros(scores.shape, dtype=torch.long)
    i = 0
    while order.size()[0] > 1:
        ind_buffer[i] = order[0]
        i += 1
        xx1 = torch.max(x1[order[0]], x1[order[1:]])
        yy1 = torch.max(y1[order[0]], y1[order[1:]])
        xx2 = torch.min(x2[order[0]], x2[order[1:]])
        yy2 = torch.min(y2[order[0]], y2[order[1:]])

        # w = F.relu(xx2 - xx1)
        # h = F.relu(yy2 - yy1)
        w = torch.clamp(xx2 - xx1 + 1, min=0)
        h = torch.clamp(yy2 - yy1 + 1, min=0)
        inter = w * h
        if mode == 'min':
            ovr = inter / torch.min(areas[order[0]], areas[order[1:]])
        else:
            ovr = inter / (areas[order[0]] + areas[order[1:]] - inter)

        inds = torch.nonzero(ovr <= overlap_threshold).squeeze()
        if inds.dim():
            order = order[(inds + 1)]
        else:
            break
    keep = ind_buffer[:i]
    return keep
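A hedged usage sketch for the routine above: two pairs of heavily overlapping boxes, each row being (x1, y1, x2, y2, score); within each pair the lower-scoring box should be suppressed.

import torch

boxes = torch.tensor([
    [ 10.,  10.,  50.,  50., 0.9],
    [ 12.,  12.,  52.,  52., 0.8],   # suppressed by the first box
    [100., 100., 140., 140., 0.7],
    [102., 102., 142., 142., 0.6],   # suppressed by the third box
])
keep = _nms(boxes, overlap_threshold=0.5, mode='union')
print(keep)   # tensor([0, 2])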
Code example #23
def evaluate(test_X, test_Y):
    model.eval()
    total_loss = 0.
    digits_correct = 0
    digits_total = 0

    len_X = test_X.size(0)

    # # Buffered model
    zero = Variable(torch.zeros(len_X, 3))
    num_iterations = TIME_FN(2 * MAX_LENGTH)
    model.init_model(len_X, test_X)
    for j in xrange(num_iterations):
        model.forward()
    for j in xrange(MAX_LENGTH):
        model._buffer_out.pop(1.)
        a = model._buffer_out.read(1.)

        # # Normal seq2seq
        # model.init_stack(len(test_X.data))
        # for j in xrange(2 * MAX_LENGTH):
        # 	a = model.forward(test_X[:,j,:])

        indices = test_Y[:, j] != 2
        valid_a = a[indices.view(-1, 1)].view(-1, 3)
        valid_Y = test_Y[:, j][indices]

        if len(valid_a) == 0: continue

        _, valid_y_ = torch.max(valid_a, 1)
        digits_total += len(valid_a)
        digits_correct += len(torch.nonzero((valid_y_ == valid_Y).data))
        total_loss += criterion(valid_a, valid_Y)

    mean_loss = sum(total_loss.data)
    print "epoch {}: loss={:.4f}, acc={:.2f}".format(epoch, mean_loss,
                                                     digits_correct /
                                                     digits_total)
Code example #24
    def _evaluate_step(self, x, y, a, j):
        """
        Computes the loss, number of guesses correct, and total number
        of guesses at the jth time step.

        :type x: Variable
        :param x: The input data, represented as a 3D tensor

        :type y: Variable
        :param y: The output data, represented as a 2D tensor

        :type a: Variable
        :param a: The output of the neural network at the jth time step,
            represented as a 2D vector

        :type j: int
        :param j: This function is called during the jth time step of
            the neural network's computation

        :rtype: tuple
        :return: The loss, number of correct guesses, and number of
            total guesses at the jth time step
        """
        indices = (y[:, j] != self.alphabet[self.null])
        # Indexing conventions changed with PyTorch version.
        valid_a = a[indices.view(-1)].view(-1, self.alphabet_size)
        valid_y = y[:, j][indices]
        if len(valid_a) == 0:
            return None, None, None

        _, valid_y_ = torch.max(valid_a, 1)

        total = len(valid_a)
        correct = len(torch.nonzero((valid_y_ == valid_y).data))
        loss = self.criterion(valid_a, valid_y)

        return loss, correct, total
Code example #25
    def assign_wrt_overlaps(self, overlaps, gt_labels=None):
        """Assign w.r.t. the overlaps of bboxes with gts.
        -1: ignore
        0: background
        other values: the corresponding entry in gt_labels
        Args:
            overlaps (Tensor): Overlaps between k gt_bboxes and n bboxes,
                shape(k, n).
            gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).

        Returns:
            :obj:`AssignResult`: The assign result.
        """
        num_gts, num_bboxes = overlaps.size(0), overlaps.size(1)

        # 1. assign -1 by default
        # assigned_gt_inds: for each proposal, the index of its assigned gt
        assigned_gt_inds = overlaps.new_full((num_bboxes, ),
                                             -1,
                                             dtype=torch.long)

        # not present in the original implementation
        if num_gts == 0 or num_bboxes == 0:
            # No ground truth or boxes, return empty assignment
            max_overlaps = overlaps.new_zeros((num_bboxes, ))
            if num_gts == 0:
                # No truth, assign everything to background
                assigned_gt_inds[:] = 0
            if gt_labels is None:
                assigned_labels = None
            else:
                assigned_labels = overlaps.new_zeros((num_bboxes, ),
                                                     dtype=torch.long)
            return AssignResult(num_gts,
                                assigned_gt_inds,
                                max_overlaps,
                                labels=assigned_labels)

        # for each anchor, which gt best overlaps with it
        # for each anchor, the max iou of all gts
        max_overlaps, argmax_overlaps = overlaps.max(dim=0)
        # for each gt, which anchor best overlaps with it
        # for each gt, the max iou of all proposals
        gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1)

        # 2. assign negative: below
        # set a proposal to 0 (background) if its max IoU over all gts is below neg_iou_thr
        if isinstance(self.neg_iou_thr, float):
            assigned_gt_inds[(max_overlaps >= 0)
                             & (max_overlaps < self.neg_iou_thr)] = 0
        elif isinstance(self.neg_iou_thr, tuple):
            assert len(self.neg_iou_thr) == 2
            assigned_gt_inds[(max_overlaps >= self.neg_iou_thr[0])
                             & (max_overlaps < self.neg_iou_thr[1])] = 0

        # 3. assign positive: above positive IoU threshold
        # if a proposal's max IoU over all gts is at least pos_iou_thr, assign it the index of that gt
        # +1: gt indices start at 0, so offset by 1 to keep them distinct from the background label 0
        pos_inds = max_overlaps >= self.pos_iou_thr
        assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1

        # 4. assign fg: for each gt, proposals with highest IoU
        # for each gt whose best-matching proposal IoU is at least min_pos_iou, assign that gt's index
        for i in range(num_gts):
            if gt_max_overlaps[i] >= self.min_pos_iou:
                if self.gt_max_assign_all:
                    # whether every proposal tied at this max IoU is assigned to this gt
                    max_iou_inds = overlaps[i, :] == gt_max_overlaps[i]
                    assigned_gt_inds[max_iou_inds] = i + 1
                else:
                    assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1

        # build assigned_labels from assigned_gt_inds, i.e. record the label of the assigned gt for each positive proposal
        if gt_labels is not None:
            assigned_labels = assigned_gt_inds.new_zeros((num_bboxes, ))
            pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze()
            if pos_inds.numel() > 0:
                # -1: undo the +1 offset applied to the gt indices above
                assigned_labels[pos_inds] = gt_labels[
                    assigned_gt_inds[pos_inds] - 1]
        else:
            assigned_labels = None

        return AssignResult(num_gts,
                            assigned_gt_inds,
                            max_overlaps,
                            labels=assigned_labels)
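A self-contained walk-through of steps 2-4 above on a toy 2x4 overlaps matrix, with neg_iou_thr=0.3, pos_iou_thr=0.5, min_pos_iou=0.3 assumed and the gt_max_assign_all branch omitted for brevity:

import torch

overlaps = torch.tensor([
    [0.10, 0.60, 0.25, 0.40],   # gt 0
    [0.05, 0.20, 0.45, 0.10],   # gt 1
])
neg_iou_thr, pos_iou_thr, min_pos_iou = 0.3, 0.5, 0.3
assigned = overlaps.new_full((overlaps.size(1),), -1, dtype=torch.long)

max_overlaps, argmax_overlaps = overlaps.max(dim=0)        # best gt for each proposal
gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1)  # best proposal for each gt

assigned[(max_overlaps >= 0) & (max_overlaps < neg_iou_thr)] = 0   # step 2: background
pos = max_overlaps >= pos_iou_thr
assigned[pos] = argmax_overlaps[pos] + 1                           # step 3: confident positives
for i in range(overlaps.size(0)):                                  # step 4: rescue low-IoU gts
    if gt_max_overlaps[i] >= min_pos_iou:
        assigned[gt_argmax_overlaps[i]] = i + 1

print(assigned)   # tensor([ 0,  1,  2, -1])  -> proposal 3 stays ignored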
Code example #26
def selectProposal(posi_prop_idx,
                   nega_prop_idx,
                   posi_idx,
                   nega_idx,
                   max_prop=4000,
                   ratio1=0.5,
                   max_grasp=2000,
                   ratio2=0.5):
    posi_prop_idx = posi_prop_idx.view(-1)
    nega_prop_idx = nega_prop_idx.view(-1)
    posi_idx = posi_idx.view(-1)
    nega_idx = nega_idx.view(-1)

    posi_num = posi_idx.size(0)
    nega_num = nega_idx.size(0)
    posi_prop_num = posi_prop_idx.size(0)
    nega_prop_num = nega_prop_idx.size(0)

    posi_num_exp = int(max_grasp * ratio2)
    nega_num_exp = max_grasp - posi_num_exp
    posi_prop_num_exp = int(max_prop * ratio1)
    nega_prop_num_exp = max_prop - posi_prop_num_exp

    if posi_num < posi_num_exp:
        choice = torch.cat([
            torch.arange(0, posi_num).cuda().long(),
            torch.randint(posi_num, (posi_num_exp - posi_num, )).cuda().long()
        ], 0)
        posi_idx = posi_idx[choice.long()]
    else:
        choice = torch.LongTensor(
            np.random.choice(posi_num, posi_num_exp, replace=False)).cuda()
        posi_idx = posi_idx[choice]

    if nega_num < nega_num_exp:
        choice = torch.cat([
            torch.arange(0, nega_num).cuda().long(),
            torch.randint(nega_num, (nega_num_exp - nega_num, )).cuda().long()
        ], 0)
        nega_idx = nega_idx[choice.long()]
    else:
        choice = torch.LongTensor(
            np.random.choice(nega_num, nega_num_exp, replace=False)).cuda()
        nega_idx = nega_idx[choice]

    if nega_prop_num < nega_prop_num_exp:
        choice = torch.cat([
            torch.arange(0, nega_prop_num).cuda().long(),
            torch.randint(nega_prop_num,
                          (nega_prop_num_exp - nega_prop_num, )).cuda().long()
        ], 0)
        nega_prop_idx = nega_prop_idx[choice.long()]
    else:
        choice = torch.LongTensor(
            np.random.choice(nega_prop_num, nega_prop_num_exp,
                             replace=False)).cuda()
        nega_prop_idx = nega_prop_idx[choice]

    if posi_prop_num_exp > max_grasp:
        select = posi_prop_idx.new(posi_prop_num).zero_()
        select[posi_idx] = 1
        select[nega_idx] = 1
        un_select = torch.nonzero(select == 0).view(-1)
        choice = torch.LongTensor(
            np.random.choice(un_select.size(0),
                             posi_prop_num_exp - max_grasp)).cuda()
        un_select = un_select[choice]
        posi_idx = posi_prop_idx[posi_idx]
        nega_idx = posi_prop_idx[nega_idx]
        posi_prop_idx = torch.cat(
            [posi_idx, nega_idx, posi_prop_idx[un_select]], 0)
    else:
        posi_idx = posi_prop_idx[posi_idx]
        nega_idx = posi_prop_idx[nega_idx]
        posi_prop_idx = torch.cat([posi_idx, nega_idx], 0)

    return posi_prop_idx, nega_prop_idx, posi_idx, nega_idx
Code example #27
def batch_find_pixel_correspondences(img_a_depth, img_a_pose, img_b_depth, img_b_pose, 
                                        uv_a=None, num_attempts=20, device='CPU', img_a_mask=None, K=None):
    """
    Computes pixel correspondences in batch

    :param img_a_depth: depth image for image a
    :type  img_a_depth: numpy 2d array (H x W) encoded as a uint16
    --
    :param img_a_pose:  pose for image a, in right-down-forward optical frame
    :type  img_a_pose:  numpy 2d array, 4 x 4 (homogeneous transform)
    --
    :param img_b_depth: depth image for image b
    :type  img_b_depth: numpy 2d array (H x W) encoded as a uint16
    -- 
    :param img_b_pose:  pose for image b, in right-down-forward optical frame
    :type  img_b_pose:  numpy 2d array, 4 x 4 (homogeneous transform)
    -- 
    :param uv_a:        optional arg, a tuple of (u,v) pixel positions for which to find matches
    :type  uv_a:        each element of tuple is either an int, or a list-like (castable to torch.LongTensor)
    --
    :param num_attempts: if random sampling, how many pixels will be _attempted_ to find matches for.  Note that
                            this is not the same as asking for a specific number of matches, since many attempted matches
                            will either be occluded or outside of field-of-view. 
    :type  num_attempts: int
    --
    :param device:      either 'CPU' or 'GPU'
    :type  device:      string
    --
    :param img_a_mask:  optional arg, an image where each nonzero pixel will be used as a mask
    :type  img_a_mask:  ndarray, of shape (H, W)
    --
    :param K:           optional arg, camera intrinsics matrix; if None, a default is used
    :type  K:           ndarray, of shape (3, 3)
    --
    :return:            "Tuple of tuples", i.e. pixel position tuples for image a and image b (uv_a, uv_b). 
                        Each of these is a tuple of pixel positions
    :rtype:             Each of uv_a, uv_b is a tuple of torch.FloatTensors
    """
    assert (img_a_depth.shape == img_b_depth.shape)
    image_width  = img_a_depth.shape[1]
    image_height = img_b_depth.shape[0]

    global dtype_float
    global dtype_long
    if device == 'CPU':
        dtype_float = torch.FloatTensor
        dtype_long = torch.LongTensor
    if device =='GPU':
        dtype_float = torch.cuda.FloatTensor
        dtype_long = torch.cuda.LongTensor

    if uv_a is None:
        uv_a = pytorch_rand_select_pixel(width=image_width,height=image_height, num_samples=num_attempts)
    else:
        uv_a = (torch.LongTensor([uv_a[0]]).type(dtype_long), torch.LongTensor([uv_a[1]]).type(dtype_long))
        num_attempts = 1

    if img_a_mask is None:
        uv_a_vec = (torch.ones(num_attempts).type(dtype_long)*uv_a[0],torch.ones(num_attempts).type(dtype_long)*uv_a[1])
        uv_a_vec_flattened = uv_a_vec[1]*image_width+uv_a_vec[0]
    else:
        img_a_mask = torch.from_numpy(img_a_mask).type(dtype_float)  
        
        # Option A: This next line samples from img mask
        uv_a_vec = random_sample_from_masked_image_torch(img_a_mask, num_samples=num_attempts)
        if uv_a_vec[0] is None:
            return (None, None)
        
        # Option B: These 4 lines grab ALL from img mask
        # mask_a = img_a_mask.squeeze(0)
        # mask_a = mask_a/torch.max(mask_a)
        # nonzero = (torch.nonzero(mask_a)).type(dtype_long)
        # uv_a_vec = (nonzero[:,1], nonzero[:,0])

        # Always use this line        
        uv_a_vec_flattened = uv_a_vec[1]*image_width+uv_a_vec[0]


    if K is None:
        K = get_default_K_matrix()

    K_inv = inv(K)
    body_to_rdf = get_body_to_rdf()
    rdf_to_body = inv(body_to_rdf)

    img_a_depth_torch = torch.from_numpy(img_a_depth).type(dtype_float)
    img_a_depth_torch = torch.squeeze(img_a_depth_torch, 0)
    img_a_depth_torch = img_a_depth_torch.view(-1,1)

    
    depth_vec = torch.index_select(img_a_depth_torch, 0, uv_a_vec_flattened)*1.0/DEPTH_IM_SCALE
    depth_vec = depth_vec.squeeze(1)
    
    # Prune based on
    # Case 1: depth is zero (for this data, this means no-return)
    nonzero_indices = torch.nonzero(depth_vec)
    if nonzero_indices.dim() == 0:
        return (None, None)
    nonzero_indices = nonzero_indices.squeeze(1)
    depth_vec = torch.index_select(depth_vec, 0, nonzero_indices)

    # prune u_vec and v_vec, then multiply by already pruned depth_vec
    u_a_pruned = torch.index_select(uv_a_vec[0], 0, nonzero_indices)
    u_vec = u_a_pruned.type(dtype_float)*depth_vec

    v_a_pruned = torch.index_select(uv_a_vec[1], 0, nonzero_indices)
    v_vec = v_a_pruned.type(dtype_float)*depth_vec

    z_vec = depth_vec

    full_vec = torch.stack((u_vec, v_vec, z_vec))

    K_inv_torch = torch.from_numpy(K_inv).type(dtype_float)
    point_camera_frame_rdf_vec = K_inv_torch.mm(full_vec)

    point_world_frame_rdf_vec = apply_transform_torch(point_camera_frame_rdf_vec, torch.from_numpy(img_a_pose).type(dtype_float))
    point_camera_2_frame_rdf_vec = apply_transform_torch(point_world_frame_rdf_vec, torch.from_numpy(invert_transform(img_b_pose)).type(dtype_float))

    K_torch = torch.from_numpy(K).type(dtype_float)
    vec2_vec = K_torch.mm(point_camera_2_frame_rdf_vec)

    u2_vec = vec2_vec[0]/vec2_vec[2]
    v2_vec = vec2_vec[1]/vec2_vec[2]

    maybe_z2_vec = point_camera_2_frame_rdf_vec[2]

    z2_vec = vec2_vec[2]

    # Prune based on
    # Case 2: the pixels projected into image b are outside FOV
    # u2_vec bounds should be: 0, image_width
    # v2_vec bounds should be: 0, image_height

    ## do u2-based pruning
    u2_vec_lower_bound = 0.0
    epsilon = 1e-3
    u2_vec_upper_bound = image_width*1.0 - epsilon  # careful, needs to be epsilon less!!
    lower_bound_vec = torch.ones_like(u2_vec) * u2_vec_lower_bound
    upper_bound_vec = torch.ones_like(u2_vec) * u2_vec_upper_bound
    zeros_vec       = torch.zeros_like(u2_vec)

    u2_vec = where(u2_vec < lower_bound_vec, zeros_vec, u2_vec)
    u2_vec = where(u2_vec > upper_bound_vec, zeros_vec, u2_vec)
    in_bound_indices = torch.nonzero(u2_vec)
    if in_bound_indices.dim() == 0:
        return (None, None)
    in_bound_indices = in_bound_indices.squeeze(1)

    # apply pruning
    u2_vec = torch.index_select(u2_vec, 0, in_bound_indices)
    v2_vec = torch.index_select(v2_vec, 0, in_bound_indices)
    z2_vec = torch.index_select(z2_vec, 0, in_bound_indices)
    u_a_pruned = torch.index_select(u_a_pruned, 0, in_bound_indices) # also prune from first list
    v_a_pruned = torch.index_select(v_a_pruned, 0, in_bound_indices) # also prune from first list

    ## do v2-based pruning
    v2_vec_lower_bound = 0.0
    v2_vec_upper_bound = image_height*1.0 - epsilon
    lower_bound_vec = torch.ones_like(v2_vec) * v2_vec_lower_bound
    upper_bound_vec = torch.ones_like(v2_vec) * v2_vec_upper_bound
    zeros_vec       = torch.zeros_like(v2_vec)    

    v2_vec = where(v2_vec < lower_bound_vec, zeros_vec, v2_vec)
    v2_vec = where(v2_vec > upper_bound_vec, zeros_vec, v2_vec)
    in_bound_indices = torch.nonzero(v2_vec)
    if in_bound_indices.dim() == 0:
        return (None, None)
    in_bound_indices = in_bound_indices.squeeze(1)

    # apply pruning
    u2_vec = torch.index_select(u2_vec, 0, in_bound_indices)
    v2_vec = torch.index_select(v2_vec, 0, in_bound_indices)
    z2_vec = torch.index_select(z2_vec, 0, in_bound_indices)
    u_a_pruned = torch.index_select(u_a_pruned, 0, in_bound_indices) # also prune from first list
    v_a_pruned = torch.index_select(v_a_pruned, 0, in_bound_indices) # also prune from first list

    # Prune based on
    # Case 3: the pixels in image b are occluded, OR there is no depth return in image b so we aren't sure

    img_b_depth_torch = torch.from_numpy(img_b_depth).type(dtype_float)
    img_b_depth_torch = torch.squeeze(img_b_depth_torch, 0)
    img_b_depth_torch = img_b_depth_torch.view(-1,1)

    # simply round to int -- good enough occlusion check for smooth surfaces
    uv_b_vec_flattened = (v2_vec.type(dtype_long)*image_width+u2_vec.type(dtype_long))

    depth2_vec = torch.index_select(img_b_depth_torch, 0, uv_b_vec_flattened)*1.0/1000
    depth2_vec = depth2_vec.squeeze(1)

    # occlusion margin, in meters
    occlusion_margin = 0.003
    z2_vec = z2_vec - occlusion_margin
    zeros_vec = torch.zeros_like(depth2_vec)

    depth2_vec = where(depth2_vec < zeros_vec, zeros_vec, depth2_vec) # to be careful, prune any negative depths
    depth2_vec = where(depth2_vec < z2_vec, zeros_vec, depth2_vec)    # prune occlusions
    non_occluded_indices = torch.nonzero(depth2_vec)
    if non_occluded_indices.dim() == 0:
        return (None, None)
    non_occluded_indices = non_occluded_indices.squeeze(1)
    depth2_vec = torch.index_select(depth2_vec, 0, non_occluded_indices)

    # apply pruning
    u2_vec = torch.index_select(u2_vec, 0, non_occluded_indices)
    v2_vec = torch.index_select(v2_vec, 0, non_occluded_indices)
    u_a_pruned = torch.index_select(u_a_pruned, 0, non_occluded_indices) # also prune from first list
    v_a_pruned = torch.index_select(v_a_pruned, 0, non_occluded_indices) # also prune from first list

    uv_b_vec = (u2_vec, v2_vec)
    uv_a_vec = (u_a_pruned, v_a_pruned)
    return (uv_a_vec, uv_b_vec)
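The bound-pruning idiom used repeatedly above, in isolation; the where() helper below is an assumed stand-in for the module's elementwise select.

import torch

def where(cond, x_1, x_2):
    # elementwise select: x_1 where cond holds, x_2 elsewhere
    return (cond.float() * x_1) + ((1 - cond.float()) * x_2)

u2_vec = torch.tensor([-3.0, 12.5, 640.2, 300.0])
zeros_vec = torch.zeros_like(u2_vec)
u2_vec = where(u2_vec < 0.0, zeros_vec, u2_vec)            # out-of-bounds (left) -> 0
u2_vec = where(u2_vec > 640.0 - 1e-3, zeros_vec, u2_vec)   # out-of-bounds (right) -> 0
in_bound_indices = torch.nonzero(u2_vec).squeeze(1)
print(in_bound_indices)   # tensor([1, 3])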
Code example #28
def torch_non_max_suppression(detections, confidence_threshold, num_classes, nms_conf):
    # Check all boxes which have object confidence less than threshold
    object_confidence_mask = (detections[:, :, 4] > confidence_threshold).float()
    # Add a dimension for multiplying
    object_confidence_mask = object_confidence_mask.unsqueeze(2)
    # Apply mask to detections
    detections = detections * object_confidence_mask

    # Calculate top-left and right-bottom coordinate
    box_corner = detections.new(detections.shape)
    # top-left x-coordinate = centre_x - width / 2
    box_corner[:, :, 0] = detections[:, :, 0] - detections[:, :, 2] / 2
    # top-left y-coordinate = centre_y - height / 2
    box_corner[:, :, 1] = detections[:, :, 1] - detections[:, :, 3] / 2
    # right-bottom x-coordinate = centre_x + width / 2
    box_corner[:, :, 2] = detections[:, :, 0] + detections[:, :, 2] / 2
    # right-bottom y-coordinate = centre_y + height / 2
    box_corner[:, :, 3] = detections[:, :, 1] + detections[:, :, 3] / 2

    # Transform bx, by, bw, bh to top_left_x, top_left_y, right_bottom_x, right_bottom_y
    detections[:, :, :4] = box_corner[:, :, :4]

    batch_size = detections.size(0)
    write = False

    for index in range(batch_size):
        image_prediction = detections[index]  # 10647 x 85

        # max_confidence, max_confidence_class: 10647
        max_confidence, max_confidence_class = torch.max(input=image_prediction[:, 5:5 + num_classes], dim=1)

        # Add a dimension for multiplying
        max_confidence = max_confidence.float().unsqueeze(1)
        max_confidence_class = max_confidence_class.float().unsqueeze(1)

        # Concatenate image_prediction, max_confidence and max_confidence_class
        sequence = (image_prediction[:, :5], max_confidence, max_confidence_class)
        image_prediction = torch.cat(sequence, dim=1)

        # Get rid of bounding-boxes which have object confidence less than threshold
        # Get index of elements which have non-zero value
        non_zero_index = torch.nonzero(image_prediction[:, 4])
        try:
            # 7 is: bx, by, bw, bh, object_confidence, max_confidence, max_confidence_class
            # After this step, our prediction only have some boxes for each class
            # Perform non-max suppression to get rid of boxes which have low IoU
            image_prediction_ = image_prediction[non_zero_index.squeeze(), :].view(-1, 7)
        except:
            # In this case, there is not any detection
            continue

        try:
            image_classes = unique(image_prediction_[:, -1])  # The last index is the class index
        except IndexError:
            print('There is no object in this image')
            continue

        for class_ in image_classes:
            class_mask = image_prediction_ * (image_prediction_[:, -1] == class_).float().unsqueeze(1)
            class_mask_index = torch.nonzero(class_mask[:, -2]).squeeze()
            image_prediction_class = image_prediction_[class_mask_index].view(-1, 7)

            # Sorting image_prediction_class by object confidence
            conf_sort_index = torch.sort(image_prediction_class[:, 4], descending=True)[1]
            image_prediction_class = image_prediction_class[conf_sort_index]

            # Perform IoU
            no_of_boxes = image_prediction_class.size(0)
            for box_index in range(no_of_boxes):
                # Get the IOUs of all boxes that come after the one we are looking at in the loop
                try:
                    ious = bbox_iou(image_prediction_class[box_index].unsqueeze(0),
                                    image_prediction_class[box_index + 1:])
                except ValueError:
                    break
                except IndexError:
                    break

                # Zero out all the detections that have IoU > threshold
                iou_mask = (ious < nms_conf).float().unsqueeze(1)
                image_prediction_class[box_index + 1:] *= iou_mask

                # Remove the non-zero entries
                non_zero_index = torch.nonzero(image_prediction_class[:, 4]).squeeze()
                image_prediction_class = image_prediction_class[non_zero_index].view(-1, 7)

            batch_index = image_prediction_class.new(image_prediction_class.size(0), 1).fill_(index)
            # Repeat the batch_id for as many detections of the class cls in the image
            seq = batch_index, image_prediction_class

            if not write:
                output = torch.cat(seq, 1)
                write = True
            else:
                out = torch.cat(seq, 1)
                output = torch.cat((output, out))

    try:
        # if no detection survived in any image, `output` was never created and
        # this raises, so fall back to returning 0
        return output
    except:
        return 0
Code example #29
    def __getitem__(self, index):
        ### input A (label maps)
        A_path = self.A_paths[index]
        A = Image.open(A_path)
        w, h = A.size
        max_size = max(w, h)

        if self.opt.longSize != max_size:
            scale_size = float(self.opt.longSize / max_size)
            new_w = int(scale_size * w)
            new_h = int(scale_size * h)
            A = A.resize((new_w, new_h), Image.NEAREST)
            # if self.opt.isTrain or self.opt.random_embed==False:
            B_path = self.B_paths[index]
            B = Image.open(B_path).convert('RGB')
            B = B.resize((new_w, new_h), Image.BICUBIC)
        else:
            # if self.opt.isTrain or self.opt.random_embed==False:
            B_path = self.B_paths[index]
            B = Image.open(B_path).convert('RGB')

        C_tensor = 0

        A_tensor = transforms.functional.to_tensor(A) * 255.0
        B_tensor = transforms.functional.to_tensor(B)
        real_B_tensor = B_tensor.clone()
        mask_bg = (A_tensor == 0).type(torch.FloatTensor)
        B_tensor = torch.clamp(
            B_tensor + mask_bg * torch.ones(A_tensor.size()), 0, 1)
        B = transforms.functional.to_pil_image(B_tensor)

        if self.opt.data_augmentation == True:
            assert self.opt.isTrain == True
            rotate, scale, shear = random.random() - 0.5, random.random(
            ) - 0.5, random.random() - 0.5
            rotate, scale, shear = 0, 0, 0
            B = transforms.functional.affine(B,
                                             20 * rotate, [0, 0],
                                             1 + 0.2 * scale,
                                             10 * shear,
                                             resample=Image.BICUBIC)
            A = transforms.functional.affine(A,
                                             20 * rotate, [0, 0],
                                             1 + 0.2 * scale,
                                             10 * shear,
                                             resample=Image.NEAREST)
            C_tensor = transforms.functional.to_tensor(B)
            C_tensor = transforms.Normalize((0.5, 0.5, 0.5),
                                            (0.5, 0.5, 0.5))(C_tensor)

        # if self.opt.isTrain or self.opt.random_embed==False:
        B_tensor = transforms.functional.to_tensor(B)
        B_tensor = transforms.Normalize((0.5, 0.5, 0.5),
                                        (0.5, 0.5, 0.5))(B_tensor)
        real_B_tensor = transforms.Normalize((0.5, 0.5, 0.5),
                                             (0.5, 0.5, 0.5))(real_B_tensor)

        # else:
        # B_tensor = 0

        # get mean of left eye, right eye, mouth
        # first y next x

        A_tensor = transforms.functional.to_tensor(A) * 255.0

        mask_tensor = torch.zeros(6)
        try:
            mask_left_eye_r = torch.nonzero(A_tensor == 4)
            this_top = int(torch.min(mask_left_eye_r, 0)[0][1])
            this_left = int(torch.min(mask_left_eye_r, 0)[0][2])
            this_bottom = int(torch.max(mask_left_eye_r, 0)[0][1])
            this_right = int(torch.max(mask_left_eye_r, 0)[0][2])
            x_mean = int((this_left + this_right) / 2)
            y_mean = int((this_top + this_bottom) / 2)
            mask_tensor[0] = y_mean
            mask_tensor[1] = x_mean
            # mask_list.append(x_mean)
            # mask_list.append(y_mean)
        except:
            print("left eye problem ------------------")
            print(A_path)
            mask_tensor[0] = 116
            mask_tensor[1] = 96
            # mask_list.append(116)
            # mask_list.append(96)

        try:
            mask_right_eye_r = torch.nonzero(A_tensor == 5)
            this_top = int(torch.min(mask_right_eye_r, 0)[0][1])
            this_left = int(torch.min(mask_right_eye_r, 0)[0][2])
            this_bottom = int(torch.max(mask_right_eye_r, 0)[0][1])
            this_right = int(torch.max(mask_right_eye_r, 0)[0][2])
            x_mean = int((this_left + this_right) / 2)
            y_mean = int((this_top + this_bottom) / 2)
            mask_tensor[2] = y_mean
            mask_tensor[3] = x_mean
            # mask_list.append(x_mean)
            # mask_list.append(y_mean)
        except:
            print("right eye problem --------------")
            print(A_path)
            mask_tensor[2] = 116
            mask_tensor[3] = 160
            # mask_list.append(116)
            # mask_list.append(160)

        try:
            mask_mouth_r = torch.nonzero((A_tensor == 7) + (A_tensor == 8) +
                                         (A_tensor == 9))
            this_top = int(torch.min(mask_mouth_r, 0)[0][1])
            this_left = int(torch.min(mask_mouth_r, 0)[0][2])
            this_bottom = int(torch.max(mask_mouth_r, 0)[0][1])
            this_right = int(torch.max(mask_mouth_r, 0)[0][2])
            x_mean = int((this_left + this_right) / 2)
            y_mean = int((this_top + this_bottom) / 2)
            mask_tensor[4] = y_mean
            mask_tensor[5] = x_mean
        except:
            print("mouth problem --------------")
            print(A_path)
            mask_tensor[4] = 184
            mask_tensor[5] = 128
            # mask_list.append(184) # or 180
            # mask_list.append(128)

        assert 16 < mask_tensor[0] < 256 - 16
        assert 24 < mask_tensor[1] < 256 - 24
        assert 16 < mask_tensor[2] < 256 - 16
        assert 24 < mask_tensor[3] < 256 - 24
        assert 40 < mask_tensor[4] < 256 - 40
        assert 72 < mask_tensor[5] < 256 - 72

        # A_tensor = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(A_tensor) * 255.0

        inst_tensor = feat_tensor = 0
        A_tensor = self.append_region(A, A_tensor, mask_tensor)

        # ---------------------------------------------------------------------------------------------------------------

        mask_A_path = self.mask_A_paths[index]
        mask_A = Image.open(mask_A_path)
        # params = get_params(self.opt, mask_A.size)

        mask_A_tensor = transforms.functional.to_tensor(mask_A) * 255.0

        w, h = mask_A.size
        max_size = max(w, h)

        if self.opt.longSize != max_size:
            scale_size = float(self.opt.longSize) / max_size
            new_w = int(scale_size * w)
            new_h = int(scale_size * h)
            mask_A = mask_A.resize((new_w, new_h), Image.NEAREST)
            # if self.opt.isTrain or self.opt.random_embed==False:
            mask_B_path = self.mask_B_paths[index]
            mask_B = Image.open(mask_B_path).convert('RGB')
            mask_B = mask_B.resize((new_w, new_h), Image.BICUBIC)
        else:
            # if self.opt.isTrain or self.opt.random_embed==False:
            mask_B_path = self.mask_B_paths[index]
            mask_B = Image.open(mask_B_path).convert('RGB')

        mask_A_tensor = transforms.functional.to_tensor(mask_A) * 255.0
        mask_B_tensor = transforms.functional.to_tensor(mask_B)
        real_mask_B_tensor = mask_B_tensor.clone()
        mask_bg = (mask_A_tensor == 0).type(torch.FloatTensor)
        mask_B_tensor = torch.clamp(
            mask_B_tensor + mask_bg * torch.ones(mask_A_tensor.size()), 0, 1)
        mask_B = transforms.functional.to_pil_image(mask_B_tensor)

        if self.opt.data_augmentation == True:
            assert self.opt.isTrain == True
            rotate, scale, shear = random.random() - 0.5, random.random() - 0.5, random.random() - 0.5
            rotate, scale, shear = 0, 0, 0
            mask_B = transforms.functional.affine(mask_B,
                                                  20 * rotate, [0, 0],
                                                  1 + 0.2 * scale,
                                                  10 * shear,
                                                  resample=Image.BICUBIC)
            mask_A = transforms.functional.affine(mask_A,
                                                  20 * rotate, [0, 0],
                                                  1 + 0.2 * scale,
                                                  10 * shear,
                                                  resample=Image.NEAREST)

        # if self.opt.isTrain or self.opt.random_embed==False:
        mask_B_tensor = transforms.functional.to_tensor(mask_B)
        mask_B_tensor = transforms.Normalize((0.5, 0.5, 0.5),
                                             (0.5, 0.5, 0.5))(mask_B_tensor)
        real_mask_B_tensor = transforms.Normalize(
            (0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(real_mask_B_tensor)

        mask_A_tensor = transforms.functional.to_tensor(mask_A) * 255.0

        mask_tensor2 = torch.zeros(6)
        try:
            mask_left_eye_r = torch.nonzero(mask_A_tensor == 4)
            this_top = int(torch.min(mask_left_eye_r, 0)[0][1])
            this_left = int(torch.min(mask_left_eye_r, 0)[0][2])
            this_bottom = int(torch.max(mask_left_eye_r, 0)[0][1])
            this_right = int(torch.max(mask_left_eye_r, 0)[0][2])
            x_mean = int((this_left + this_right) / 2)
            y_mean = int((this_top + this_bottom) / 2)
            mask_tensor2[0] = y_mean
            mask_tensor2[1] = x_mean
            # mask_list.append(x_mean)
            # mask_list.append(y_mean)
        except:
            print("left eye problem ------------------")
            print(mask_A_path)
            mask_tensor2[0] = 116
            mask_tensor2[1] = 96
            # mask_list.append(116)
            # mask_list.append(96)

        try:
            mask_right_eye_r = torch.nonzero(mask_A_tensor == 5)
            this_top = int(torch.min(mask_right_eye_r, 0)[0][1])
            this_left = int(torch.min(mask_right_eye_r, 0)[0][2])
            this_bottom = int(torch.max(mask_right_eye_r, 0)[0][1])
            this_right = int(torch.max(mask_right_eye_r, 0)[0][2])
            x_mean = int((this_left + this_right) / 2)
            y_mean = int((this_top + this_bottom) / 2)
            mask_tensor2[2] = y_mean
            mask_tensor2[3] = x_mean
            # mask_list.append(x_mean)
            # mask_list.append(y_mean)
        except:
            print("right eye problem --------------")
            print(mask_A_path)
            mask_tensor2[2] = 116
            mask_tensor2[3] = 160
            # mask_list.append(116)
            # mask_list.append(160)

        try:
            mask_mouth_r = torch.nonzero((mask_A_tensor == 7) +
                                         (mask_A_tensor == 8) +
                                         (mask_A_tensor == 9))
            this_top = int(torch.min(mask_mouth_r, 0)[0][1])
            this_left = int(torch.min(mask_mouth_r, 0)[0][2])
            this_bottom = int(torch.max(mask_mouth_r, 0)[0][1])
            this_right = int(torch.max(mask_mouth_r, 0)[0][2])
            x_mean = int((this_left + this_right) / 2)
            y_mean = int((this_top + this_bottom) / 2)
            mask_tensor2[4] = y_mean
            mask_tensor2[5] = x_mean
        except:
            print("mouth problem --------------")
            print(mask_A_path)
            mask_tensor2[4] = 184
            mask_tensor2[5] = 128
            # mask_list.append(184) # or 180
            # mask_list.append(128)

        assert 16 < mask_tensor2[0] < 256 - 16
        assert 24 < mask_tensor2[1] < 256 - 24
        assert 16 < mask_tensor2[2] < 256 - 16
        assert 24 < mask_tensor2[3] < 256 - 24
        assert 40 < mask_tensor2[4] < 256 - 40
        assert 72 < mask_tensor2[5] < 256 - 72

        mask_A_tensor = self.append_region(mask_A, mask_A_tensor, mask_tensor2)

        input_dict = {
            'label': A_tensor,
            'inst': inst_tensor,
            'image': B_tensor,
            'mask2': mask_tensor2,
            'bg_styleimage': real_B_tensor,
            'bg_contentimage': real_mask_B_tensor,
            'feat': feat_tensor,
            'path': A_path,
            'image_affine': C_tensor,
            'mask': mask_tensor,
            'label2': mask_A_tensor
        }

        # content image:  bg_contentimage, label2, mask2
        # style image:  bg_styleimage, label, mask,       label,image_affine

        return input_dict
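The eye and mouth centres above are all computed with the same nonzero/min/max pattern on a label map. A minimal, self-contained sketch of that pattern follows; the label id 4 and the fallback centre are illustrative assumptions, not values taken from this dataset.

import torch

def region_center(label_map, region_id, fallback=(116, 96)):
    # label_map: (H, W) integer tensor. Returns the (y, x) centre of the
    # bounding box of all pixels equal to region_id, or the fallback if absent.
    coords = torch.nonzero(label_map == region_id)  # (K, 2) rows of (y, x)
    if coords.numel() == 0:
        return fallback
    y_min, x_min = coords.min(dim=0)[0].tolist()
    y_max, x_max = coords.max(dim=0)[0].tolist()
    return (y_min + y_max) // 2, (x_min + x_max) // 2

label_map = torch.zeros(256, 256, dtype=torch.long)
label_map[100:132, 80:112] = 4          # a fake "left eye" region
print(region_center(label_map, 4))      # (115, 95)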
Code example #30
    def __call__(self, locations, box_cls, box_regression, centerness,
                 targets):
        """
        Arguments:
            locations (list[BoxList])
            box_cls (list[Tensor])
            box_regression (list[Tensor])
            centerness (list[Tensor])
            targets (list[BoxList])

        Returns:
            cls_loss (Tensor)
            reg_loss (Tensor)
            centerness_loss (Tensor)
        """
        # index 0 is the first FPN level
        N = box_cls[0].size(0)
        num_classes = box_cls[0].size(1)  #//self.num_pts

        # level first
        labels, reg_targets = self.prepare_targets(locations, targets)

        box_cls_flatten = []
        box_regression_flatten = []
        centerness_flatten = []
        labels_flatten = []
        reg_targets_flatten = []
        # for level
        for l in range(len(labels)):
            # batch*num_pos num_classes
            box_cls_flatten.append(box_cls[l].permute(0, 2, 3, 1).reshape(
                -1, num_classes))
            box_regression_flatten.append(box_regression[l].permute(
                0, 2, 3, 1).reshape(-1, 5))
            # layer_h, layer_w = box_cls[l].size(2), box_cls[l].size(3)
            # box_cls_flatten.append(box_cls[l].permute(0, 2, 3, 1).reshape(N, layer_h, layer_w, self.num_pts, num_classes).permute(0, 3, 1, 2, 4).reshape(-1,num_classes))
            # box_regression_flatten.append(box_regression[l].permute(0, 2, 3, 1).reshape(N, layer_h, layer_w, self.num_pts, 5).permute(0, 3, 1, 2, 4).reshape(-1,5))
            labels_flatten.append(labels[l].reshape(-1))
            #*******************************************#
            reg_targets_flatten.append(reg_targets[l].reshape(-1, 7))
            #*******************************************#
            centerness_flatten.append(centerness[l].reshape(-1))
        # level batch*num_pos num_classes
        box_cls_flatten = torch.cat(box_cls_flatten, dim=0)
        box_regression_flatten = torch.cat(box_regression_flatten, dim=0)
        centerness_flatten = torch.cat(centerness_flatten, dim=0)
        labels_flatten = torch.cat(labels_flatten, dim=0)
        reg_targets_flatten = torch.cat(reg_targets_flatten, dim=0)

        pos_inds = torch.nonzero(labels_flatten > 0).squeeze(1)

        # wrong
        # cls_weight=torch.where(centerness_flatten==0, torch.ones_like(centerness_flatten), centerness_flatten).unsqueeze(-1)
        # cls_loss = self.cls_loss_func(
        #     box_cls_flatten,#.cpu()
        #     labels_flatten.int(),#,#.cpu()
        #     weight = cls_weight
        # ) / (pos_inds.numel() + N)  # add N to avoid dividing by a zero

        #*******************************************#
        all_centerness_weights = reg_targets_flatten[:, -2]
        #*******************************************#

        # torch.sqrt(
        # cls_weight=torch.where(all_centerness_weights==0, torch.ones_like(all_centerness_weights), all_centerness_weights).unsqueeze(-1)
        cls_weight = torch.where(all_centerness_weights == 0,
                                 torch.full_like(all_centerness_weights, 0.8),
                                 all_centerness_weights).unsqueeze(-1)

        # print(cls_weight)
        # print((all_centerness_weights==0).sum(), (cls_weight==0).sum())

        # not every location is a positive sample
        cls_loss = 2 * self.cls_loss_func(
            box_cls_flatten,  #.cpu()
            labels_flatten.int(),  #.cpu()
            weight=cls_weight) / (pos_inds.numel() + N)  # add N to avoid dividing by zero

        box_regression_flatten = box_regression_flatten[pos_inds]
        reg_targets_flatten = reg_targets_flatten[pos_inds]
        centerness_flatten = centerness_flatten[pos_inds]

        #*******************************************#
        if pos_inds.numel() > 0:
            # centerness_targets = self.compute_centerness_targets(reg_targets_flatten)
            # centerness_targets = reg_targets_flatten[:, -2]

            # should this be kept consistent with the cls loss?
            reg_loss = smooth_l1_loss(
                box_regression_flatten,  #.cpu()
                reg_targets_flatten[:, :
                                    -2],  #.cpu()#*******************************************# 
                weight=reg_targets_flatten[:, -2].unsqueeze(
                    -1)  #cls_weight #******************
            )

            # the centerness must be regressed
            # reg_loss = torch.tensor(0)
            # print(centerness_targets)
            centerness_loss = self.centerness_loss_func(
                centerness_flatten,  #.cpu()
                reg_targets_flatten[:,
                                    -1]  #.cpu()#*******************************************# 
            )

        else:
            reg_loss = box_regression_flatten.sum()
            centerness_loss = centerness_flatten.sum()
            # .cuda()
        return cls_loss, reg_loss, 2 * centerness_loss  #*0
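The classification weighting above replaces a zero centerness weight (background locations) with a constant 0.8 so those locations still contribute to the loss. A tiny sketch of that torch.where pattern with made-up weights:

import torch

all_centerness_weights = torch.tensor([0.0, 0.9, 0.0, 0.4])
cls_weight = torch.where(all_centerness_weights == 0,
                         torch.full_like(all_centerness_weights, 0.8),
                         all_centerness_weights).unsqueeze(-1)
print(cls_weight.squeeze(-1))   # tensor([0.8000, 0.9000, 0.8000, 0.4000])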
Code example #31
def select_semihard(loss_values, margin):
    idcs = torch.nonzero((loss_values.view(-1) < margin) & (loss_values.view(-1) > 0)).view(-1)
    if len(idcs) == 0:
        return None
    choice = torch.randint(0, len(idcs), (1,), dtype=torch.long)[0]
    return idcs[choice]
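A minimal usage sketch for select_semihard above; the loss values and margin are made up for illustration.

import torch

loss_values = torch.tensor([0.0, 0.3, 0.7, 0.1, 0.0])
margin = 0.5
idx = select_semihard(loss_values, margin)
# idx is None when no entry satisfies 0 < loss < margin; here it is a random
# choice between indices 1 and 3 (losses 0.3 and 0.1).
print(idx)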
Code example #32
    def detect(self, bbx):
        with torch.no_grad():
            vis = False
            thresh = 0.05

            im_data = torch.FloatTensor(1).to(self.device)
            im_info = torch.FloatTensor(1).to(self.device)
            num_boxes = torch.LongTensor(1).to(self.device)
            gt_boxes = torch.FloatTensor(1).to(self.device)

            # total_tic = time.time()

            x, y, w, h = [int(p) for p in bbx]
            x = max(x, 0)
            y = max(y, 0)
            im = self.img[y:(y + h), x:(x + w)]
            # print ' (x=%d, y=%d), %d * %d, (%d, %d) - cropsize: %d * %d' % (x, y, w, h, x+w, y+h, im.shape[1], im.shape[0])
            w, h = im.shape[1], im.shape[0]
            refine_bbx = [0, 0, w, h]
            if w * h == 0:
                print('What? %d * %d' % (w, h))
                # raw_input('Continue?')
                return False

            blobs, im_scales = _get_image_blob(im)
            assert len(im_scales) == 1, "Only single-image batch implemented"
            im_blob = blobs
            im_info_np = np.array(
                [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                dtype=np.float32)

            im_data_pt = torch.from_numpy(im_blob)
            im_data_pt = im_data_pt.permute(0, 3, 1, 2)
            im_info_pt = torch.from_numpy(im_info_np)

            im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
            im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
            gt_boxes.data.resize_(1, 1, 5).zero_()
            num_boxes.data.resize_(1).zero_()

            # pdb.set_trace()
            # det_tic = time.time()

            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = self.fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

            scores = cls_prob.data
            boxes = rois.data[:, :, 1:5]

            if cfg.TEST.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).to(self.device) \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).to(self.device)

                    box_deltas = box_deltas.view(1, -1,
                                                 4 * len(self.pascal_classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                # Simply repeat the boxes, once for each class
                _ = torch.from_numpy(np.tile(boxes, (1, scores.shape[1])))
                pred_boxes = _.to(self.device)

            pred_boxes /= im_scales[0]

            scores = scores.squeeze()
            pred_boxes = pred_boxes.squeeze()

            # det_toc = time.time()
            # detect_time = det_toc - det_tic
            # misc_tic = time.time()

            if vis:
                im2show = np.copy(im)

            j = 15
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            step = 0
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets,
                           cfg.TEST.NMS,
                           force_cpu=not cfg.USE_GPU_NMS)
                cls_dets = cls_dets[keep.view(-1).long()]

                dets = cls_dets.cpu().numpy()
                for i in range(dets.shape[0]):
                    if dets[i, -1] > cf:
                        x1, y1, w1, h1 = dets[i][:4]
                        det = [x1, y1, w1 - x1, h1 - y1]
                        ratio = self.a_train_set.IOU(det, refine_bbx)
                        if ratio[0] > iou:  # IOU between prediction and detection should not be limited
                            step += 1

                if vis:
                    print(cls_dets)
                    dets = cls_dets.cpu().numpy()
                    # for i in range(dets.shape[0]):
                    #     bbox = tuple(int(np.round(x)) for x in dets[i, :4])
                    #     score = dets[i, -1]
                    #     if score > thresh:
                    #         crop = im[bbox[1]:bbox[3], bbox[0]:bbox[2]]
                    #         cv2.imwrite('in_place/%02d.jpg'%step, crop)
                    #         step += 1

                    im2show = vis_detections(im2show, self.pascal_classes[j],
                                             dets)

            # misc_toc = time.time()
            # nms_time = misc_toc - misc_tic

            if vis:
                cv2.imshow('test', im2show)
                cv2.waitKey(0)
                # result_path = os.path.join('results', imglist[num_images][:-4] + "_det.jpg")
                # cv2.imwrite(result_path, im2show)

            if step:
                return True
            return False
Code example #33
def main(args):
    # load graph data
    if args.dataset == 'aifb':
        dataset = AIFBDataset()
    elif args.dataset == 'mutag':
        dataset = MUTAGDataset()
    elif args.dataset == 'bgs':
        dataset = BGSDataset()
    elif args.dataset == 'am':
        dataset = AMDataset()
    else:
        raise ValueError()

    g = dataset[0]
    category = dataset.predict_category
    num_classes = dataset.num_classes
    train_mask = g.nodes[category].data.pop('train_mask')
    test_mask = g.nodes[category].data.pop('test_mask')
    train_idx = th.nonzero(train_mask).squeeze()
    test_idx = th.nonzero(test_mask).squeeze()
    labels = g.nodes[category].data.pop('labels')

    # split dataset into train, validate, test
    if args.validation:
        val_idx = train_idx[:len(train_idx) // 5]
        train_idx = train_idx[len(train_idx) // 5:]
    else:
        val_idx = train_idx

    # check cuda
    device = 'cpu'
    use_cuda = args.gpu >= 0 and th.cuda.is_available()
    if use_cuda:
        th.cuda.set_device(args.gpu)
        device = 'cuda:%d' % args.gpu

    train_label = labels[train_idx]
    val_label = labels[val_idx]
    test_label = labels[test_idx]

    # create embeddings
    embed_layer = RelGraphEmbed(g, args.n_hidden)
    node_embed = embed_layer()
    # create model
    model = EntityClassify(g,
                           args.n_hidden,
                           num_classes,
                           num_bases=args.n_bases,
                           num_hidden_layers=args.n_layers - 2,
                           dropout=args.dropout,
                           use_self_loop=args.use_self_loop)

    if use_cuda:
        model.cuda()

    # train sampler
    sampler = dgl.sampling.MultiLayerNeighborSampler([args.fanout] *
                                                     args.n_layers)
    loader = dgl.sampling.NodeDataLoader(g, {category: train_idx},
                                         sampler,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         num_workers=0)

    # validation sampler
    val_sampler = dgl.sampling.MultiLayerNeighborSampler([args.fanout] *
                                                         args.n_layers)
    val_loader = dgl.sampling.NodeDataLoader(g, {category: val_idx},
                                             val_sampler,
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             num_workers=0)

    # test sampler

    test_sampler = dgl.sampling.MultiLayerNeighborSampler([args.fanout] *
                                                          args.n_layers)
    test_loader = dgl.sampling.NodeDataLoader(g, {category: test_idx},
                                              test_sampler,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=0)

    # optimizer
    all_params = itertools.chain(model.parameters(), embed_layer.parameters())
    optimizer = th.optim.Adam(all_params, lr=args.lr, weight_decay=args.l2norm)

    # training loop
    print("start training...")
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        optimizer.zero_grad()
        if epoch > 3:
            t0 = time.time()

        for i, (input_nodes, seeds, blocks) in enumerate(loader):
            blocks = [blk.to(device) for blk in blocks]
            seeds = seeds[
                category]  # we only predict the nodes with type "category"
            batch_tic = time.time()
            emb = extract_embed(node_embed, input_nodes)
            lbl = labels[seeds]
            if use_cuda:
                emb = {k: e.cuda() for k, e in emb.items()}
                lbl = lbl.cuda()
            logits = model(emb, blocks)[category]
            loss = F.cross_entropy(logits, lbl)
            loss.backward()
            optimizer.step()

            train_acc = th.sum(logits.argmax(dim=1) == lbl).item() / len(seeds)
            print(
                "Epoch {:05d} | Batch {:03d} | Train Acc: {:.4f} | Train Loss: {:.4f} | Time: {:.4f}"
                .format(epoch, i, train_acc, loss.item(),
                        time.time() - batch_tic))

        if epoch > 3:
            dur.append(time.time() - t0)

        val_loss, val_acc = evaluate(model, val_loader, node_embed, labels,
                                     category, device)
        print(
            "Epoch {:05d} | Valid Acc: {:.4f} | Valid loss: {:.4f} | Time: {:.4f}"
            .format(epoch, val_acc, val_loss, np.average(dur)))
    print()
    if args.model_path is not None:
        th.save(model.state_dict(), args.model_path)

    output = model.inference(g, args.batch_size, 'cuda' if use_cuda else 'cpu',
                             0, node_embed)
    test_pred = output[category][test_idx]
    test_labels = labels[test_idx]
    test_acc = (test_pred.argmax(1) == test_labels).float().mean()
    print("Test Acc: {:.4f}".format(test_acc))
    print()
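The train/test indices above come from boolean node masks converted into index tensors. A tiny sketch of that conversion with made-up mask values:

import torch as th

train_mask = th.tensor([True, False, True, True, False])
train_idx = th.nonzero(train_mask).squeeze()           # tensor([0, 2, 3])
# an equivalent form using as_tuple:
train_idx = th.nonzero(train_mask, as_tuple=True)[0]
print(train_idx)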
Code example #34
def inference_relation(
    cfg,
    model,
    data_loader,
    device="cuda",
):
    # convert to a torch.device for efficiency
    device = torch.device(device)
    num_devices = get_world_size()
    if num_devices > 1:
        print("test acc is not support multi gpu")
        exit(-1)
    dataset = data_loader.dataset

    # for relation acc
    matcher = Matcher(
        0,
        0,
        allow_low_quality_matches=False,
    )
    all_acc_num = 0
    all_score_acc_num = 0
    all_count = 0
    all_infer_count = 0

    # category variables
    with open("datasets/coco/panoptic_coco_categories.json", "r") as f:
        categories_list = json.load(f)
    categories = {el['id']: el for el in categories_list}
    id_generator = IdGenerator(categories)
    # sem categories
    count = 1
    sem_contiguous_ids = []
    sem_contiguous_id_to_ps_categoty_id = {}
    for l in categories_list:
        if not l["isthing"]:
            sem_contiguous_ids.append(count)
            sem_contiguous_id_to_ps_categoty_id[count] = l["id"]
            count += 1

    # compute on dataset
    model.eval()
    cpu_device = torch.device("cpu")
    masker = Masker(threshold=0.5, padding=1)

    for images, targets, image_ids in data_loader:
        images = images.to(device)
        with torch.no_grad():
            outputs = model(images)
            outputs = [o.to(device) for o in outputs]
        targets = [target.to(device) for target in targets]
        for image_id, output, target in zip(image_ids, outputs, targets):
            # generate pred instance id
            origin_scores = output.get_field("scores")
            keep = torch.nonzero(
                origin_scores > cfg.MODEL.SEMANTIC.CONFIDENCE_THR).squeeze(1)
            output = output[keep]
            try:
                match_quality_matrix = boxlist_iou(target, output)
                matched_idxs = matcher(match_quality_matrix).tolist()
            except:
                continue

            target_instance_ids = target.get_field("instance_ids").tolist()
            pred_instance_ids = []
            for idx in matched_idxs:
                if idx > 0:
                    pred_instance_ids.append(target_instance_ids[idx])
                else:
                    pred_instance_ids.append(-1)

            target_relations = target.get_field("relations")["relations"]

            img_info = dataset.get_img_info(image_id)
            image_width = img_info["width"]
            image_height = img_info["height"]
            output = output.resize((image_width, image_height))

            # detection result
            boxes = output.bbox.tolist()
            scores = output.get_field("scores").tolist()
            if output.has_field("relation_val"):
                relation_vals = output.get_field("relation_val").tolist()
            else:
                relation_vals = [0. for _ in range(len(scores))]

            labels = output.get_field("labels").tolist()
            labels = [
                dataset.contiguous_category_id_to_json_id[i] for i in labels
            ]
            # mask result
            masks = output.get_field("mask")
            # Masker is necessary only if masks haven't been already resized.
            if list(masks.shape[-2:]) != [image_height, image_width]:
                masks = masker(masks.expand(1, -1, -1, -1, -1), output)
                masks = masks[0]

            # construct instance results
            inst_results = []
            for box, score, instance_id, relation_val, label, mask in zip(
                    boxes, scores, pred_instance_ids, relation_vals, labels,
                    masks):
                inst_results.append({
                    "box": box,
                    "score": score,
                    "instance_id": instance_id,
                    "relation_val": relation_val,
                    "label": label,
                    "segmentation": COCOmask.encode(np.asfortranarray(mask[0]))
                })

            # segmentation fusion
            acc_num, score_acc_num, infer_count, count = combine_to_panoptic_for_acc(
                cfg, img_info, inst_results, id_generator, target_relations)
            all_acc_num += acc_num
            all_score_acc_num += score_acc_num
            all_count += count
            all_infer_count += infer_count
            if count > 0:
                print(all_acc_num, all_score_acc_num, all_count,
                      all_infer_count)
    print("RAP acc:", all_acc_num / all_count)
    print("score relation acc:", all_score_acc_num / all_count)
    print(all_infer_count)
Code example #35
import torch
import numpy as np

a = torch.Tensor([[1, 2], [3, 4], [5, 6]])

print(a > 3)
print(a[a > 3])
print(torch.nonzero(a > 3))
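For reference, the output one would expect from the snippet above on a recent PyTorch build:

# tensor([[False, False],
#         [False,  True],
#         [ True,  True]])
# tensor([4., 5., 6.])
# tensor([[1, 1],
#         [2, 0],
#         [2, 1]])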
Code example #36
    def write_results(self, prediction, confidence, num_classes, nms_conf=0.4):
        """
        The result is a D x 8 tensor: each detection has 8 attributes,
        namely the index of the batch image it belongs to, the four box coordinates,
        the object score, the max class score, and the max class score index.
        :param prediction:
        :param confidence:
        :param num_classes:
        :param nms_conf:
        :return:
        """
        # Zero out low-confidence bboxes in place so the operation stays vectorized (the number kept differs per image)
        conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
        prediction = prediction * conf_mask
        # NMS: filter similar bounding boxes per class
        # Convert boxes to corner form (x1, y1, x2, y2); IoU is easier to compute from two corners
        box_corner = prediction.new(prediction.shape)
        box_corner[:, :, 0] = (prediction[:, :, 0] - prediction[:, :, 2] / 2)
        box_corner[:, :, 1] = (prediction[:, :, 1] - prediction[:, :, 3] / 2)
        box_corner[:, :, 2] = (prediction[:, :, 0] + prediction[:, :, 2] / 2)
        box_corner[:, :, 3] = (prediction[:, :, 1] + prediction[:, :, 3] / 2)
        prediction[:, :, :4] = box_corner[:, :, :4]

        # The number of detections surviving NMS differs per image,
        # so this part cannot be fully vectorized
        batch_size = prediction.size(0)
        write = False  # flag marking whether output has been initialized

        for ind in range(batch_size):
            image_pred = prediction[ind]

            # Each bounding box has 85 attributes, 80 of which are class scores.
            # We only care about the class with the highest score,
            # so drop the 80 class scores per row and append the max class score and its index
            max_conf, max_conf_index = torch.max(
                image_pred[:, 5:5 + num_classes], 1)
            max_conf = max_conf.float().unsqueeze(1)
            max_conf_index = max_conf_index.float().unsqueeze(1)
            seq = (image_pred[:, :5], max_conf, max_conf_index)
            image_pred = torch.cat(seq, 1)

            # Filter low-score bboxes; an image may have no bbox with an object score above the threshold
            # debug note: torch.nonzero returns the indices of the non-zero elements
            non_zero_ind = torch.nonzero(image_pred[:, 4])
            try:
                image_pred_ = image_pred[non_zero_ind.squeeze(), :].view(-1, 7)
            except:
                continue

            # For PyTorch 0.4 compatibility
            # Since the above code with not raise exception for no detection
            # as scalars are supported in PyTorch 0.4
            if image_pred_.shape[0] == 0:
                continue

            img_classes = unique(image_pred_[:, -1])

            # Run NMS per class
            for cls in img_classes:
                # perform NMS
                # 1. extract the detections for this particular class
                cls_mask = image_pred_ * (image_pred_[:, -1]
                                          == cls).float().unsqueeze(1)
                class_mask_ind = torch.nonzero(cls_mask[:, -2]).squeeze()
                image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

                conf_sort_index = torch.sort(image_pred_class[:, 4],
                                             descending=True)[1]
                image_pred_class = image_pred_class[conf_sort_index]
                idx = image_pred_class.size(0)

                for i in range(idx):
                    try:
                        ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                        image_pred_class[i + 1:])
                    except ValueError:
                        break

                    except IndexError:
                        break

                    # Zero out bboxes with IoU > threshold, keeping those with IoU < threshold
                    iou_mask = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask
                    # Remove bboxes with IoU > nms_conf, keeping those below the threshold
                    non_zero_ind = torch.nonzero(
                        image_pred_class[:, 4]).squeeze()
                    image_pred_class = image_pred_class[non_zero_ind].view(
                        -1, 7)

                batch_ind = image_pred_class.new(image_pred_class.size(0),
                                                 1).fill_(ind)
                # Repeat the batch_id for as many detections of the class cls in the image
                seq = batch_ind, image_pred_class
                # The result is a D x 8 tensor; each detection has 8 attributes:
                # batch image index, four box coordinates, object score, max class score, max class score index
                if not write:
                    output = torch.cat(seq, 1)
                    write = True
                else:
                    out = torch.cat(seq, 1)
                    output = torch.cat((output, out))

        try:
            return output
        except:
            return 0
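The per-class filtering inside the loop above keeps only rows whose objectness survived the earlier masking. A stripped-down sketch of just that step, with dummy predictions:

import torch

# rows: [x1, y1, x2, y2, obj_score, max_cls_score, cls_idx]; obj_score 0 means suppressed
image_pred_class = torch.tensor([[0., 0., 10., 10., 0.9, 0.8, 0.],
                                 [1., 1., 11., 11., 0.0, 0.7, 0.],
                                 [2., 2., 12., 12., 0.6, 0.5, 0.]])
non_zero_ind = torch.nonzero(image_pred_class[:, 4]).squeeze()
image_pred_class = image_pred_class[non_zero_ind].view(-1, 7)
print(image_pred_class.shape)   # torch.Size([2, 7])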
Code example #37
File: question2.py  Project: zhaochenqiu/courses
result_errors = list()
for level in torch.arange(7, 3, -1):
    if level > 0:
        I = torch.tensor(pyramid_I[level].astype(np.float32)).to(device)
        J = torch.tensor(pyramid_J[level].astype(np.float32)).to(device)
    else:
        I = torch.tensor(gaussian(pyramid_I[level].astype(np.float32),2.0)).to(device)
        J = torch.tensor(gaussian(pyramid_J[level].astype(np.float32),2.0)).to(device)


    height,width = I.shape

    # choose a set of pixel locations on the template image that are most informative
    tval = 0.9*threshold_otsu(I.cpu().numpy()) # reduce Otsu threshold value a bit to cover slightly wider areas
    important_ind = torch.nonzero((I.data>tval).view([height*width])).squeeze()

    # generate grid only once at each level
    yv, xv = torch.meshgrid([torch.arange(0,height).float().to(device), torch.arange(0,width).float().to(device)])
    # map coordinates to [-1,1]x[-1,1] so that grid_sample works properly
    yv = 2.0*yv/(height-1) - 1.0
    xv = 2.0*xv/(width-1) - 1.0


    # result = train(rho_data,mine_net,mine_net_optim)


    for itr in range(nItr[level]):
#    for itr in range(1000):

        C = torch.sum(B*v, 0)
Code example #38
def compute_reference_loss(data_dict, config):
    """ Compute cluster reference loss

    Args:
        data_dict: dict (read-only)

    Returns:
        ref_loss, lang_loss, cluster_preds, cluster_labels
    """

    # NOTE: N := num_batches (B) * num_points_per_scene
    # NOTE: data_dict["cluster_ref"] are the cluster confidences from the match_module.py
    #       B := batch_size, num_proposal is fixed because of match_module.py!

    # unpack
    cluster_preds = data_dict["cluster_ref"]  # (B, num_proposal)

    # GT segmentation
    # label creation without using the class labels and real ground truths
    # because loc. loss should be independent of obj. class. loss and
    # segmentation loss. (+ the same class can appear more than once)
    # hence we want to compare each cluster with the real cluster to find
    # the best cluster.
    gt_instances = data_dict['instance_labels']  # (N)
    target_inst_id = data_dict['object_id']  # (B)
    # target_inst_id = torch.tensor(data_dict['object_id']).cuda() # (B)
    # as no extra batch_dim exists this gives the index of a next sample
    start_of_samples = data_dict['offsets']  # (B)
    proposal_batch_ids = data_dict['proposal_batch_ids']  # (nProposal + 1)

    # PointGroup:
    # NOTE: in PG clustering alg. only points of the same class can be in one cluster
    # they can be assigned multiple clusters though, and point idxs don't restart per
    # batch but continue throughout the whole batch (as there is no extra batch_dim)

    # dim 1 for cluster_id, dim 2 for corresponding point idxs in N
    # sumNPoint: additional explanation in pointgroup.py
    preds_instances = data_dict['proposals_idx']  # (sumNPoint, 2)
    preds_offsets = data_dict['proposals_offset']  # (nProposal + 1)
    batch_size, num_proposals = cluster_preds.shape
    total_num_proposals = len(preds_offsets) - 1
    labels = torch.zeros(total_num_proposals)

    # reference loss
    criterion = SoftmaxRankingLoss()
    loss = torch.tensor(0, dtype=torch.float).cuda()
    # TODO: vectorize - instead of double iterative approach
    # for each sample in batch
    cluster_labels = torch.zeros_like(cluster_preds).cuda()
    for i in range(batch_size):
        start = start_of_samples[i]
        end = start_of_samples[i + 1]
        # gt_instances contains for each of the points their corresponding cluster_id
        # NOTE: we assume the point_ids are assigned based on their order in gt_instances
        #       we also assume that these ids match with the point_ids from PG
        correct_indices = (torch.arange(
            len(gt_instances))[gt_instances == target_inst_id[i]]).cuda()
        # nSamples is the number of points that are assigned to some clusters in one scene
        # NOTE: only works with an extra batch_size dimension
        #nSamples = preds_instances[i].shape[0]
        numbSamplePerCluster = torch.zeros(total_num_proposals)
        labels = torch.zeros(total_num_proposals)
        # TODO: are the gt_instances also unordered? no
        # TODO: is proposal_idx at the end actually 2*sumNPoints? dunno

        # select the correct ones
        # in preds_instances the proposals aren't ordered batchwise!
        # use proposal_batch_ids, preds_offsets to get correct window in preds_instances
        batch_proposals = preds_offsets[:-1][proposal_batch_ids ==
                                             i]  # proposals of one scene
        for j in range(len(batch_proposals)):
            start_id_proposal = batch_proposals[j]
            start_proposal_index = torch.nonzero(
                preds_offsets == batch_proposals[j])
            end_id_proposal = preds_offsets[start_proposal_index + 1]
            preds_instance_proposals = preds_instances[
                start_id_proposal:end_id_proposal]

            cluster_ids = preds_instance_proposals[:, 0]
            member_points = preds_instance_proposals[:, 1].long()
            cluster_id = cluster_ids[0]
            numbSamplePerCluster[cluster_id] = cluster_ids.shape[0]
            combined = torch.cat((member_points, correct_indices))
            _, counts = combined.unique(return_counts=True)
            numb_object_id_proposals = counts[counts > 1].shape[0]
            labels[cluster_id] = numb_object_id_proposals

        # union of points in real instance (gt) and respective pred instance
        # - labels to not have the intersection count double
        numbSamplePerCluster += len(correct_indices) - labels
        # normalize intersection with union => IoU score now
        labels = labels / numbSamplePerCluster
        max_elem = labels.max()
        # convert to one-hot-matrix with 0 on max per row
        # TODO: necessary if? -> # If no IoU with GT
        if max_elem != 0:
            labels = torch.floor(labels / max_elem)
        else:
            break
        # scene-wise loss calculation
        # labels is total_num_proposals long (same size as proposal_batch_ids)
        cluster_labels_scene = torch.FloatTensor(
            labels[proposal_batch_ids == i]).cuda()
        cluster_preds_scene = cluster_preds[i][:cluster_labels_scene.shape[
            0]]  # because in matching module 0s were added for missing values
        # loss = 0 is defined above
        loss += criterion(cluster_preds_scene, cluster_labels_scene.float())
        cluster_labels_scene_fill = torch.zeros(
            num_proposals - cluster_labels_scene.shape[0]).cuda()
        cluster_labels[i] = torch.cat(
            [cluster_labels_scene, cluster_labels_scene_fill])

    #cluster_labels = torch.FloatTensor(labels).cuda()

    # TODO: check if cluster_id starts with 0
    loss /= batch_size
    return loss, cluster_preds, cluster_labels
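The per-cluster IoU labels above are built from an intersection count obtained with unique(return_counts=True). A tiny sketch of that computation with made-up point indices; it assumes each index appears at most once per set:

import torch

correct_indices = torch.tensor([2, 3, 4, 5])   # points of the ground-truth object
member_points = torch.tensor([4, 5, 6])        # points of one predicted cluster
combined = torch.cat((member_points, correct_indices))
_, counts = combined.unique(return_counts=True)
intersection = counts[counts > 1].shape[0]                          # 2 (points 4 and 5)
union = len(member_points) + len(correct_indices) - intersection    # 5
print(intersection / union)                                         # 0.4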
Code example #39
File: autoattack.py  Project: fartashf/robust_bias
    def run_standard_evaluation(self, x_orig, y_orig, bs=250):
        if self.verbose:
            print('using {} version including {}'.format(self.version,
                ', '.join(self.attacks_to_run)))
        
        with torch.no_grad():
            # calculate accuracy
            n_batches = int(np.ceil(x_orig.shape[0] / bs))
            robust_flags = torch.zeros(x_orig.shape[0], dtype=torch.bool, device=x_orig.device)
            for batch_idx in range(n_batches):
                start_idx = batch_idx * bs
                end_idx = min( (batch_idx + 1) * bs, x_orig.shape[0])

                x = x_orig[start_idx:end_idx, :].clone().to(self.device)
                y = y_orig[start_idx:end_idx].clone().to(self.device)
                output = self.get_logits(x)
                correct_batch = y.eq(output.max(dim=1)[1])
                robust_flags[start_idx:end_idx] = correct_batch.detach().to(robust_flags.device)

            robust_accuracy = torch.sum(robust_flags).item() / x_orig.shape[0]
                
            if self.verbose:
                self.logger.log('initial accuracy: {:.2%}'.format(robust_accuracy))
                    
            x_adv = x_orig.clone().detach()
            startt = time.time()
            for attack in self.attacks_to_run:
                # item() is super important as pytorch int division uses floor rounding
                num_robust = torch.sum(robust_flags).item()

                if num_robust == 0:
                    break

                n_batches = int(np.ceil(num_robust / bs))

                robust_lin_idcs = torch.nonzero(robust_flags, as_tuple=False)
                if num_robust > 1:
                    robust_lin_idcs.squeeze_()
                
                for batch_idx in range(n_batches):
                    start_idx = batch_idx * bs
                    end_idx = min((batch_idx + 1) * bs, num_robust)

                    batch_datapoint_idcs = robust_lin_idcs[start_idx:end_idx]
                    if len(batch_datapoint_idcs.shape) > 1:
                        batch_datapoint_idcs.squeeze_(-1)
                    x = x_orig[batch_datapoint_idcs, :].clone().to(self.device)
                    y = y_orig[batch_datapoint_idcs].clone().to(self.device)

                    # make sure that x is a 4d tensor even if there is only a single datapoint left
                    if len(x.shape) == 3:
                        x.unsqueeze_(dim=0)
                    
                    # run attack
                    if attack == 'apgd-ce':
                        # apgd on cross-entropy loss
                        self.apgd.loss = 'ce'
                        self.apgd.seed = self.get_seed()
                        _, adv_curr = self.apgd.perturb(x, y, cheap=True)
                    
                    elif attack == 'apgd-dlr':
                        # apgd on dlr loss
                        self.apgd.loss = 'dlr'
                        self.apgd.seed = self.get_seed()
                        _, adv_curr = self.apgd.perturb(x, y, cheap=True)
                    
                    elif attack == 'fab':
                        # fab
                        self.fab.targeted = False
                        self.fab.seed = self.get_seed()
                        adv_curr = self.fab.perturb(x, y)
                    
                    elif attack == 'square':
                        # square
                        self.square.seed = self.get_seed()
                        adv_curr = self.square.perturb(x, y)
                    
                    elif attack == 'apgd-t':
                        # targeted apgd
                        self.apgd_targeted.seed = self.get_seed()
                        _, adv_curr = self.apgd_targeted.perturb(x, y, cheap=True)
                    
                    elif attack == 'fab-t':
                        # fab targeted
                        self.fab.targeted = True
                        self.fab.n_restarts = 1
                        self.fab.seed = self.get_seed()
                        adv_curr = self.fab.perturb(x, y)
                    
                    else:
                        raise ValueError('Attack not supported')
                
                    output = self.get_logits(adv_curr)
                    false_batch = ~y.eq(output.max(dim=1)[1]).to(robust_flags.device)
                    non_robust_lin_idcs = batch_datapoint_idcs[false_batch]
                    robust_flags[non_robust_lin_idcs] = False

                    x_adv[non_robust_lin_idcs] = adv_curr[false_batch].detach().to(x_adv.device)
                
                    if self.verbose:
                        num_non_robust_batch = torch.sum(false_batch)    
                        self.logger.log('{} - {}/{} - {} out of {} successfully perturbed'.format(
                            attack, batch_idx + 1, n_batches, num_non_robust_batch, x.shape[0]))
                
                robust_accuracy = torch.sum(robust_flags).item() / x_orig.shape[0]
                if self.verbose:
                    self.logger.log('robust accuracy after {}: {:.2%} (total time {:.1f} s)'.format(
                        attack.upper(), robust_accuracy, time.time() - startt))
                    
            # final check
            if self.verbose:
                if self.norm == 'Linf':
                    res = (x_adv - x_orig).abs().view(x_orig.shape[0], -1).max(1)[0]
                elif self.norm == 'L2':
                    res = ((x_adv - x_orig) ** 2).view(x_orig.shape[0], -1).sum(-1).sqrt()
                elif self.norm == 'dftinf':
                    res =  norm_f(x_adv - x_orig, 'dftinf')
                self.logger.log('max {} perturbation: {:.5f}, nan in tensor: {}, max: {:.5f}, min: {:.5f}'.format(
                    self.norm, res.max(), (x_adv != x_adv).sum(), x_adv.max(), x_adv.min()))
                self.logger.log('robust accuracy: {:.2%}'.format(robust_accuracy))
        
        return x_adv
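The robustness bookkeeping above maps the boolean robust_flags to linear indices, attacks only those points, and then scatters False back for the ones that were successfully perturbed. A compact sketch with made-up flags:

import torch

robust_flags = torch.tensor([True, True, False, True])
robust_lin_idcs = torch.nonzero(robust_flags, as_tuple=False).squeeze()   # tensor([0, 1, 3])
# suppose the attack succeeded on the second of the still-robust points
false_batch = torch.tensor([False, True, False])
robust_flags[robust_lin_idcs[false_batch]] = False
print(robust_flags)   # tensor([ True, False, False,  True])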
Code example #40
File: gsnn.py  Project: yichi0911/gsnn_demo
def get_adj_nodes(graph, nodes):
    # get adj nodes
    re = set(nodes)
    yes = set(torch.nonzero(graph[nodes])[:, 1].cpu().data.numpy())
    return sorted(list(re | yes))
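A small usage sketch for get_adj_nodes above, with a toy adjacency matrix kept on the CPU (the .cpu() call inside the function is then a no-op):

import torch

graph = torch.tensor([[0, 1, 0, 0],
                      [1, 0, 1, 0],
                      [0, 1, 0, 1],
                      [0, 0, 1, 0]])
# the neighbours of node 0 are {1}; merged with the seed set {0} this prints [0, 1]
print(get_adj_nodes(graph, [0]))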
Code example #41
File: test_shape_ops.py  Project: zacker150/pytorch
def _foo(t):
    tuple_result = torch.nonzero(t, as_tuple=True)
    nontuple_result = torch.nonzero(t, as_tuple=False)
    out = torch.empty_like(nontuple_result)
    torch.nonzero(t, as_tuple=False, out=out)
    return tuple_result, nontuple_result, out
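The two calling conventions exercised above return the same information in different layouts. A short sketch of the relationship, using a made-up tensor:

import torch

t = torch.tensor([[0, 3], [4, 0]])
as_matrix = torch.nonzero(t, as_tuple=False)   # tensor([[0, 1], [1, 0]])
as_tuple = torch.nonzero(t, as_tuple=True)     # (tensor([0, 1]), tensor([1, 0]))
# the tuple form is simply the columns of the matrix form
assert all(torch.equal(a, b) for a, b in zip(as_tuple, as_matrix.unbind(dim=1)))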
Code example #42
  def __getitem__(self, index):
    if self.training:
        index_ratio = int(self.ratio_index[index])
    else:
        index_ratio = index

    # get the anchor index for current sample index
    # here we set the anchor index to the last one
    # sample in this group
    minibatch_db = [self._roidb[index_ratio]]
    blobs = get_minibatch(minibatch_db, self._num_classes)
    data = torch.from_numpy(blobs['data'])
    im_info = torch.from_numpy(blobs['im_info'])
    # we need to randomly shuffle the bounding boxes.
    data_height, data_width = data.size(1), data.size(2)
    if self.training:
        np.random.shuffle(blobs['gt_boxes'])
        gt_boxes = torch.from_numpy(blobs['gt_boxes'])

        ########################################################
        # padding the input image to fixed size for each group #
        ########################################################

        # NOTE1: need to cope with the case where a group covers both conditions. (done)
        # NOTE2: need to consider the situation for the tail samples. (no worry)
        # NOTE3: need to implement a parallel data loader. (no worry)
        # get the index range

        # if the image need to crop, crop to the target size.
        ratio = self.ratio_list_batch[index]

        if self._roidb[index_ratio]['need_crop']:
            if ratio < 1:
                # this means that data_width << data_height, we need to crop the
                # data_height
                min_y = int(torch.min(gt_boxes[:,1]))
                max_y = int(torch.max(gt_boxes[:,3]))
                trim_size = int(np.floor(data_width / ratio))
                if trim_size > data_height:
                    trim_size = data_height                
                box_region = max_y - min_y + 1
                if min_y == 0:
                    y_s = 0
                else:
                    if (box_region-trim_size) < 0:
                        y_s_min = max(max_y-trim_size, 0)
                        y_s_max = min(min_y, data_height-trim_size)
                        if y_s_min == y_s_max:
                            y_s = y_s_min
                        else:
                            y_s = np.random.choice(range(y_s_min, y_s_max))
                    else:
                        y_s_add = int((box_region-trim_size)/2)
                        if y_s_add == 0:
                            y_s = min_y
                        else:
                            y_s = np.random.choice(range(min_y, min_y+y_s_add))
                # crop the image
                data = data[:, y_s:(y_s + trim_size), :, :]

                # shift y coordinate of gt_boxes
                gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
                gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)

                # update gt bounding boxes according to the crop
                gt_boxes[:, 1].clamp_(0, trim_size - 1)
                gt_boxes[:, 3].clamp_(0, trim_size - 1)

            else:
                # this means that data_width >> data_height, we need to crop the
                # data_width
                min_x = int(torch.min(gt_boxes[:,0]))
                max_x = int(torch.max(gt_boxes[:,2]))
                trim_size = int(np.ceil(data_height * ratio))
                if trim_size > data_width:
                    trim_size = data_width                
                box_region = max_x - min_x + 1
                if min_x == 0:
                    x_s = 0
                else:
                    if (box_region-trim_size) < 0:
                        x_s_min = max(max_x-trim_size, 0)
                        x_s_max = min(min_x, data_width-trim_size)
                        if x_s_min == x_s_max:
                            x_s = x_s_min
                        else:
                            x_s = np.random.choice(range(x_s_min, x_s_max))
                    else:
                        x_s_add = int((box_region-trim_size)/2)
                        if x_s_add == 0:
                            x_s = min_x
                        else:
                            x_s = np.random.choice(range(min_x, min_x+x_s_add))
                # crop the image
                data = data[:, :, x_s:(x_s + trim_size), :]

                # shift x coordinate of gt_boxes
                gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
                gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
                # update gt bounding boxes according to the crop
                gt_boxes[:, 0].clamp_(0, trim_size - 1)
                gt_boxes[:, 2].clamp_(0, trim_size - 1)

        # based on the ratio, padding the image.
        if ratio < 1:
            # this means that data_width < data_height
            trim_size = int(np.floor(data_width / ratio))

            padding_data = torch.FloatTensor(int(np.ceil(data_width / ratio)), \
                                             data_width, 3).zero_()

            padding_data[:data_height, :, :] = data[0]
            # update im_info
            im_info[0, 0] = padding_data.size(0)
            # print("height %d %d \n" %(index, anchor_idx))
        elif ratio > 1:
            # this means that data_width > data_height
            # if the image need to crop.
            padding_data = torch.FloatTensor(data_height, \
                                             int(np.ceil(data_height * ratio)), 3).zero_()
            padding_data[:, :data_width, :] = data[0]
            im_info[0, 1] = padding_data.size(1)
        else:
            trim_size = min(data_height, data_width)
            padding_data = torch.FloatTensor(trim_size, trim_size, 3).zero_()
            padding_data = data[0][:trim_size, :trim_size, :]
            # gt_boxes.clamp_(0, trim_size)
            gt_boxes[:, :4].clamp_(0, trim_size)
            im_info[0, 0] = trim_size
            im_info[0, 1] = trim_size


        # check the bounding box:
        not_keep = (gt_boxes[:,0] == gt_boxes[:,2]) | (gt_boxes[:,1] == gt_boxes[:,3])
        keep = torch.nonzero(not_keep == 0).view(-1)

        gt_boxes_padding = torch.FloatTensor(self.max_num_box, gt_boxes.size(1)).zero_()
        if keep.numel() != 0:
            gt_boxes = gt_boxes[keep]
            num_boxes = min(gt_boxes.size(0), self.max_num_box)
            gt_boxes_padding[:num_boxes,:] = gt_boxes[:num_boxes]
        else:
            num_boxes = 0

            # permute trim_data to adapt to downstream processing
        padding_data = padding_data.permute(2, 0, 1).contiguous()
        im_info = im_info.view(3)

        return padding_data, im_info, gt_boxes_padding, num_boxes
    else:
        data = data.permute(0, 3, 1, 2).contiguous().view(3, data_height, data_width)
        im_info = im_info.view(3)

        gt_boxes = torch.FloatTensor([1,1,1,1,1])
        num_boxes = 0

        return data, im_info, gt_boxes, num_boxes
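The degenerate-box check near the end of the training branch uses the nonzero-of-a-negated-mask idiom. A compact sketch with made-up boxes:

import torch

gt_boxes = torch.tensor([[10., 10., 20., 20., 1.],   # valid
                         [ 5.,  5.,  5., 15., 1.],   # zero width  -> dropped
                         [ 0.,  8., 12.,  8., 2.]])  # zero height -> dropped
not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (gt_boxes[:, 1] == gt_boxes[:, 3])
keep = torch.nonzero(not_keep == 0).view(-1)          # tensor([0])
print(gt_boxes[keep])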
Code example #43
File: rpn_head.py  Project: JialianW/Forest_RCNN
    def get_bboxes_single(self,
                          cls_scores,
                          bbox_preds,
                          mlvl_anchors,
                          img_shape,
                          scale_factor,
                          cfg,
                          gt_bboxes,
                          gt_labels,
                          rescale=False,
                          parent_scores=None):
        mlvl_proposals = []
        for idx in range(len(cls_scores)):
            rpn_cls_score = cls_scores[idx]
            rpn_bbox_pred = bbox_preds[idx]
            assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
            rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
            if self.use_sigmoid_cls:
                rpn_cls_score = rpn_cls_score.reshape(-1)
                scores = rpn_cls_score.sigmoid()
            else:
                rpn_cls_score = rpn_cls_score.reshape(-1, 2)
                scores = rpn_cls_score.softmax(dim=1)[:, 1]
            rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
            anchors = mlvl_anchors[idx]
            if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
                _, topk_inds = scores.topk(cfg.nms_pre)
                rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
                anchors = anchors[topk_inds, :]
                scores = scores[topk_inds]
            proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means,
                                   self.target_stds, img_shape)
            if cfg.min_bbox_size > 0:
                w = proposals[:, 2] - proposals[:, 0] + 1
                h = proposals[:, 3] - proposals[:, 1] + 1
                valid_inds = torch.nonzero((w >= cfg.min_bbox_size) &
                                           (h >= cfg.min_bbox_size)).squeeze()
                proposals = proposals[valid_inds, :]
                scores = scores[valid_inds]
            proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)

            if cfg.nms_resampling is not None:  # only used in training
                if cfg.nms_resampling[0] == 'discrete':
                    a_r = cfg.nms_resampling[1]
                    a_c = cfg.nms_resampling[2]
                    a_f = cfg.nms_resampling[3]
                    proposals = self.nms_resampling_discrete(
                        proposals, gt_bboxes, gt_labels, a_r, a_c, a_f)
                elif cfg.nms_resampling[0] == 'linear':
                    thresh = cfg.nms_resampling[1]
                    proposals = self.nms_resampling_linear(
                        proposals, gt_bboxes, gt_labels, thresh)
            else:
                proposals, _ = nms(proposals, cfg.nms_thr)

            proposals = proposals[:cfg.nms_post, :]
            mlvl_proposals.append(proposals)
        proposals = torch.cat(mlvl_proposals, 0)
        if cfg.nms_across_levels:
            proposals, _ = nms(proposals, cfg.nms_thr)
            proposals = proposals[:cfg.max_num, :]
        else:
            scores = proposals[:, 4]
            num = min(cfg.max_num, proposals.shape[0])
            _, topk_inds = scores.topk(num)
            proposals = proposals[topk_inds, :]
        return proposals
コード例 #44
0
    def beam_search_decode(self,
                           input_tensor_with_lengths,
                           beam_size=1,
                           **kwargs):
        input_tensor, _ = input_tensor_with_lengths
        input_sequence_length, batch_size = input_tensor.size()
        target_length = min(int(cfg.maximum_decoding_length * 1.1),
                            input_sequence_length * 2)

        src_mask = None  # self.backbone.generate_square_subsequent_mask(input_tensor.size(0)).to(device)
        src_key_padding_mask = (
            input_tensor == self.SRC.vocab.stoi[cfg.pad_token]).transpose(
                0, 1).to(device)

        init_ys = torch.ones(1, batch_size).fill_(
            self.TGT.vocab.stoi[cfg.bos_token]).type_as(input_tensor.data)
        memory = self.backbone.encoder(
            self.src_embed(input_tensor),
            mask=src_mask,
            src_key_padding_mask=src_key_padding_mask)

        nodes = [(init_ys, torch.zeros(batch_size, device=device),
                  torch.zeros(batch_size, device=device).bool())]
        final_results = []

        for i in range(target_length - 1):
            k = beam_size - len(final_results)
            if k < 1:
                break
            all_predictions = torch.zeros(batch_size,
                                          len(nodes) * k,
                                          device=device).long()
            all_lm_scores = torch.zeros(batch_size,
                                        len(nodes) * k,
                                        device=device).float()
            # iterating over all the available hypotheses to expand the beams
            for n_id, (ys, lm_scores, eos_predicted) in enumerate(nodes):
                prob = self.extract_output_probabilities(
                    ys, memory, src_key_padding_mask)
                k_values, k_indices = torch.topk(prob, dim=1, k=k)
                for beam_index in range(k):
                    overall_index = n_id * k + beam_index
                    all_predictions[:, overall_index] = k_indices[:, beam_index]
                    all_lm_scores[:, overall_index] = lm_scores + k_values[:, beam_index]
            k_values, k_indices = torch.topk(all_lm_scores, dim=1, k=k)
            temp_next_nodes = []
            # creating the next k hypotheses
            for beam_index in range(k):
                node_ids = k_indices[:, beam_index] // k
                node_ids = list(
                    node_ids.cpu().numpy())  # list of size batch_size
                pred_ids = list(k_indices[:, beam_index].cpu().numpy())
                lm_score = k_values[:, beam_index]

                next_word = torch.zeros((batch_size, ), device=device).long()
                for b in range(batch_size):
                    next_word[b] = all_predictions[b, pred_ids[b]]

                eos_p = torch.cat([
                    nodes[n_id][2][b_id].unsqueeze(0)
                    for b_id, n_id in enumerate(node_ids)
                ],
                                  dim=0)
                eos_predicted = torch.max(
                    eos_p, (next_word == self.TGT.vocab.stoi[cfg.eos_token]))
                ys = torch.cat([
                    nodes[n_id][0][:, b_id].unsqueeze(1)
                    for b_id, n_id in enumerate(node_ids)
                ],
                               dim=1)
                ys = torch.cat([ys, next_word.view(1, batch_size)], dim=0)
                next_step_node = (ys, lm_score, eos_predicted)
                if sum(eos_predicted.int()) == batch_size:
                    final_results.append(next_step_node)
                else:
                    temp_next_nodes.append(next_step_node)
            del nodes[:]
            nodes = temp_next_nodes
        if not len(final_results):
            for node in nodes:
                final_results.append(node)
        # creating the final result based on the best scoring hypotheses
        result = torch.zeros(target_length, batch_size, device=device)
        lp = lambda l: ((5 + l)**self.beam_search_length_norm_factor) / (
            5 + 1)**self.beam_search_length_norm_factor
        for b_ind in range(batch_size):
            best_score = float('-inf')
            best_tokens = None
            for node in final_results:
                tokens = node[0][:, b_ind]
                eos_ind = torch.nonzero(torch.eq(
                    tokens, self.TGT.vocab.stoi[cfg.eos_token]),
                                        as_tuple=False).view(-1)
                if eos_ind.size(0):
                    tsize = eos_ind[0].item()
                else:
                    tsize = tokens.size(0)
                # based on Google's NMT system paper [https://arxiv.org/pdf/1609.08144.pdf]
                # since coverage is not being tracked here, the coverage penalty is also not included in this formula
                lms = node[1][b_ind].item() / lp(tsize)
                if lms > best_score:
                    best_score = lms
                    best_tokens = tokens
            result[:best_tokens[1:].size(0), b_ind] = best_tokens[1:]
        max_attention_indices = None
        return result, max_attention_indices, torch.zeros(1,
                                                          device=device), 1, 1
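The lp lambda above is the GNMT length penalty. A tiny worked example, with an assumed normalization factor of 0.6 standing in for self.beam_search_length_norm_factor, shows how it rescales a hypothesis score so longer sequences are not unfairly penalized.

alpha = 0.6  # assumed stand-in for self.beam_search_length_norm_factor

def length_penalty(l, alpha=alpha):
    # lp(l) = ((5 + l) ** alpha) / ((5 + 1) ** alpha), as in the lambda above
    return ((5 + l) ** alpha) / ((5 + 1) ** alpha)

cum_log_prob = -7.2  # hypothetical summed log-probability of a hypothesis
for length in (5, 10, 20):
    print(length, round(length_penalty(length), 3),
          round(cum_log_prob / length_penalty(length), 3))
# the penalty grows with length, so a longer hypothesis with the same raw
# score receives a better (less negative) normalized score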
コード例 #45
0
def sparse(dense):
    indices = torch.nonzero(dense).t()
    values = dense[indices[0],
                   indices[1]]  # modify this based on dimensionality
    return torch.sparse.FloatTensor(indices, values, dense.size())
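A short usage sketch for the 2-D case of the sparse() helper above: the round trip through a COO sparse tensor and back via to_dense() should reproduce the input (for higher-dimensional inputs the indexing line inside sparse() would need one index row per dimension).

import torch

dense = torch.tensor([[0., 3., 0.],
                      [2., 0., 0.],
                      [0., 0., 5.]])
sp = sparse(dense)                         # uses the helper defined above
print(sp)                                  # COO tensor with 3 stored values
print(torch.equal(sp.to_dense(), dense))   # True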
コード例 #46
0
ファイル: thinning.py プロジェクト: zmsunnyday/distiller
def create_thinning_recipe_filters(sgraph, model, zeros_mask_dict):
    """Create a recipe for removing filters from Convolution layers.

    The 4D weights of the model parameters (i.e. the convolution parameters) are
    examined one by one, to determine which have filters that are all zeros.
    For each weights tensor that has at least one zero-filter, we create a
    "thinning recipe".
    The thinning recipe contains meta-instructions of how the model
    should be changed in order to remove the filters.
    """
    msglogger.info("Invoking create_thinning_recipe_filters")

    thinning_recipe = ThinningRecipe(modules={}, parameters={})
    layers = {mod_name : m for mod_name, m in model.named_modules()}

    for param_name, param in model.named_parameters():
        # We are only interested in 4D weights
        if param.dim() != 4:
            continue

        # Find the number of zero-valued filters in this weights tensor
        filter_view = param.view(param.size(0), -1)
        num_filters = filter_view.size()[0]
        nonzero_filters = torch.nonzero(filter_view.abs().sum(dim=1))

        # If none of the filters in this tensor are all-zero, move on to the next tensor
        if num_filters <= len(nonzero_filters):
            msglogger.debug("Skipping {} shape={}".format(param_name_2_layer_name(param_name), param.shape))
            continue

        msglogger.info("In tensor %s found %d/%d zero filters", param_name,
                       num_filters - len(nonzero_filters), num_filters)

        # We are removing filters, so update the number of outgoing channels (OFMs)
        # in the convolutional layer
        layer_name = param_name_2_layer_name(param_name)
        assert isinstance(layers[layer_name], torch.nn.modules.Conv2d)
        append_module_directive(thinning_recipe, layer_name, key='out_channels', val=len(nonzero_filters))

        # Select only the non-zero filters
        indices = nonzero_filters.data.squeeze()
        append_param_directive(thinning_recipe, param_name, (0, indices))

        if layers[layer_name].bias is not None:
            # This convolution has bias coefficients
            append_param_directive(thinning_recipe, layer_name+'.bias', (0, indices))

        # Find all instances of Convolution or FC (GEMM) layers that immediately follow this layer
        successors = sgraph.successors_f(normalize_module_name(layer_name), ['Conv', 'Gemm'])
        # Convert the layer names to PyTorch's convoluted naming scheme (when DataParallel is used)
        successors = [denormalize_module_name(model, successor) for successor in successors]
        for successor in successors:

            if isinstance(layers[successor], torch.nn.modules.Conv2d):
                # For each of the convolutional layers that follow, we have to reduce the number of input channels.
                append_module_directive(thinning_recipe, successor, key='in_channels', val=len(nonzero_filters))
                msglogger.info("[recipe] {}: setting in_channels = {}".format(successor, len(nonzero_filters)))

                # Now remove channels from the weights tensor of the successor conv
                append_param_directive(thinning_recipe, successor+'.weight', (1, indices))

            elif isinstance(layers[successor], torch.nn.modules.Linear):
                # If a Linear (Fully-Connected) layer follows, we need to update its in_features member
                fm_size = layers[successor].in_features // layers[layer_name].out_channels
                in_features = fm_size * len(nonzero_filters)
                #append_module_directive(thinning_recipe, layer_name, key='in_features', val=in_features)
                append_module_directive(thinning_recipe, successor, key='in_features', val=in_features)
                msglogger.info("[recipe] {}: setting in_features = {}".format(successor, in_features))

                # Now remove channels from the weights tensor of the successor FC layer:
                # This is a bit tricky:
                fm_height = fm_width = int(math.sqrt(fm_size))
                view_4D = (layers[successor].out_features, layers[layer_name].out_channels, fm_height, fm_width)
                view_2D = (layers[successor].out_features, in_features)
                append_param_directive(thinning_recipe, successor+'.weight', (1, indices, view_4D, view_2D))

        # Now handle the BatchNormalization layer that follows the convolution
        bn_layers = sgraph.successors_f(normalize_module_name(layer_name), ['BatchNormalization'])
        if len(bn_layers) > 0:
            assert len(bn_layers) == 1
            # Thinning of the BN layer that follows the convolution
            bn_layer_name = denormalize_module_name(model, bn_layers[0])
            bn_thinning(thinning_recipe, layers, bn_layer_name, len_thin_features=len(nonzero_filters), thin_features=indices)
    return thinning_recipe
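The detection step above boils down to one torch.nonzero call over per-filter absolute sums. A minimal standalone sketch of just that step (plain PyTorch, not distiller's API):

import torch
import torch.nn as nn

conv = nn.Conv2d(in_channels=3, out_channels=4, kernel_size=3)
with torch.no_grad():
    conv.weight[1].zero_()                 # pretend filter 1 was pruned to all zeros

param = conv.weight                        # shape: (out_channels, in_channels, kH, kW)
filter_view = param.view(param.size(0), -1)
nonzero_filters = torch.nonzero(filter_view.abs().sum(dim=1)).squeeze()
print(nonzero_filters)                     # tensor([0, 2, 3]): filter 1 would be removed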
コード例 #47
0
    def __getitem__(self, index):
        if self.training:
            index_ratio = int(self.ratio_index[index])
        else:
            index_ratio = index

        # get the anchor index for current sample index
        # here we set the anchor index to the last one
        # sample in this group
        minibatch_db = [self._roidb[index_ratio]]
        blobs = get_minibatch(minibatch_db, self._num_classes)
        data = torch.from_numpy(blobs['data'])
        im_info = torch.from_numpy(blobs['im_info'])
        # we need to random shuffle the bounding box.
        data_height, data_width = data.size(1), data.size(2)
        if self.training:
            np.random.shuffle(blobs['gt_boxes'])
            gt_boxes = torch.from_numpy(blobs['gt_boxes'])

            ########################################################
            # padding the input image to fixed size for each group #
            ########################################################

            # NOTE1: need to cope with the case where a group covers both conditions. (done)
            # NOTE2: need to consider the situation for the tail samples. (no worry)
            # NOTE3: need to implement a parallel data loader. (no worry)
            # get the index range

            # if the image needs cropping, crop it to the target size.
            ratio = self.ratio_list_batch[index]

            if self._roidb[index_ratio]['need_crop']:
                if ratio < 1:
                    # this means that data_width << data_height, we need to crop the
                    # data_height
                    min_y = int(torch.min(gt_boxes[:, 1]))
                    max_y = int(torch.max(gt_boxes[:, 3]))
                    trim_size = int(np.floor(data_width / ratio))
                    if trim_size > data_height:
                        trim_size = data_height
                    box_region = max_y - min_y + 1
                    if min_y == 0:
                        y_s = 0
                    else:
                        if (box_region - trim_size) < 0:
                            y_s_min = max(max_y - trim_size, 0)
                            y_s_max = min(min_y, data_height - trim_size)
                            if y_s_min == y_s_max:
                                y_s = y_s_min
                            else:
                                y_s = np.random.choice(range(y_s_min, y_s_max))
                        else:
                            y_s_add = int((box_region - trim_size) / 2)
                            if y_s_add == 0:
                                y_s = min_y
                            else:
                                y_s = np.random.choice(
                                    range(min_y, min_y + y_s_add))
                    # crop the image
                    data = data[:, y_s:(y_s + trim_size), :, :]

                    # shift y coordinate of gt_boxes
                    gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
                    gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)

                    # update gt bounding box according to the trim
                    gt_boxes[:, 1].clamp_(0, trim_size - 1)
                    gt_boxes[:, 3].clamp_(0, trim_size - 1)

                else:
                    # this means that data_width >> data_height, we need to crop the
                    # data_width
                    min_x = int(torch.min(gt_boxes[:, 0]))
                    max_x = int(torch.max(gt_boxes[:, 2]))
                    trim_size = int(np.ceil(data_height * ratio))
                    if trim_size > data_width:
                        trim_size = data_width
                    box_region = max_x - min_x + 1
                    if min_x == 0:
                        x_s = 0
                    else:
                        if (box_region - trim_size) < 0:
                            x_s_min = max(max_x - trim_size, 0)
                            x_s_max = min(min_x, data_width - trim_size)
                            if x_s_min == x_s_max:
                                x_s = x_s_min
                            else:
                                x_s = np.random.choice(range(x_s_min, x_s_max))
                        else:
                            x_s_add = int((box_region - trim_size) / 2)
                            if x_s_add == 0:
                                x_s = min_x
                            else:
                                x_s = np.random.choice(
                                    range(min_x, min_x + x_s_add))
                    # crop the image
                    data = data[:, :, x_s:(x_s + trim_size), :]

                    # shift x coordinate of gt_boxes
                    gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
                    gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
                    # update gt bounding box according to the trim
                    gt_boxes[:, 0].clamp_(0, trim_size - 1)
                    gt_boxes[:, 2].clamp_(0, trim_size - 1)

            # based on the ratio, padding the image.
            if ratio < 1:
                # this means that data_width < data_height
                trim_size = int(np.floor(data_width / ratio))

                padding_data = torch.FloatTensor(int(np.ceil(data_width / ratio)), \
                                                 data_width, 3).zero_()

                padding_data[:data_height, :, :] = data[0]
                # update im_info
                im_info[0, 0] = padding_data.size(0)
                # print("height %d %d \n" %(index, anchor_idx))
            elif ratio > 1:
                # this means that data_width > data_height
                # if the image needs cropping.
                padding_data = torch.FloatTensor(data_height, \
                                                 int(np.ceil(data_height * ratio)), 3).zero_()
                padding_data[:, :data_width, :] = data[0]
                im_info[0, 1] = padding_data.size(1)
            else:
                trim_size = min(data_height, data_width)
                padding_data = torch.FloatTensor(trim_size, trim_size,
                                                 3).zero_()
                padding_data = data[0][:trim_size, :trim_size, :]
                # gt_boxes.clamp_(0, trim_size)
                gt_boxes[:, :4].clamp_(0, trim_size)
                im_info[0, 0] = trim_size
                im_info[0, 1] = trim_size
            # check the bounding box: drop degenerate boxes (zero width or height)
            if len(gt_boxes) > 0:
                not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (
                    gt_boxes[:, 1] == gt_boxes[:, 3])
                keep = torch.nonzero(not_keep == 0).view(-1)
            else:
                # no ground-truth boxes at all, so nothing to keep
                keep = torch.LongTensor([])

            gt_boxes_padding = torch.FloatTensor(self.max_num_box,
                                                 gt_boxes.size(1)).zero_()
            if keep.numel() != 0:
                gt_boxes = gt_boxes[keep]
                num_boxes = min(gt_boxes.size(0), self.max_num_box)
                gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]
            else:
                num_boxes = 0

            # permute trim_data to adapt to downstream processing
            padding_data = padding_data.permute(2, 0, 1).contiguous()
            im_info = im_info.view(3)

            return padding_data, im_info, gt_boxes_padding, num_boxes
        else:
            data = data.permute(0, 3, 1,
                                2).contiguous().view(3, data_height,
                                                     data_width)
            im_info = im_info.view(3)

            gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
            num_boxes = 0

            return data, im_info, gt_boxes, num_boxes
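The ratio-based padding above can be summarized in a few lines. A sketch with hypothetical sizes, where ratio plays the role of self.ratio_list_batch[index] (the target width/height for the image's ratio group):

import numpy as np
import torch

data_height, data_width = 500, 300
ratio = 0.5                                   # assumed target width/height for this group
img = torch.rand(data_height, data_width, 3)  # stand-in for data[0]

if ratio < 1:    # width < height: pad the height up to width / ratio
    padding_data = torch.zeros(int(np.ceil(data_width / ratio)), data_width, 3)
    padding_data[:data_height, :, :] = img
elif ratio > 1:  # width > height: pad the width up to height * ratio
    padding_data = torch.zeros(data_height, int(np.ceil(data_height * ratio)), 3)
    padding_data[:, :data_width, :] = img

print(padding_data.shape)   # torch.Size([600, 300, 3]): height padded from 500 to 600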
コード例 #48
0
def write_results_half(prediction,
                       confidence,
                       num_classes,
                       nms=True,
                       nms_conf=0.4):
    conf_mask = (prediction[:, :, 4] > confidence).half().unsqueeze(2)
    prediction = prediction * conf_mask

    try:
        ind_nz = torch.nonzero(prediction[:, :, 4]).transpose(0,
                                                              1).contiguous()
    except:
        return 0

    box_a = prediction.new(prediction.shape)
    box_a[:, :, 0] = (prediction[:, :, 0] - prediction[:, :, 2] / 2)
    box_a[:, :, 1] = (prediction[:, :, 1] - prediction[:, :, 3] / 2)
    box_a[:, :, 2] = (prediction[:, :, 0] + prediction[:, :, 2] / 2)
    box_a[:, :, 3] = (prediction[:, :, 1] + prediction[:, :, 3] / 2)
    prediction[:, :, :4] = box_a[:, :, :4]

    batch_size = prediction.size(0)

    output = prediction.new(1, prediction.size(2) + 1)
    write = False

    for ind in range(batch_size):
        #select the image from the batch
        image_pred = prediction[ind]

        #Get the class having maximum score, and the index of that class
        #Get rid of num_classes softmax scores
        #Add the class index and the class score of class having maximum score
        max_conf, max_conf_score = torch.max(image_pred[:, 5:5 + num_classes],
                                             1)
        max_conf = max_conf.half().unsqueeze(1)
        max_conf_score = max_conf_score.half().unsqueeze(1)
        seq = (image_pred[:, :5], max_conf, max_conf_score)
        image_pred = torch.cat(seq, 1)

        #Get rid of the zero entries
        non_zero_ind = (torch.nonzero(image_pred[:, 4]))
        try:
            image_pred_ = image_pred[non_zero_ind.squeeze(), :]
        except:
            continue

        #Get the various classes detected in the image
        img_classes = unique(image_pred_[:, -1].long()).half()

            #We will do NMS classwise
        for cls in img_classes:
            #get the detections with one particular class
            cls_mask = image_pred_ * (image_pred_[:, -1]
                                      == cls).half().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2]).squeeze()

            image_pred_class = image_pred_[class_mask_ind]

            #sort the detections such that the entry with the maximum objectness
            #confidence is at the top
            conf_sort_index = torch.sort(image_pred_class[:, 4],
                                         descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.size(0)

            #if nms has to be done
            if nms:
                #For each detection
                for i in range(idx):
                    #Get the IOUs of all boxes that come after the one we are looking at
                    #in the loop
                    try:
                        ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                        image_pred_class[i + 1:])
                    except ValueError:
                        break

                    except IndexError:
                        break

                    #Zero out all the detections that have IoU > threshold
                    iou_mask = (ious < nms_conf).half().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask

                    #Remove the non-zero entries
                    non_zero_ind = torch.nonzero(
                        image_pred_class[:, 4]).squeeze()
                    image_pred_class = image_pred_class[non_zero_ind]

            #Concatenate the batch_id of the image to the detection
            #this helps us identify which image the detection corresponds to
            #We use a linear structure to hold ALL the detections from the batch
            #the batch_dim is flattened
            #batch is identified by extra batch column
            batch_ind = image_pred_class.new(image_pred_class.size(0),
                                             1).fill_(ind)
            seq = batch_ind, image_pred_class

            if not write:
                output = torch.cat(seq, 1)
                write = True
            else:
                out = torch.cat(seq, 1)
                output = torch.cat((output, out))

    return output
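write_results_half relies on a bbox_iou(box, boxes) helper that is not shown above. A hedged minimal version for corner-format (x1, y1, x2, y2) boxes is sketched below; the repo's own helper may differ, e.g. in whether it adds +1 to widths and heights.

import torch

def bbox_iou(box1, box2):
    # box1: (1, 4+) tensor, box2: (N, 4+) tensor; returns IoU of box1 with each row of box2
    x1 = torch.max(box1[:, 0], box2[:, 0])
    y1 = torch.max(box1[:, 1], box2[:, 1])
    x2 = torch.min(box1[:, 2], box2[:, 2])
    y2 = torch.min(box1[:, 3], box2[:, 3])
    inter = (x2 - x1).clamp(min=0) * (y2 - y1).clamp(min=0)
    area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
    area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
    return inter / (area1 + area2 - inter + 1e-16)

boxes = torch.tensor([[0., 0., 10., 10., 0.9],
                      [5., 5., 15., 15., 0.8]])
print(bbox_iou(boxes[0:1], boxes[1:]))   # tensor([0.1429]) = 25 / (100 + 100 - 25)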
コード例 #49
0
      else:
          # Simply repeat the boxes, once for each class
          pred_boxes = np.tile(boxes, (1, scores.shape[1]))

      pred_boxes /= data[1][0][2]

      scores = scores.squeeze()
      pred_boxes = pred_boxes.squeeze()
      det_toc = time.time()
      detect_time = det_toc - det_tic
      misc_tic = time.time()
      if vis:
          im = cv2.imread(imdb.image_path_at(i))
          im2show = np.copy(im)
      for j in xrange(1, imdb.num_classes):
          inds = torch.nonzero(scores[:,j]>thresh).view(-1)
          # if there is det
          if inds.numel() > 0:
            cls_scores = scores[:,j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            if args.class_agnostic:
              cls_boxes = pred_boxes[inds, :]
            else:
              cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            if vis:
              im2show = vis_detections(im2show, imdb.classes[j], cls_dets.cpu().numpy(), 0.3)
コード例 #50
0
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= data[1][0][2]

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        for j in xrange(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets, cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis and j == 7:
コード例 #51
0
 def test_nonzero(self):
     x = torch.tensor([[[2., 2.], [1., 0.]], [[0., 0.], [1., 1.]]],
                      requires_grad=True)
     self.assertONNX(lambda x: torch.nonzero(x), x)
コード例 #52
0
ファイル: anchor_target_layer.py プロジェクト: Tung-I/FRCNN
    def forward(self, input):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate 9 anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the 9 anchors
        # filter out-of-image anchors

        rpn_cls_score = input[0]
        gt_boxes = input[1]
        im_info = input[2]
        num_boxes = input[3]

        # map of shape (..., H, W)
        height, width = rpn_cls_score.size(2), rpn_cls_score.size(3)

        batch_size = gt_boxes.size(0)

        feat_height, feat_width = rpn_cls_score.size(2), rpn_cls_score.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = torch.from_numpy(
            np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                       shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(rpn_cls_score).float()

        A = self._num_anchors
        K = shifts.size(0)

        self._anchors = self._anchors.type_as(
            gt_boxes)  # move to specific gpu.
        all_anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        all_anchors = all_anchors.view(K * A, 4)

        total_anchors = int(K * A)

        keep = (
            (all_anchors[:, 0] >= -self._allowed_border) &
            (all_anchors[:, 1] >= -self._allowed_border) &
            (all_anchors[:, 2] < long(im_info[0][1]) + self._allowed_border) &
            (all_anchors[:, 3] < long(im_info[0][0]) + self._allowed_border))

        inds_inside = torch.nonzero(keep).view(-1)

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # label: 1 is positive, 0 is negative, -1 is don't care
        labels = gt_boxes.new(batch_size, inds_inside.size(0)).fill_(-1)
        bbox_inside_weights = gt_boxes.new(batch_size,
                                           inds_inside.size(0)).zero_()
        bbox_outside_weights = gt_boxes.new(batch_size,
                                            inds_inside.size(0)).zero_()

        overlaps = bbox_overlaps_batch(anchors, gt_boxes)  # [B, n_anchors, 20]

        max_overlaps, argmax_overlaps = torch.max(overlaps, 2)
        gt_max_overlaps, _ = torch.max(overlaps, 1)

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        gt_max_overlaps[gt_max_overlaps == 0] = 1e-5
        keep = torch.sum(
            overlaps.eq(
                gt_max_overlaps.view(batch_size, 1, -1).expand_as(overlaps)),
            2)

        if torch.sum(keep) > 0:
            labels[keep > 0] = 1

        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)

        sum_fg = torch.sum((labels == 1).int(), 1)
        sum_bg = torch.sum((labels == 0).int(), 1)

        for i in range(batch_size):
            # subsample positive labels if we have too many
            if sum_fg[i] > num_fg:
                fg_inds = torch.nonzero(labels[i] == 1).view(-1)
                # torch.randperm seems to have a bug in the multi-GPU setting that causes a segfault.
                # See https://github.com/pytorch/pytorch/issues/1868 for more details.
                # use numpy instead.
                #rand_num = torch.randperm(fg_inds.size(0)).type_as(gt_boxes).long()
                rand_num = torch.from_numpy(
                    np.random.permutation(
                        fg_inds.size(0))).type_as(gt_boxes).long()
                disable_inds = fg_inds[rand_num[:fg_inds.size(0) - num_fg]]
                labels[i][disable_inds] = -1


#           num_bg = cfg.TRAIN.RPN_BATCHSIZE - sum_fg[i]
            num_bg = cfg.TRAIN.RPN_BATCHSIZE - torch.sum(
                (labels == 1).int(), 1)[i]

            # subsample negative labels if we have too many
            if sum_bg[i] > num_bg:
                bg_inds = torch.nonzero(labels[i] == 0).view(-1)
                #rand_num = torch.randperm(bg_inds.size(0)).type_as(gt_boxes).long()

                rand_num = torch.from_numpy(
                    np.random.permutation(
                        bg_inds.size(0))).type_as(gt_boxes).long()
                disable_inds = bg_inds[rand_num[:bg_inds.size(0) - num_bg]]
                labels[i][disable_inds] = -1

        offset = torch.arange(0, batch_size) * gt_boxes.size(1)

        argmax_overlaps = argmax_overlaps + offset.view(
            batch_size, 1).type_as(argmax_overlaps)
        bbox_targets = _compute_targets_batch(
            anchors,
            gt_boxes.view(-1, 5)[argmax_overlaps.view(-1), :].view(
                batch_size, -1, 5))

        # use a single value instead of 4 values for easy index.
        bbox_inside_weights[labels == 1] = cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS[0]

        if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
            # uniform weighting of examples; note that this reuses `i` from the
            # loop above, i.e. the count comes from the last image in the batch
            num_examples = torch.sum(labels[i] >= 0)
            positive_weights = 1.0 / num_examples.item()
            negative_weights = 1.0 / num_examples.item()
        else:
            assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                    (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
            # non-uniform weighting is not implemented in this branch, so the
            # config is expected to use RPN_POSITIVE_WEIGHT < 0 in practice

        bbox_outside_weights[labels == 1] = positive_weights
        bbox_outside_weights[labels == 0] = negative_weights

        labels = _unmap(labels,
                        total_anchors,
                        inds_inside,
                        batch_size,
                        fill=-1)
        bbox_targets = _unmap(bbox_targets,
                              total_anchors,
                              inds_inside,
                              batch_size,
                              fill=0)
        bbox_inside_weights = _unmap(bbox_inside_weights,
                                     total_anchors,
                                     inds_inside,
                                     batch_size,
                                     fill=0)
        bbox_outside_weights = _unmap(bbox_outside_weights,
                                      total_anchors,
                                      inds_inside,
                                      batch_size,
                                      fill=0)

        outputs = []

        labels = labels.view(batch_size, height, width,
                             A).permute(0, 3, 1, 2).contiguous()
        labels = labels.view(batch_size, 1, A * height, width)
        outputs.append(labels)

        bbox_targets = bbox_targets.view(batch_size, height, width,
                                         A * 4).permute(0, 3, 1,
                                                        2).contiguous()
        outputs.append(bbox_targets)

        anchors_count = bbox_inside_weights.size(1)
        bbox_inside_weights = bbox_inside_weights.view(
            batch_size, anchors_count, 1).expand(batch_size, anchors_count, 4)

        bbox_inside_weights = bbox_inside_weights.contiguous().view(batch_size, height, width, 4*A)\
                            .permute(0,3,1,2).contiguous()

        outputs.append(bbox_inside_weights)

        bbox_outside_weights = bbox_outside_weights.view(
            batch_size, anchors_count, 1).expand(batch_size, anchors_count, 4)
        bbox_outside_weights = bbox_outside_weights.contiguous().view(batch_size, height, width, 4*A)\
                            .permute(0,3,1,2).contiguous()
        outputs.append(bbox_outside_weights)

        return outputs
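The anchor grid above comes from a single broadcast: A base anchors are shifted to K feature-map locations via a (1, A, 4) + (K, 1, 4) addition. A small standalone sketch with hypothetical sizes:

import numpy as np
import torch

feat_height, feat_width, feat_stride = 2, 3, 16
base_anchors = torch.tensor([[-8., -8., 8., 8.],
                             [-16., -16., 16., 16.]])   # A = 2 base anchors

shift_x = np.arange(0, feat_width) * feat_stride
shift_y = np.arange(0, feat_height) * feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = torch.from_numpy(
    np.vstack((shift_x.ravel(), shift_y.ravel(),
               shift_x.ravel(), shift_y.ravel())).transpose()).float()  # K = 6 locations

A, K = base_anchors.size(0), shifts.size(0)
all_anchors = (base_anchors.view(1, A, 4) + shifts.view(K, 1, 4)).view(K * A, 4)
print(all_anchors.shape)   # torch.Size([12, 4]), i.e. K * A anchors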
def create_non_correspondences(uv_b_matches, img_b_shape, num_non_matches_per_match=100, img_b_mask=None):
    """
    Takes in pixel matches (uv_b_matches) that correspond to matches in another image, and generates non-matches by just sampling in image space.

    Optionally, the non-matches can be sampled from a mask for image b.

    Returns non-matches as pixel positions in image b.

    Please see 'coordinate_conventions.md' documentation for an explanation of pixel coordinate conventions.

    ## Note that arg uv_b_matches are the outputs of batch_find_pixel_correspondences()

    :param uv_b_matches: tuple of torch.FloatTensors, where each FloatTensor is length n, i.e.:
        (torch.FloatTensor, torch.FloatTensor)

    :param img_b_shape: tuple of (H,W) which is the shape of the image

    (optional)
    :param num_non_matches_per_match: int

    (optional)
    :param img_b_mask: torch.FloatTensor (can be cuda or not)
        - masked image, we will select from the non-zero entries
        - shape is H x W
     
    :return: tuple of torch.FloatTensors, i.e. (torch.FloatTensor, torch.FloatTensor).
        - The first element of the tuple is all "u" pixel positions, and the second element of the tuple is all "v" positions
        - Each torch.FloatTensor is of shape torch.Shape([num_matches, non_matches_per_match])
        - This shape makes it so that each row of the non-matches corresponds to the row for the match in uv_a
    """
    image_width  = img_b_shape[1]
    image_height = img_b_shape[0]

    if uv_b_matches is None:
        return None

    num_matches = len(uv_b_matches[0])

    def get_random_uv_b_non_matches():
        return pytorch_rand_select_pixel(width=image_width,height=image_height, 
            num_samples=num_matches*num_non_matches_per_match)

    if img_b_mask is not None:
        img_b_mask_flat = img_b_mask.view(-1,1).squeeze(1)
        mask_b_indices_flat = torch.nonzero(img_b_mask_flat)
        if len(mask_b_indices_flat) == 0:
            print "warning, empty mask b"
            uv_b_non_matches = get_random_uv_b_non_matches()
        else:
            num_samples = num_matches*num_non_matches_per_match
            rand_numbers_b = torch.rand(num_samples)*len(mask_b_indices_flat)
            rand_indices_b = torch.floor(rand_numbers_b).long()
            randomized_mask_b_indices_flat = torch.index_select(mask_b_indices_flat, 0, rand_indices_b).squeeze(1)
            uv_b_non_matches = (randomized_mask_b_indices_flat%image_width, randomized_mask_b_indices_flat/image_width)
    else:
        uv_b_non_matches = get_random_uv_b_non_matches()
    
    # for each in uv_a, we want non-matches
    # first just randomly sample "non_matches"
    # we will later move random samples that were too close to being matches
    uv_b_non_matches = (uv_b_non_matches[0].view(num_matches,num_non_matches_per_match), uv_b_non_matches[1].view(num_matches,num_non_matches_per_match))

    # uv_b_matches can now be used to make sure no "non_matches" are too close
    # to preserve tensor size, rather than pruning, we can perturb these in pixel space
    copied_uv_b_matches_0 = torch.t(uv_b_matches[0].repeat(num_non_matches_per_match, 1))
    copied_uv_b_matches_1 = torch.t(uv_b_matches[1].repeat(num_non_matches_per_match, 1))

    diffs_0 = copied_uv_b_matches_0 - uv_b_non_matches[0].type(dtype_float)
    diffs_1 = copied_uv_b_matches_1 - uv_b_non_matches[1].type(dtype_float)

    diffs_0_flattened = diffs_0.view(-1,1)
    diffs_1_flattened = diffs_1.view(-1,1)

    diffs_0_flattened = torch.abs(diffs_0_flattened).squeeze(1)
    diffs_1_flattened = torch.abs(diffs_1_flattened).squeeze(1)


    need_to_be_perturbed = torch.zeros_like(diffs_0_flattened)
    ones = torch.ones_like(diffs_0_flattened)
    num_pixels_too_close = 1.0
    threshold = torch.ones_like(diffs_0_flattened)*num_pixels_too_close

    # determine which pixels are too close to being matches
    need_to_be_perturbed = where(diffs_0_flattened < threshold, ones, need_to_be_perturbed)
    need_to_be_perturbed = where(diffs_1_flattened < threshold, ones, need_to_be_perturbed)

    minimal_perturb        = num_pixels_too_close/2
    minimal_perturb_vector = (torch.rand(len(need_to_be_perturbed))*2).floor()*(minimal_perturb*2)-minimal_perturb
    std_dev = 10
    random_vector = torch.randn(len(need_to_be_perturbed))*std_dev + minimal_perturb_vector
    perturb_vector = need_to_be_perturbed*random_vector

    uv_b_non_matches_0_flat = uv_b_non_matches[0].view(-1,1).type(dtype_float).squeeze(1)
    uv_b_non_matches_1_flat = uv_b_non_matches[1].view(-1,1).type(dtype_float).squeeze(1)

    uv_b_non_matches_0_flat = uv_b_non_matches_0_flat + perturb_vector
    uv_b_non_matches_1_flat = uv_b_non_matches_1_flat + perturb_vector

    # now just need to wrap around any that went out of bounds

    # handle wrapping in width
    lower_bound = 0.0
    upper_bound = image_width*1.0 - 1
    lower_bound_vec = torch.ones_like(uv_b_non_matches_0_flat) * lower_bound
    upper_bound_vec = torch.ones_like(uv_b_non_matches_0_flat) * upper_bound

    uv_b_non_matches_0_flat = where(uv_b_non_matches_0_flat > upper_bound_vec, 
        uv_b_non_matches_0_flat - upper_bound_vec, 
        uv_b_non_matches_0_flat)

    uv_b_non_matches_0_flat = where(uv_b_non_matches_0_flat < lower_bound_vec, 
        uv_b_non_matches_0_flat + upper_bound_vec, 
        uv_b_non_matches_0_flat)

    # handle wrapping in height
    lower_bound = 0.0
    upper_bound = image_height*1.0 - 1
    lower_bound_vec = torch.ones_like(uv_b_non_matches_1_flat) * lower_bound
    upper_bound_vec = torch.ones_like(uv_b_non_matches_1_flat) * upper_bound

    uv_b_non_matches_1_flat = where(uv_b_non_matches_1_flat > upper_bound_vec, 
        uv_b_non_matches_1_flat - upper_bound_vec, 
        uv_b_non_matches_1_flat)

    uv_b_non_matches_1_flat = where(uv_b_non_matches_1_flat < lower_bound_vec, 
        uv_b_non_matches_1_flat + upper_bound_vec, 
        uv_b_non_matches_1_flat)

    return (uv_b_non_matches_0_flat.view(num_matches, num_non_matches_per_match),
        uv_b_non_matches_1_flat.view(num_matches, num_non_matches_per_match))
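Note that create_non_correspondences assumes two module-level helpers that are not shown here: dtype_float and an element-wise where(cond, x, y) select written before torch.where existed. A hedged stand-in that behaves the same way for the uses above:

import torch

dtype_float = torch.FloatTensor   # assumed; torch.cuda.FloatTensor when running on GPU

def where(cond, x_1, x_2):
    # element-wise select: x_1 where cond holds, x_2 elsewhere
    return torch.where(cond, x_1, x_2)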
コード例 #54
0
ファイル: rpn_head.py プロジェクト: tuggeluk/mmdetection
    def _get_bboxes_single(self,
                           cls_scores,
                           bbox_preds,
                           mlvl_anchors,
                           img_shape,
                           scale_factor,
                           cfg,
                           rescale=False):
        """Transform outputs for a single batch item into bbox predictions.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level
                Has shape (num_anchors * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (num_anchors * 4, H, W).
            mlvl_anchors (list[Tensor]): Box reference for each scale level
                with shape (num_total_anchors, 4).
            img_shape (tuple[int]): Shape of the input image,
                (height, width, 3).
            scale_factor (ndarray): Scale factor of the image, arranged as
                (w_scale, h_scale, w_scale, h_scale).
            cfg (mmcv.Config): Test / postprocessing configuration,
                if None, test_cfg would be used.
            rescale (bool): If True, return boxes in original image space.

        Returns:
            Tensor: Labeled boxes in shape (n, 5), where the first 4 columns
                are bounding box positions (tl_x, tl_y, br_x, br_y) and the
                5-th column is a score between 0 and 1.
        """
        cfg = self.test_cfg if cfg is None else cfg
        # bboxes from different level should be independent during NMS,
        # level_ids are used as labels for batched NMS to separate them

        level_ids = []
        mlvl_scores = []
        mlvl_bbox_preds = []
        mlvl_valid_anchors = []
        for idx in range(len(cls_scores)):
            rpn_cls_score = cls_scores[idx]
            rpn_bbox_pred = bbox_preds[idx]
            assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
            rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
            if self.use_sigmoid_cls:
                rpn_cls_score = rpn_cls_score.reshape(-1)
                scores = rpn_cls_score.sigmoid()
            else:
                rpn_cls_score = rpn_cls_score.reshape(-1, 2)
                # We set FG labels to [0, num_class-1] and BG label to
                # num_class in RPN head since mmdet v2.5, which is unified to
                # be consistent with other head since mmdet v2.0. In mmdet v2.0
                # to v2.4 we keep BG label as 0 and FG label as 1 in rpn head.
                scores = rpn_cls_score.softmax(dim=1)[:, 0]
            rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
            anchors = mlvl_anchors[idx]
            if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
                # sort is faster than topk
                # _, topk_inds = scores.topk(cfg.nms_pre)
                ranked_scores, rank_inds = scores.sort(descending=True)
                topk_inds = rank_inds[:cfg.nms_pre]
                scores = ranked_scores[:cfg.nms_pre]
                rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
                anchors = anchors[topk_inds, :]
            mlvl_scores.append(scores)
            mlvl_bbox_preds.append(rpn_bbox_pred)
            mlvl_valid_anchors.append(anchors)
            level_ids.append(
                scores.new_full((scores.size(0), ), idx, dtype=torch.long))

        scores = torch.cat(mlvl_scores)
        anchors = torch.cat(mlvl_valid_anchors)
        rpn_bbox_pred = torch.cat(mlvl_bbox_preds)
        proposals = self.bbox_coder.decode(anchors,
                                           rpn_bbox_pred,
                                           max_shape=img_shape)
        ids = torch.cat(level_ids)

        if cfg.min_bbox_size > 0:
            w = proposals[:, 2] - proposals[:, 0]
            h = proposals[:, 3] - proposals[:, 1]
            valid_inds = torch.nonzero((w >= cfg.min_bbox_size)
                                       & (h >= cfg.min_bbox_size),
                                       as_tuple=False).squeeze()
            if valid_inds.sum().item() != len(proposals):
                proposals = proposals[valid_inds, :]
                scores = scores[valid_inds]
                ids = ids[valid_inds]

        # TODO: remove the hard coded nms type
        nms_cfg = dict(type='nms', iou_threshold=cfg.nms_thr)
        dets, keep = batched_nms(proposals, scores, ids, nms_cfg)
        return dets[:cfg.nms_post]
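The level_ids above work because batched NMS offsets each group's boxes by a distinct amount before a single NMS pass, so boxes from different levels can never overlap; torchvision's batched_nms uses the same coordinate trick internally. A minimal illustration of just the offset step (not mmdet's implementation):

import torch

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],    # overlaps box 0 on the same level
                      [0., 0., 10., 10.]])   # identical box, but on another level
ids = torch.tensor([0, 0, 1])                # per-box group id (level_ids above)

max_coordinate = boxes.max()
offsets = ids.to(boxes) * (max_coordinate + 1)
boxes_for_nms = boxes + offsets[:, None]
print(boxes_for_nms)
# boxes on level 1 now live in a disjoint coordinate range, so one plain NMS
# pass over boxes_for_nms behaves like independent per-level NMS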
コード例 #55
0
def cs_fft(m, n, f, mask, mu, beta, n_iter):
    """
    Recovers an image from a subset of its frequencies using FFTs.

    Reconstructs an m x n image from the subset f of its frequencies specified
    by mask, using ADMM with regularization parameter mu, coupling parameter
    beta, and number of iterations n_iter. Unlike function cs_baseline,
    this cs_fft uses FFTs. The computations take place on the CPU(s) in numpy
    when f is a numpy.ndarray and take place on the GPU(s) in ctorch when f is
    a ctorch.ComplexTensor.

    _N.B._: mask[0] must be True in order to make the optimization well-posed.

    Parameters
    ----------
    m : int
        number of rows in the image being reconstructed
    n : int
        number of columns in the image being reconstructed
    f : numpy.ndarray or ctorch.ComplexTensor
        potentially nonzero rows (prior to the inverse Fourier transform)
    mask : numpy.ndarray
        boolean indicators of the positions of the rows in the full m x n array
        -- note that the zero frequency entry must be True in order to make the
        optimization well-posed
    mu : float
        regularization parameter
    beta : float
        coupling parameter for the ADMM iterations
    n_iter : int
        number of ADMM iterations to conduct

    Returns
    -------
    numpy.ndarray or ctorch.ComplexTensor
        reconstructed m x n image
    float
        objective value at the end of the ADMM iterations (see function adm)
    """
    def image_gradient(x):
        """
        First-order finite-differencing both horizontally and vertically.

        Computes a first-order finite-difference approximation to the gradient.

        Parameters
        ----------
        x : numpy.ndarray or ctorch.ComplexTensor
            image (that is, two-dimensional array)

        Returns
        -------
        numpy.ndarray or ctorch.ComplexTensor
            horizontal finite differences of x stacked on top of the vertical
            finite differences (separating horizontal from vertical via the
            initial dimension)
        """
        if isinstance(x, np.ndarray):
            # Wrap the last column of x around to the beginning.
            x_h = np.hstack((x[:, -1:], x))
            # Wrap the last row of x around to the beginning.
            x_v = np.vstack((x[-1:], x))
            # Apply forward differences to the columns of x.
            d_x = (x_h[:, 1:] - x_h[:, :-1])
            # Apply forward differences to the rows of x.
            d_y = (x_v[1:] - x_v[:-1])
            return np.vstack((d_x.ravel(), d_y.ravel()))
        elif isinstance(x, ctorch.ComplexTensor):
            # Wrap the last column of x around to the beginning.
            x_h = ctorch.cat((x[:, -1:], x), dim=1)
            # Wrap the last row of x around to the beginning.
            x_v = ctorch.cat((x[-1:], x), dim=0)
            # Apply forward differences to the columns of x.
            d_x = (x_h[:, 1:] - x_h[:, :-1])
            # Apply forward differences to the rows of x.
            d_y = (x_v[1:] - x_v[:-1])
            return ctorch.cat((d_x, d_y)).view(2, -1)
        else:
            raise TypeError('Input must be a numpy.ndarray ' +
                            'or a ctorch.ComplexTensor.')

    def image_gradient_T(x):
        """
        Transpose of the operator that function image_gradient implements.

        Computes the transpose of the matrix given by function image_gradient.

        Parameters
        ----------
        x : numpy.ndarray or ctorch.ComplexTensor
            stack of two identically shaped arrays

        Returns
        -------
        numpy.ndarray or ctorch.ComplexTensor
            result of applying to x the transpose of function image_gradient
        """
        if isinstance(x, np.ndarray):
            x_h = x[0]
            x_v = x[1]
            # Wrap the first column of x_h around to the end.
            x_h_ext = np.hstack((x_h, x_h[:, :1]))
            # Wrap the first row of x_v around to the end.
            x_v_ext = np.vstack((x_v, x_v[:1]))
            # Apply forward differences to the columns of x.
            d_x = x_h_ext[:, :-1] - x_h_ext[:, 1:]
            # Apply forward differences to the rows of x.
            d_y = x_v_ext[:-1] - x_v_ext[1:]
            return d_x + d_y
        elif isinstance(x, ctorch.ComplexTensor):
            x_h = x[0]
            x_v = x[1]
            # Wrap the first column of x_h around to the end.
            x_h_ext = ctorch.cat((x_h, x_h[:, :1]), dim=1)
            # Wrap the first row of x_v around to the end.
            x_v_ext = ctorch.cat((x_v, x_v[:1]), dim=0)
            # Apply forward differences to the columns of x.
            d_x = x_h_ext[:, :-1] - x_h_ext[:, 1:]
            # Apply forward differences to the rows of x.
            d_y = x_v_ext[:-1] - x_v_ext[1:]
            return d_x + d_y
        else:
            raise TypeError('Input must be a numpy.ndarray ' +
                            'or a ctorch.ComplexTensor.')

    if isinstance(f, np.ndarray):
        assert f.shape[1] == n
        assert mask[0]
        # Rescale f and pad with zeros between the mask samples.
        Ktf = (mu / beta) * zero_padded(m, n, f, mask)
        # Calculate the Fourier transform of the convolutional kernels
        # for finite differences.
        tx = np.abs(np.fft.fft([1, -1] + [0] * (m - 2)))**2
        ty = np.abs(np.fft.fft([1, -1] + [0] * (n - 2)))**2
        # Compute the multipliers required to solve formula (2.8) from Tao-Yang
        # in the Fourier domain. The calculation involves broadcasting the
        # Fourier transform of the convolutional kernel for horizontal finite
        # differences over the vertical directions, and broadcasting both the
        # subsampling mask and the Fourier transform of the convolutional
        # kernel for vertical finite differences over horizontal directions.
        multipliers = 1. / (ty + tx[:, None] + (mu / beta) * mask[:, None])
        # Initialize the primal (x) and dual (la) solutions to zeros.
        x = np.zeros((m, n))
        la = np.zeros((2, m * n))
        # Calculate iterations of alternating minimization.
        for i in range(n_iter):
            # Apply shrinkage via formula (2.7) from Tao-Yang, dividing both
            # arguments of the "max" operator in formula (2.7) by the
            # denominator of the rightmost factor in formula (2.7).
            a = image_gradient(x) + la / beta
            b = scipy.linalg.norm(a, axis=0, keepdims=True)
            if i > 0:
                y = a * np.maximum(1 - 1 / (beta * b), 0)
            else:
                y = np.zeros((2, m * n))
            # Solve formula (2.8) from Tao-Yang in the Fourier domain.
            c = image_gradient_T((y - la / beta).reshape((2, m, n))) + Ktf
            x = np.fft.ifft2(np.fft.fft2(c) * multipliers)
            # Update the Lagrange multipliers via formula (2.9) from Tao-Yang.
            la = la - beta * (y - image_gradient(x))
        # Calculate the loss in formula (1.4) from Tao-Yang...
        loss = np.linalg.norm(image_gradient(x), axis=0).sum()
        # ... adding in the term for the fidelity of the reconstruction.
        loss += np.linalg.norm(np.fft.fft2(x)[mask] / np.sqrt(m * n) -
                               f)**2 * (mu / 2)
        # Discard the imaginary part of the primal solution,
        # returning only the real part and the loss.
        return x.real, loss
    elif isinstance(f, ctorch.ComplexTensor):
        assert f.shape[1] == n
        assert mask[0]
        # Convert the mask from booleans to long integers.
        mask_nnz = torch.nonzero(mask).squeeze(1)
        # Rescale f and pad with zeros between the mask samples.
        Ktf = zero_padded(m, n, f, mask_nnz) * (mu / beta)
        # Calculate the Fourier transform of the convolutional kernels
        # for finite differences.
        tx = np.abs(np.fft.fft([1, -1] + [0] * (m - 2)))**2
        ty = np.abs(np.fft.fft([1, -1] + [0] * (n - 2)))**2
        # Compute the multipliers required to solve formula (2.8) from Tao-Yang
        # in the Fourier domain. The calculation involves broadcasting the
        # Fourier transform of the convolutional kernel for horizontal finite
        # differences over the vertical directions, and broadcasting both the
        # subsampling mask and the Fourier transform of the convolutional
        # kernel for vertical finite differences over horizontal directions.
        multipliers = 1. / (ty + tx[:, None] + mask.cpu().numpy()[:, None] *
                            (mu / beta))
        multipliers = ctorch.from_numpy(multipliers).cuda()
        # Initialize the primal (x) and dual (la) solutions to zeros,
        # creating new ctorch tensors of the same type as f.
        x = f.new(m, n).zero_()
        la = f.new(2, m * n).zero_()
        # Calculate iterations of alternating minimization.
        for i in range(n_iter):
            # Apply shrinkage via formula (2.7) from Tao-Yang, dividing both
            # arguments of the "max" operator in formula (2.7) by the
            # denominator of the rightmost factor in formula (2.7).
            a = image_gradient(x) + la / beta
            b = ctorch.norm(a, p=2, dim=0, keepdim=True)
            if i > 0:
                y = a * torch.clamp(1 - 1 / (beta * b), min=0)
            else:
                y = f.new(2, m * n).zero_()
            # Solve formula (2.8) from Tao-Yang in the Fourier domain.
            c = image_gradient_T((y - la / beta).view(2, m, n)) + Ktf
            x = ctorch.ifft2(ctorch.fft2(c) * multipliers)
            # Update the Lagrange multipliers via formula (2.9) from Tao-Yang.
            la = la - (y - image_gradient(x)) * beta
        # Calculate the loss in formula (1.4) from Tao-Yang...
        loss = ctorch.norm(image_gradient(x), p=2, dim=0).sum()
        # ... adding in the term for the fidelity of the reconstruction.
        loss += ctorch.norm(ctorch.fft2(x)[mask_nnz] / math.sqrt(m * n) -
                            f)**2 * (mu / 2)
        # Discard the imaginary part of the primal solution,
        # returning only the real part and the loss.
        return x.real, loss.item()
    else:
        raise TypeError('Input must be a numpy.ndarray ' +
                        'or a ctorch.ComplexTensor.')
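A quick numerical sanity check (numpy only) that the two inner helpers really are transposes of each other, i.e. that <G x, y> equals <x, G^T y>; since image_gradient and image_gradient_T are nested inside cs_fft, their numpy branches are restated standalone here.

import numpy as np

def grad(x):
    x_h = np.hstack((x[:, -1:], x))
    x_v = np.vstack((x[-1:], x))
    return np.vstack(((x_h[:, 1:] - x_h[:, :-1]).ravel(),
                      (x_v[1:] - x_v[:-1]).ravel()))

def grad_T(x):
    x_h, x_v = x[0], x[1]
    x_h_ext = np.hstack((x_h, x_h[:, :1]))
    x_v_ext = np.vstack((x_v, x_v[:1]))
    return (x_h_ext[:, :-1] - x_h_ext[:, 1:]) + (x_v_ext[:-1] - x_v_ext[1:])

m, n = 5, 7
rng = np.random.default_rng(0)
x = rng.standard_normal((m, n))
y = rng.standard_normal((2, m, n))
lhs = np.vdot(grad(x), y.reshape(2, -1))
rhs = np.vdot(x, grad_T(y))
print(np.isclose(lhs, rhs))   # True: the operators are adjoint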
コード例 #56
0
def getProposals(obj_pc,
                 grids,
                 center,
                 index,
                 scores,
                 data_index,
                 radius=0.022 * np.sqrt(3),
                 local_th=0.011,
                 local_pn=100):
    center = center.squeeze(0)
    index = index.squeeze(0)
    scores = scores.squeeze(0)
    grids = grids.squeeze(0)
    obj_pc = obj_pc.squeeze(0) * torch.FloatTensor([0.22 / 2, 0.22 / 2, 0.22
                                                    ]).to(obj_pc.device)
    contact = obj_pc[index]

    cent_grid_dist_matrix = dist_matrix_torch(center, grids)
    point_dist = dist_matrix_torch(contact, obj_pc)

    con_num = index.size(0)
    grid_num = grids.size(0)
    pn_num = obj_pc.size(0)

    # get contact-grid pairs
    contact_exp = contact.view(-1, 1, 3).expand(-1, grid_num, -1)
    grids_exp = grids.view(1, -1, 3).expand(con_num, -1, -1)
    pairs_ = torch.stack([contact_exp, grids_exp], 2).view(-1, 2,
                                                           3).unsqueeze(0)
    pairs = pairs_.cpu()
    del pairs_

    # get positive and negative proposals
    select = (cent_grid_dist_matrix < radius).float()
    posi_prop_idx_ = torch.nonzero(select.view(-1)).view(-1)
    nega_prop_idx_ = torch.nonzero(select.view(-1) == 0).view(-1)
    posi_prop_idx, nega_prop_idx = posi_prop_idx_.cpu(), nega_prop_idx_.cpu()

    offsets_ = (grids_exp - center.view(-1, 1, 3)).view(1, -1, 3) / radius
    offsets = (offsets_ * select.view(1, -1, 1)).cpu()
    del offsets_
    # get proposal scores (select is a 0/1 mask, so a single multiplication suffices)
    scores_all_ = scores.view(-1, 1) * select
    scores_all = scores_all_.view(1, -1).cpu()
    posi_prop_scores = scores_all_.view(-1)[posi_prop_idx_]
    posi_idx_ = torch.nonzero(posi_prop_scores).view(
        -1)  # positive proposals associated to positive grasps
    nega_idx_ = torch.nonzero(posi_prop_scores == 0).view(
        -1)  # positive proposals associated to negative grasps
    posi_idx, nega_idx = posi_idx_.cpu(), nega_idx_.cpu()
    anti_label = select.view(1, -1).cpu()  # proposals labels
    del (scores_all_, posi_prop_idx_, nega_prop_idx_, posi_idx_, nega_idx_)

    # get local points
    pg_vec = contact_exp - grids_exp  # vectors from grids to contacts
    pg_vec = pg_vec / torch.sqrt(torch.sum(pg_vec**2, -1, keepdim=True))
    obj_pc_exp = obj_pc.view(1, -1, 3)  #.expand(con_num, -1, -1)
    pp_vec = obj_pc_exp - contact.view(
        -1, 1, 3)  # vectors from contacts to other points
    point_dist_view = point_dist.view(con_num, -1, 1)
    pp_vec = pp_vec / point_dist_view
    del (obj_pc_exp, contact_exp, grids_exp, cent_grid_dist_matrix, point_dist)

    data_num = grid_num * con_num * pn_num
    num = data_num // 5e8 + 1
    num = int(num)
    delta = (con_num + num - 1) // num
    local_points_list = []
    # chunk the computation over contacts to avoid running out of memory
    for i in range(num):
        s = delta * i
        e = delta * (i + 1)
        if i + 1 == num:
            e = max(e, con_num)
        dist_ = point_dist_view[s:e].transpose(1, 2) * (
            1.0 + torch.abs(pg_vec[s:e].matmul(pp_vec[s:e].transpose(1, 2))))
        dist = dist_.to('cuda:0')
        local_points = pu.matrix_k_min(local_th, local_pn, dist).long()
        local_points_list.append(local_points)
        del (dist_, dist)
    local_points = torch.cat(local_points_list, 0).view(1, -1, local_pn).long()

    assert local_points.size(1) == con_num * grid_num, local_points.size(1)
    data_index = data_index.new(con_num, grid_num).zero_() + data_index.view(
        -1, 1)
    data_index = data_index.view(1, -1)
    del (select, pg_vec, pp_vec)

    return pairs, scores_all, offsets, local_points, data_index, anti_label, posi_prop_idx, nega_prop_idx, posi_idx, nega_idx
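getProposals assumes a pairwise-distance helper dist_matrix_torch that is not shown in this listing; judging from how its output is indexed above (rows correspond to the first argument), it returns an (N, M) matrix of Euclidean distances. A minimal sketch under that assumption:

import torch

def dist_matrix_torch(a, b):
    # a: (N, 3) points, b: (M, 3) points on the same device.
    # Returns the (N, M) matrix of pairwise Euclidean distances.
    return torch.cdist(a, b, p=2)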
Code example #57
    def _sample_rois_pytorch(self, all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
        """Generate a random sample of RoIs comprising foreground and background
        examples.
        """
        # overlaps: (rois x gt_boxes)

        overlaps = bbox_overlaps_batch(all_rois, gt_boxes)

        max_overlaps, gt_assignment = torch.max(overlaps, 2)

        batch_size = overlaps.size(0)
        num_proposal = overlaps.size(1)
        num_boxes_per_img = overlaps.size(2)

        offset = torch.arange(0, batch_size)*gt_boxes.size(1)
        offset = offset.view(-1, 1).type_as(gt_assignment) + gt_assignment

        # plain tensor indexing (equivalent to the legacy Tensor.index() call)
        labels = gt_boxes[:, :, 4].contiguous().view(-1)[offset.view(-1)].view(batch_size, -1)
        
        labels_batch = labels.new(batch_size, rois_per_image).zero_()
        rois_batch  = all_rois.new(batch_size, rois_per_image, 5).zero_()
        gt_rois_batch = all_rois.new(batch_size, rois_per_image, 5).zero_()
        # Guard against the case when an image has fewer than max_fg_rois_per_image
        # foreground RoIs
        for i in range(batch_size):

            fg_inds = torch.nonzero(max_overlaps[i] >= cfg.TRAIN.FG_THRESH).view(-1)
            fg_num_rois = fg_inds.numel()

            # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
            bg_inds = torch.nonzero((max_overlaps[i] < cfg.TRAIN.BG_THRESH_HI) &
                                    (max_overlaps[i] >= cfg.TRAIN.BG_THRESH_LO)).view(-1)
            bg_num_rois = bg_inds.numel()

            if fg_num_rois > 0 and bg_num_rois > 0:
                # sampling fg
                fg_rois_per_this_image = min(fg_rois_per_image, fg_num_rois)

                # torch.randperm seems to have a bug in the multi-GPU setting that
                # causes a segfault; see https://github.com/pytorch/pytorch/issues/1868.
                # Use numpy instead.
                #rand_num = torch.randperm(fg_num_rois).long().cuda()
                rand_num = torch.from_numpy(np.random.permutation(fg_num_rois)).type_as(gt_boxes).long()
                fg_inds = fg_inds[rand_num[:fg_rois_per_this_image]]

                # sampling bg
                bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image

                # torch.rand seems to have a bug: it can generate very large numbers
                # and cause an error, so we use numpy's rand instead.
                #rand_num = (torch.rand(bg_rois_per_this_image) * bg_num_rois).long().cuda()
                rand_num = np.floor(np.random.rand(bg_rois_per_this_image) * bg_num_rois)
                rand_num = torch.from_numpy(rand_num).type_as(gt_boxes).long()
                bg_inds = bg_inds[rand_num]

            elif fg_num_rois > 0 and bg_num_rois == 0:
                # sampling fg
                #rand_num = torch.floor(torch.rand(rois_per_image) * fg_num_rois).long().cuda()
                rand_num = np.floor(np.random.rand(rois_per_image) * fg_num_rois)
                rand_num = torch.from_numpy(rand_num).type_as(gt_boxes).long()
                fg_inds = fg_inds[rand_num]
                fg_rois_per_this_image = rois_per_image
                bg_rois_per_this_image = 0
            elif bg_num_rois > 0 and fg_num_rois == 0:
                # sampling bg
                #rand_num = torch.floor(torch.rand(rois_per_image) * bg_num_rois).long().cuda()
                rand_num = np.floor(np.random.rand(rois_per_image) * bg_num_rois)
                rand_num = torch.from_numpy(rand_num).type_as(gt_boxes).long()

                bg_inds = bg_inds[rand_num]
                bg_rois_per_this_image = rois_per_image
                fg_rois_per_this_image = 0
            else:
                raise ValueError("bg_num_rois = 0 and fg_num_rois = 0, this should not happen!")

            # The indices that we're selecting (both fg and bg)
            keep_inds = torch.cat([fg_inds, bg_inds], 0)

            # Select sampled values from various arrays:
            labels_batch[i].copy_(labels[i][keep_inds])

            # Clamp labels for the background RoIs to 0
            if fg_rois_per_this_image < rois_per_image:
                labels_batch[i][fg_rois_per_this_image:] = 0

            rois_batch[i] = all_rois[i][keep_inds]
            rois_batch[i,:,0] = i

            gt_rois_batch[i] = gt_boxes[i][gt_assignment[i][keep_inds]]

        bbox_target_data = self._compute_targets_pytorch(
                rois_batch[:,:,1:5], gt_rois_batch[:,:,:4])

        bbox_targets, bbox_inside_weights = \
                self._get_bbox_regression_labels_pytorch(bbox_target_data, labels_batch, num_classes)

        return labels_batch, rois_batch, bbox_targets, bbox_inside_weights
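The call to self._compute_targets_pytorch above produces box-regression targets in the standard Faster R-CNN parameterization. A minimal standalone sketch of that parameterization (illustrative only; the repository's own bbox_transform_batch works on batched (B, N, 4) inputs):

import torch

def bbox_transform_sketch(ex_rois, gt_rois):
    # ex_rois, gt_rois: (N, 4) boxes as (x1, y1, x2, y2).
    # Returns (N, 4) regression targets (dx, dy, dw, dh).
    ex_w = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_h = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_cx = ex_rois[:, 0] + 0.5 * ex_w
    ex_cy = ex_rois[:, 1] + 0.5 * ex_h
    gt_w = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_h = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_cx = gt_rois[:, 0] + 0.5 * gt_w
    gt_cy = gt_rois[:, 1] + 0.5 * gt_h
    dx = (gt_cx - ex_cx) / ex_w
    dy = (gt_cy - ex_cy) / ex_h
    dw = torch.log(gt_w / ex_w)
    dh = torch.log(gt_h / ex_h)
    return torch.stack([dx, dy, dw, dh], dim=1)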
Code example #58
    def interpolate(self, x_grid, x_target, interp_points=range(-2, 2)):
        # Do some boundary checking
        grid_mins = x_grid.min(0)[0]
        grid_maxs = x_grid.max(0)[0]
        x_target_min = x_target.min(0)[0]
        x_target_max = x_target.max(0)[0]
        lt_min_mask = (x_target_min - grid_mins).lt(-1e-7)
        gt_max_mask = (x_target_max - grid_maxs).gt(1e-7)
        if lt_min_mask.sum().item():
            first_out_of_range = lt_min_mask.nonzero().squeeze(1)[0].item()
            raise RuntimeError(
                (
                    "Received data that was out of bounds for the specified grid. "
                    "Grid bounds were ({0:.3f}, {0:.3f}), but min = {0:.3f}, "
                    "max = {0:.3f}"
                ).format(
                    grid_mins[first_out_of_range].item(),
                    grid_maxs[first_out_of_range].item(),
                    x_target_min[first_out_of_range].item(),
                    x_target_max[first_out_of_range].item(),
                )
            )
        if gt_max_mask.sum().item():
            first_out_of_range = gt_max_mask.nonzero().squeeze(1)[0].item()
            raise RuntimeError(
                (
                    "Received data that was out of bounds for the specified grid. "
                    "Grid bounds were ({0:.3f}, {0:.3f}), but min = {0:.3f}, "
                    "max = {0:.3f}"
                ).format(
                    grid_mins[first_out_of_range].item(),
                    grid_maxs[first_out_of_range].item(),
                    x_target_min[first_out_of_range].item(),
                    x_target_max[first_out_of_range].item(),
                )
            )

        # Now do interpolation
        interp_points = torch.tensor(interp_points, dtype=x_grid.dtype, device=x_grid.device)
        interp_points_flip = interp_points.flip(0)

        num_grid_points = x_grid.size(0)
        num_target_points = x_target.size(0)
        num_dim = x_target.size(-1)
        num_coefficients = len(interp_points)

        interp_values = torch.ones(
            num_target_points, num_coefficients ** num_dim, dtype=x_grid.dtype, device=x_grid.device
        )
        interp_indices = torch.zeros(
            num_target_points, num_coefficients ** num_dim, dtype=torch.long, device=x_grid.device
        )

        for i in range(num_dim):
            grid_delta = x_grid[1, i] - x_grid[0, i]
            lower_grid_pt_idxs = torch.floor((x_target[:, i] - x_grid[0, i]) / grid_delta).squeeze()
            lower_pt_rel_dists = (x_target[:, i] - x_grid[0, i]) / grid_delta - lower_grid_pt_idxs
            lower_grid_pt_idxs = lower_grid_pt_idxs - interp_points.max()
            lower_grid_pt_idxs.detach_()

            if len(lower_grid_pt_idxs.shape) == 0:
                lower_grid_pt_idxs = lower_grid_pt_idxs.unsqueeze(0)

            scaled_dist = lower_pt_rel_dists.unsqueeze(-1) + interp_points_flip.unsqueeze(-2)
            dim_interp_values = self._cubic_interpolation_kernel(scaled_dist)

            # Find points whose closest lower grid point is the first grid point.
            # This corresponds to a boundary condition that we must fix manually.
            left_boundary_pts = torch.nonzero(lower_grid_pt_idxs < 1)
            num_left = len(left_boundary_pts)

            if num_left > 0:
                left_boundary_pts.squeeze_(1)
                x_grid_first = x_grid[:num_coefficients, i].unsqueeze(1).t().expand(num_left, num_coefficients)

                grid_targets = x_target.select(1, i)[left_boundary_pts].unsqueeze(1).expand(num_left, num_coefficients)
                dists = torch.abs(x_grid_first - grid_targets)
                closest_from_first = torch.min(dists, 1)[1]

                for j in range(num_left):
                    dim_interp_values[left_boundary_pts[j], :] = 0
                    dim_interp_values[left_boundary_pts[j], closest_from_first[j]] = 1
                    lower_grid_pt_idxs[left_boundary_pts[j]] = 0

            right_boundary_pts = torch.nonzero(lower_grid_pt_idxs > num_grid_points - num_coefficients)
            num_right = len(right_boundary_pts)

            if num_right > 0:
                right_boundary_pts.squeeze_(1)
                x_grid_last = x_grid[-num_coefficients:, i].unsqueeze(1).t().expand(num_right, num_coefficients)

                grid_targets = x_target.select(1, i)[right_boundary_pts].unsqueeze(1)
                grid_targets = grid_targets.expand(num_right, num_coefficients)
                dists = torch.abs(x_grid_last - grid_targets)
                closest_from_last = torch.min(dists, 1)[1]

                for j in range(num_right):
                    dim_interp_values[right_boundary_pts[j], :] = 0
                    dim_interp_values[right_boundary_pts[j], closest_from_last[j]] = 1
                    lower_grid_pt_idxs[right_boundary_pts[j]] = num_grid_points - num_coefficients

            offset = (interp_points - interp_points.min()).long().unsqueeze(-2)
            dim_interp_indices = lower_grid_pt_idxs.long().unsqueeze(-1) + offset

            n_inner_repeat = num_coefficients ** i
            n_outer_repeat = num_coefficients ** (num_dim - i - 1)
            index_coeff = num_grid_points ** (num_dim - i - 1)
            dim_interp_indices = dim_interp_indices.unsqueeze(-1).repeat(1, n_inner_repeat, n_outer_repeat)
            dim_interp_values = dim_interp_values.unsqueeze(-1).repeat(1, n_inner_repeat, n_outer_repeat)
            interp_indices = interp_indices.add(dim_interp_indices.view(num_target_points, -1).mul(index_coeff))
            interp_values = interp_values.mul(dim_interp_values.view(num_target_points, -1))

        return interp_indices, interp_values
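interpolate delegates the per-point weights to self._cubic_interpolation_kernel, which is not shown here. Assuming it is the usual Keys cubic convolution kernel with a = -0.5 (consistent with the four interpolation points used above), a sketch would be:

import torch

def cubic_interpolation_kernel(scaled_dist):
    # Keys cubic convolution kernel with a = -0.5, evaluated elementwise.
    s = scaled_dist.abs()
    inner = (1.5 * s - 2.5) * s * s + 1.0              # |s| <= 1
    outer = ((-0.5 * s + 2.5) * s - 4.0) * s + 2.0     # 1 < |s| <= 2
    res = torch.zeros_like(s)
    res = torch.where(s <= 1, inner, res)
    res = torch.where((s > 1) & (s <= 2), outer, res)
    return res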
Code example #59
    def forward(self, input):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate 9 anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the 9 anchors
        # filter out-of-image anchors

        rpn_cls_score = input[0]
        gt_boxes = input[1]
        im_info = input[2]
        num_boxes = input[3]

        # map of shape (..., H, W)
        height, width = rpn_cls_score.size(2), rpn_cls_score.size(3)

        batch_size = gt_boxes.size(0)

        feat_height, feat_width = rpn_cls_score.size(2), rpn_cls_score.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
                                  shift_x.ravel(), shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(rpn_cls_score).float()

        A = self._num_anchors
        K = shifts.size(0)

        self._anchors = self._anchors.type_as(gt_boxes) # move to specific gpu.
        all_anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        all_anchors = all_anchors.view(K * A, 4)

        total_anchors = int(K * A)

        keep = ((all_anchors[:, 0] >= -self._allowed_border) &
                (all_anchors[:, 1] >= -self._allowed_border) &
                (all_anchors[:, 2] < int(im_info[0][1]) + self._allowed_border) &
                (all_anchors[:, 3] < int(im_info[0][0]) + self._allowed_border))

        inds_inside = torch.nonzero(keep).view(-1)

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # label: 1 is positive, 0 is negative, -1 is dont care
        labels = gt_boxes.new(batch_size, inds_inside.size(0)).fill_(-1)
        bbox_inside_weights = gt_boxes.new(batch_size, inds_inside.size(0)).zero_()
        bbox_outside_weights = gt_boxes.new(batch_size, inds_inside.size(0)).zero_()

        overlaps = bbox_overlaps_batch(anchors, gt_boxes)

        max_overlaps, argmax_overlaps = torch.max(overlaps, 2)
        gt_max_overlaps, _ = torch.max(overlaps, 1)

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        gt_max_overlaps[gt_max_overlaps==0] = 1e-5
        keep = torch.sum(overlaps.eq(gt_max_overlaps.view(batch_size,1,-1).expand_as(overlaps)), 2)

        if torch.sum(keep) > 0:
            labels[keep>0] = 1

        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)

        sum_fg = torch.sum((labels == 1).int(), 1)
        sum_bg = torch.sum((labels == 0).int(), 1)

        for i in range(batch_size):
            # subsample positive labels if we have too many
            if sum_fg[i] > num_fg:
                fg_inds = torch.nonzero(labels[i] == 1).view(-1)
                # torch.randperm seems to have a bug in the multi-GPU setting that
                # causes a segfault; see https://github.com/pytorch/pytorch/issues/1868.
                # Use numpy instead.
                #rand_num = torch.randperm(fg_inds.size(0)).type_as(gt_boxes).long()
                rand_num = torch.from_numpy(np.random.permutation(fg_inds.size(0))).type_as(gt_boxes).long()
                disable_inds = fg_inds[rand_num[:fg_inds.size(0)-num_fg]]
                labels[i][disable_inds] = -1

            num_bg = cfg.TRAIN.RPN_BATCHSIZE - sum_fg[i]

            # subsample negative labels if we have too many
            if sum_bg[i] > num_bg:
                bg_inds = torch.nonzero(labels[i] == 0).view(-1)
                #rand_num = torch.randperm(bg_inds.size(0)).type_as(gt_boxes).long()

                rand_num = torch.from_numpy(np.random.permutation(bg_inds.size(0))).type_as(gt_boxes).long()
                disable_inds = bg_inds[rand_num[:bg_inds.size(0)-num_bg]]
                labels[i][disable_inds] = -1

        offset = torch.arange(0, batch_size)*gt_boxes.size(1)

        argmax_overlaps = argmax_overlaps + offset.view(batch_size, 1).type_as(argmax_overlaps)
        bbox_targets = _compute_targets_batch(anchors, gt_boxes.view(-1,5)[argmax_overlaps.view(-1), :].view(batch_size, -1, 5))

        # use a single value instead of 4 values for easy index.
        bbox_inside_weights[labels==1] = cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS[0]

        if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
            # uniform weighting of examples (given non-uniform sampling);
            # note: num_examples is taken from the last image in the batch,
            # as in the original implementation.
            num_examples = torch.sum(labels[i] >= 0).item()
            positive_weights = 1.0 / num_examples
            negative_weights = 1.0 / num_examples
        else:
            assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                    (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
            # weight positives/negatives as in py-faster-rcnn; without these
            # assignments the weights used below would be undefined.
            positive_weights = cfg.TRAIN.RPN_POSITIVE_WEIGHT / torch.sum(labels == 1).item()
            negative_weights = (1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / torch.sum(labels == 0).item()

        bbox_outside_weights[labels == 1] = positive_weights
        bbox_outside_weights[labels == 0] = negative_weights

        labels = _unmap(labels, total_anchors, inds_inside, batch_size, fill=-1)
        bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, batch_size, fill=0)
        bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, batch_size, fill=0)
        bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, batch_size, fill=0)

        outputs = []

        labels = labels.view(batch_size, height, width, A).permute(0,3,1,2).contiguous()
        labels = labels.view(batch_size, 1, A * height, width)
        outputs.append(labels)

        bbox_targets = bbox_targets.view(batch_size, height, width, A*4).permute(0,3,1,2).contiguous()
        outputs.append(bbox_targets)

        anchors_count = bbox_inside_weights.size(1)
        bbox_inside_weights = bbox_inside_weights.view(batch_size,anchors_count,1).expand(batch_size, anchors_count, 4)

        bbox_inside_weights = bbox_inside_weights.contiguous().view(batch_size, height, width, 4*A)\
                            .permute(0,3,1,2).contiguous()

        outputs.append(bbox_inside_weights)

        bbox_outside_weights = bbox_outside_weights.view(batch_size,anchors_count,1).expand(batch_size, anchors_count, 4)
        bbox_outside_weights = bbox_outside_weights.contiguous().view(batch_size, height, width, 4*A)\
                            .permute(0,3,1,2).contiguous()
        outputs.append(bbox_outside_weights)

        return outputs
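_unmap, used in the tail of the method above, scatters the per-inside-anchor labels and targets back onto the full set of total_anchors, filling the discarded out-of-image anchors with a constant. A minimal sketch matching the call sites (data, count, inds, batch_size, fill):

import torch

def _unmap(data, count, inds, batch_size, fill=0):
    # Place data (batch_size, len(inds), ...) into a (batch_size, count, ...)
    # tensor at positions `inds`, with `fill` everywhere else.
    if data.dim() == 2:
        ret = data.new_full((batch_size, count), fill)
        ret[:, inds] = data
    else:
        ret = data.new_full((batch_size, count, data.size(2)), fill)
        ret[:, inds, :] = data
    return ret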
Code example #60
    def forward(self, input, input_map, coords, batch_idxs, batch_offsets, epoch):
        '''
        :param input_map: (N), int, cuda
        :param coords: (N, 3), float, cuda
        :param batch_idxs: (N), int, cuda
        :param batch_offsets: (B + 1), int, cuda
        '''
        ret = {}

        output = self.input_conv(input)
        output = self.unet(output)
        output = self.output_layer(output)
        output_feats = output.features[input_map.long()]

        #### semantic segmentation
        semantic_scores = self.linear(output_feats)   # (N, nClass), float
        semantic_preds = semantic_scores.max(1)[1]    # (N), long

        ret['semantic_scores'] = semantic_scores

        #### offset
        pt_offsets_feats = self.offset(output_feats)
        pt_offsets = self.offset_linear(pt_offsets_feats)   # (N, 3), float32

        ret['pt_offsets'] = pt_offsets

        #if(epoch > self.prepare_epochs):
        #### get proposal clusters
        object_idxs = torch.nonzero(semantic_preds > 1).view(-1)

        batch_idxs_ = batch_idxs[object_idxs]
        batch_offsets_ = utils.get_batch_offsets(batch_idxs_, input.batch_size)
        coords_ = coords[object_idxs]
        pt_offsets_ = pt_offsets[object_idxs]

        semantic_preds_cpu = semantic_preds[object_idxs].int().cpu()

        idx_shift, start_len_shift = pointgroup_ops.ballquery_batch_p(coords_ + pt_offsets_, batch_idxs_, batch_offsets_, self.cluster_radius, self.cluster_shift_meanActive)
        proposals_idx_shift, proposals_offset_shift = pointgroup_ops.bfs_cluster(semantic_preds_cpu, idx_shift.cpu(), start_len_shift.cpu(), self.cluster_npoint_thre)
        proposals_idx_shift[:, 1] = object_idxs[proposals_idx_shift[:, 1].long()].int()
        # proposals_idx_shift: (sumNPoint, 2), int, dim 0 for cluster_id, dim 1 for corresponding point idxs in N
        # proposals_offset_shift: (nProposal + 1), int

        idx, start_len = pointgroup_ops.ballquery_batch_p(coords_, batch_idxs_, batch_offsets_, self.cluster_radius, self.cluster_meanActive)
        proposals_idx, proposals_offset = pointgroup_ops.bfs_cluster(semantic_preds_cpu, idx.cpu(), start_len.cpu(), self.cluster_npoint_thre)
        proposals_idx[:, 1] = object_idxs[proposals_idx[:, 1].long()].int()
        # proposals_idx: (sumNPoint, 2), int, dim 0 for cluster_id, dim 1 for corresponding point idxs in N
        # proposals_offset: (nProposal + 1), int

        proposals_idx_shift[:, 0] += (proposals_offset.size(0) - 1)
        proposals_offset_shift += proposals_offset[-1]
        proposals_idx = torch.cat((proposals_idx, proposals_idx_shift), dim=0)
        proposals_offset = torch.cat((proposals_offset, proposals_offset_shift[1:]))

        #### proposals voxelization again
        input_feats, inp_map = self.clusters_voxelization(proposals_idx, proposals_offset, output_feats, coords, self.score_fullscale, self.score_scale, self.mode)

        #### score
        score = self.score_unet(input_feats)
        score = self.score_outputlayer(score)
        score_feats = score.features[inp_map.long()] # (sumNPoint, C)
        score_feats = pointgroup_ops.roipool(score_feats, proposals_offset.cuda())  # (nProposal, C)
        scores = self.score_linear(score_feats)  # (nProposal, 1)

        ret['proposal_scores'] = (scores, score_feats, proposals_idx, proposals_offset)

        return ret
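utils.get_batch_offsets, called above, is assumed to turn the per-point batch indices into cumulative offsets of length B + 1 (the batch_offsets convention from the docstring, where points of batch b occupy [offsets[b], offsets[b + 1])). A minimal sketch under that assumption:

import torch

def get_batch_offsets(batch_idxs, bs):
    # batch_idxs: (N,) int tensor with values in [0, bs).
    # Returns a (bs + 1,) int tensor of cumulative per-batch point counts.
    counts = torch.bincount(batch_idxs.long(), minlength=bs)
    offsets = torch.zeros(bs + 1, dtype=torch.int32, device=batch_idxs.device)
    offsets[1:] = torch.cumsum(counts, dim=0).int()
    return offsets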