Example #1
    def _get_predicted_bounding_boxes(self, predicted_center_x_values,
                                      predicted_center_y_values,
                                      predicted_width_values,
                                      predicted_height_values, batch_size,
                                      feature_map_width, feature_map_height):
        # one flat slot per (batch element, anchor, grid cell):
        default_shape = batch_size * self.num_anchors * feature_map_height * feature_map_width

        # rows 0-3 will hold center x, center y, width and height:
        pred_boxes = self._to_cuda(FloatTensor(4, default_shape))

        # x-offset of every grid cell, tiled over the batch and anchor
        # dimensions so it aligns element-wise with the flattened predictions:
        grid_x = linspace(0, feature_map_width - 1, feature_map_width).repeat(
            feature_map_height, 1).repeat(batch_size * self.num_anchors, 1,
                                          1).view(default_shape)
        grid_x = self._to_cuda(grid_x)

        # same tiling for the y-offsets (transposed so that values vary
        # along rows rather than columns):
        grid_y = linspace(0,
                          feature_map_height - 1, feature_map_height).repeat(
                              feature_map_width,
                              1).t().repeat(batch_size * self.num_anchors, 1,
                                            1).view(default_shape)
        grid_y = self._to_cuda(grid_y)

        # anchor widths (column 0 of the (num_anchors, 2) anchor table),
        # repeated so that each prediction slot gets its anchor's width:
        anchor_w = self._to_cuda(Tensor(self.anchors)).view(
            self.num_anchors, 2).index_select(1,
                                              self._to_cuda(LongTensor([0])))
        anchor_w = anchor_w.repeat(batch_size, 1).repeat(
            1, 1, feature_map_height * feature_map_width)
        anchor_w = anchor_w.view(default_shape)

        # anchor heights (column 1 of the anchor table), tiled the same way:
        anchor_h = self._to_cuda(Tensor(self.anchors)).view(
            self.num_anchors, 2).index_select(1,
                                              self._to_cuda(LongTensor([1])))
        anchor_h = anchor_h.repeat(batch_size, 1).repeat(
            1, 1, feature_map_height * feature_map_width)
        anchor_h = anchor_h.view(default_shape)

        # https://github.com/marvis/pytorch-yolo2/issues/131#issuecomment-460989919
        # decode the YOLOv2 regressions: cell offsets shift the predicted
        # centers, and exponentiated size predictions scale the anchors:
        pred_boxes[0] = torch_reshape(predicted_center_x_values.data,
                                      (1, default_shape)) + grid_x
        pred_boxes[1] = torch_reshape(predicted_center_y_values.data,
                                      (1, default_shape)) + grid_y
        pred_boxes[2] = torch_reshape(torch_exp(predicted_width_values.data),
                                      (1, default_shape)) * anchor_w
        pred_boxes[3] = torch_reshape(torch_exp(predicted_height_values.data),
                                      (1, default_shape)) * anchor_h

        return self._convert_to_cpu(
            pred_boxes.transpose(0, 1).contiguous().view(-1, 4))
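
For reference, a minimal standalone sketch of the same grid/anchor tiling, using plain torch imports and made-up anchor values and feature-map size, which can help when checking the layout of the flattened tensors:

import torch

batch_size, num_anchors = 1, 2
fm_h, fm_w = 3, 3
anchors = [1.0, 1.5, 2.0, 1.0]  # hypothetical (w, h) pairs
shape = batch_size * num_anchors * fm_h * fm_w

# x-offset of each cell, repeated for every row, anchor and batch element:
grid_x = torch.linspace(0, fm_w - 1, fm_w).repeat(fm_h, 1).repeat(
    batch_size * num_anchors, 1, 1).view(shape)

# anchor widths (column 0 of the (num_anchors, 2) table), tiled so that each
# anchor's width covers one whole feature map:
anchor_w = torch.Tensor(anchors).view(num_anchors, 2).index_select(
    1, torch.LongTensor([0])).repeat(batch_size, 1).repeat(
        1, 1, fm_h * fm_w).view(shape)

print(grid_x.view(num_anchors, fm_h, fm_w)[0])  # 0., 1., 2. in every row
print(anchor_w.view(num_anchors, -1))           # one width per anchor row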
Example #2
def gcxgcy_to_cxcy(gcxgcy, priors_cxcy):
    """Decodes bounding boxes from the corresponding prior boxes, both in center-size coordinates form, as used in SSD
    rewrite in PyTorch.

    This is implemented as shown in https://github.com/sgrvinod/a-PyTorch-Tutorial-to-Object-Detection. Some 
    modifications are made. All credits to @sgrvinod.
    """

    return torch_cat([
        gcxgcy[:, :2] * priors_cxcy[:, 2:] / 10 + priors_cxcy[:, :2],
        torch_exp(gcxgcy[:, 2:] / 5) * priors_cxcy[:, 2:]
    ], 1)
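
A quick sanity check with a single made-up prior box and made-up offsets (torch_cat and torch_exp alias torch.cat and torch.exp, matching the import style above):

from torch import tensor

priors_cxcy = tensor([[0.5, 0.5, 0.2, 0.2]])  # center (0.5, 0.5), size 0.2 x 0.2
gcxgcy = tensor([[1.0, -1.0, 0.0, 0.0]])      # hypothetical predicted offsets

# the center moves by (1.0 * 0.2 / 10, -1.0 * 0.2 / 10) = (0.02, -0.02)
# and the size is unchanged, since exp(0) == 1:
print(gcxgcy_to_cxcy(gcxgcy, priors_cxcy))
# tensor([[0.5200, 0.4800, 0.2000, 0.2000]])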
Example #3
    def __init__(self, token_representation_dimension: int,
                 dropout_prob: float, max_sequence_length: int) -> None:
        super(PositionalEncoding, self).__init__()
        self.dropout_layer = Dropout(p=dropout_prob)

        # defining positional signals added to embeddings:

        # initialization:
        positional_signals = torch_zeros(
            (max_sequence_length, token_representation_dimension),
            requires_grad=False
        )

        positions = torch_arange(
            start=0,
            end=max_sequence_length,
            requires_grad=False
        ).unsqueeze(dim=1)

        wave_inputs = positions * torch_exp(
            torch_arange(
                start=0, end=token_representation_dimension, step=2
            ) * (-log(10000.0) / token_representation_dimension)
        )  # equals positions / 10000^(2i / d), computed in log space

        # interleaving sinusoidal and cosinusoidal components along feature
        # dimension (starting with sine), yielding positional signals for
        # all the allowed positions (for sequences up to the maximum allowed
        # length):
        positional_signals[:, 0::2] = torch_sin(wave_inputs)
        positional_signals[:, 1::2] = torch_cos(wave_inputs)
        positional_signals = positional_signals.unsqueeze(dim=0)

        # parameters not requiring backpropagation (i.e. gradient computation
        # and update):
        self.register_buffer('positional_signals', positional_signals)
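
Assuming the aliased imports used above and that PositionalEncoding subclasses torch.nn.Module, a quick shape check of the registered buffer (the hyperparameter values are arbitrary):

encoding = PositionalEncoding(token_representation_dimension=512,
                              dropout_prob=0.1, max_sequence_length=100)
# one signal per position, broadcastable over the batch dimension:
print(encoding.positional_signals.shape)  # torch.Size([1, 100, 512])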
Example #4
    def detect_objects(self, image_as_tensor, confidence_threshold,
                       nms_threshold):
        self.forward(image_as_tensor)

        yolov2_loss = self.loss_function.layer
        num_anchors = yolov2_loss.num_anchors

        if self.predictions.dim() == 3:
            self.predictions = self.predictions.unsqueeze(0)

        assert self.predictions.size(1) == (5 + self.class_count) * num_anchors

        batch_size = self.predictions.size(0)
        feature_map_height = self.predictions.size(2)
        feature_map_width = self.predictions.size(3)
        number_of_pixels = feature_map_height * feature_map_width
        view_size = batch_size * num_anchors * number_of_pixels

        output = self.predictions.view(
            batch_size * num_anchors, 5 + self.class_count,
            number_of_pixels).transpose(0, 1).contiguous().view(
                5 + self.class_count, view_size)
        output = output.cuda() if self.use_cuda else output

        # per-cell offsets and per-anchor sizes, tiled exactly as in the loss
        # layer so that they align with the flattened predictions:
        grid_x = torch_linspace(0, feature_map_width - 1,
                                feature_map_width).repeat(
                                    feature_map_height,
                                    1).repeat(batch_size * num_anchors, 1,
                                              1).view(view_size)
        grid_x = grid_x.cuda() if self.use_cuda else grid_x

        grid_y = torch_linspace(0, feature_map_height - 1,
                                feature_map_height).repeat(
                                    feature_map_width,
                                    1).t().repeat(batch_size * num_anchors, 1,
                                                  1).view(view_size)
        grid_y = grid_y.cuda() if self.use_cuda else grid_y

        anchor_w = Tensor(yolov2_loss.anchors).view(
            num_anchors,
            yolov2_loss.anchor_step).index_select(1, LongTensor([0])).repeat(
                batch_size, 1).repeat(1, 1, number_of_pixels).view(view_size)
        anchor_w = anchor_w.cuda() if self.use_cuda else anchor_w

        anchor_h = Tensor(yolov2_loss.anchors).view(
            num_anchors,
            yolov2_loss.anchor_step).index_select(1, LongTensor([1])).repeat(
                batch_size, 1).repeat(1, 1, number_of_pixels).view(view_size)
        anchor_h = anchor_h.cuda() if self.use_cuda else anchor_h

        # class-probability distribution over the class dimension:
        class_scores = Softmax(dim=1)(output[5:5 + self.class_count].transpose(
            0, 1)).data
        max_class_scores, top_classes = torch_max(class_scores, 1)

        objectness_confidences = torch_sigmoid(output[4])
        max_class_scores = max_class_scores.view(-1)
        top_classes = top_classes.view(-1)

        confidences = objectness_confidences * max_class_scores
        x_predictions = (torch_sigmoid(output[0]) +
                         grid_x) / feature_map_width
        y_predictions = (torch_sigmoid(output[1]) +
                         grid_y) / feature_map_height
        w_predictions = (torch_exp(output[2]) * anchor_w) / feature_map_width
        h_predictions = (torch_exp(output[3]) * anchor_h) / feature_map_height

        # the flattened predictions are laid out as (batch * num_anchors,
        # pixels), so each batch element owns one contiguous chunk of
        # num_anchors * number_of_pixels entries; thresholding per chunk keeps
        # detections from different batch elements separate:
        chunk_size = num_anchors * number_of_pixels

        all_boxes = []
        for b_index in range(batch_size):
            start = b_index * chunk_size
            kept_indices = (confidences[start:start + chunk_size] >
                            confidence_threshold).nonzero()[:, 0] + start
            boxes = []
            for index in kept_indices.tolist():
                objectness = objectness_confidences[index].item()
                cx = x_predictions[index].item()
                cy = y_predictions[index].item()
                w = w_predictions[index].item()
                h = h_predictions[index].item()
                max_class_score = max_class_scores[index].item()
                top_class = top_classes[index].item()

                box = [cx, cy, w, h, objectness, max_class_score, top_class]

                # any further class whose score, weighted by objectness, also
                # clears the threshold is appended as an extra
                # (score, class id) pair:
                possible_classes = (class_scores[index] * objectness >
                                    confidence_threshold).nonzero()[:, 0]
                possible_classes = possible_classes[
                    possible_classes != top_class]

                for cls in possible_classes:
                    box.append(class_scores[index][cls].item())
                    box.append(cls.item())
                boxes.append(box)
            all_boxes.append(boxes)

        detections = []
        image_width = image_as_tensor.size(3)
        image_height = image_as_tensor.size(2)
        for b_index in range(batch_size):
            batch_detections = []
            boxes = nms(all_boxes[b_index], nms_threshold)
            for box in boxes:
                x1 = max(box[0] - box[2] / 2.0, 0) * image_width
                y1 = max(box[1] - box[3] / 2.0, 0) * image_height
                x2 = min(box[0] + box[2] / 2.0, 1) * image_width
                y2 = min(box[1] + box[3] / 2.0, 1) * image_height
                objectness = box[4]

                for j in range((len(box) - 5) // 2):
                    cls_conf = box[5 + 2 * j]
                    cls_id = box[6 + 2 * j]
                    prob = objectness * cls_conf
                    batch_detections.append([cls_id, prob, x1, y1, x2, y2])
            detections.append(batch_detections)

        return detections
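
The final loop converts each normalized (cx, cy, w, h) box into clamped pixel corner coordinates; a standalone illustration of that conversion, with a made-up box and image size:

def to_corner_pixels(box, image_width, image_height):
    # box holds normalized (cx, cy, w, h); clamp to [0, 1] before scaling
    cx, cy, w, h = box[:4]
    x1 = max(cx - w / 2.0, 0) * image_width
    y1 = max(cy - h / 2.0, 0) * image_height
    x2 = min(cx + w / 2.0, 1) * image_width
    y2 = min(cy + h / 2.0, 1) * image_height
    return x1, y1, x2, y2

print(to_corner_pixels([0.5, 0.5, 0.4, 0.6], 416, 416))
# (124.8, 83.2, 291.2, 332.8)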