Example 1
def get_iou(boxes1: Tensor, boxes2: Tensor, return_ignore=False) -> Tensor:
    """
    Given two lists of boxes of size N and M,
    compute the IoU (intersection over union)
    between __all__ N x M pairs of boxes.
    The box order must be (xmin, ymin, xmax, ymax).

    Args:
        boxes1, boxes2 (Tensor): contain N and M boxes, respectively. When
            ``return_ignore`` is True, column 4 of ``boxes2`` holds labels,
            with -1 marking ignored ground-truth boxes.

    Returns:
        Tensor: IoU, sized [N,M]. If ``return_ignore`` is True, also returns an
        [N,M] tensor of intersection over the predicted-box area for ignored boxes.
    """
    box = boxes1
    gt = boxes2
    target_shape = (boxes1.shapeof(0), boxes2.shapeof(0), 4)

    b_box = F.add_axis(boxes1, 1).broadcast(*target_shape)
    b_gt = F.add_axis(boxes2[:, :4], 0).broadcast(*target_shape)

    iw = F.minimum(b_box[:, :, 2], b_gt[:, :, 2]) - F.maximum(
        b_box[:, :, 0], b_gt[:, :, 0]
    )
    ih = F.minimum(b_box[:, :, 3], b_gt[:, :, 3]) - F.maximum(
        b_box[:, :, 1], b_gt[:, :, 1]
    )
    inter = F.maximum(iw, 0) * F.maximum(ih, 0)

    area_box = (box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1])
    area_gt = (gt[:, 2] - gt[:, 0]) * (gt[:, 3] - gt[:, 1])

    area_target_shape = (box.shapeof(0), gt.shapeof(0))

    b_area_box = F.add_axis(area_box, 1).broadcast(*area_target_shape)
    b_area_gt = F.add_axis(area_gt, 0).broadcast(*area_target_shape)

    union = b_area_box + b_area_gt - inter
    overlaps = F.maximum(inter / union, 0)

    if return_ignore:
        overlaps_ignore = F.maximum(inter / b_area_box, 0)
        gt_ignore_mask = F.add_axis((gt[:, 4] == -1), 0).broadcast(*area_target_shape)
        overlaps *= (1 - gt_ignore_mask)
        overlaps_ignore *= gt_ignore_mask
        return overlaps, overlaps_ignore

    return overlaps
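
For reference, the broadcasted N x M IoU computation above can be sketched in plain NumPy (a hypothetical standalone helper, not part of the example):

import numpy as np

def iou_numpy(boxes1, boxes2):
    # boxes1: (N, 4), boxes2: (M, 4), in (xmin, ymin, xmax, ymax) order
    b1 = boxes1[:, None, :]  # (N, 1, 4)
    b2 = boxes2[None, :, :]  # (1, M, 4)
    iw = np.minimum(b1[..., 2], b2[..., 2]) - np.maximum(b1[..., 0], b2[..., 0])
    ih = np.minimum(b1[..., 3], b2[..., 3]) - np.maximum(b1[..., 1], b2[..., 1])
    inter = np.maximum(iw, 0) * np.maximum(ih, 0)
    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    union = area1[:, None] + area2[None, :] - inter
    return np.maximum(inter / union, 0)  # (N, M)

# two 2x2 boxes sharing a 1x1 corner: IoU = 1 / (4 + 4 - 1) ~ 0.143
print(iou_numpy(np.array([[0., 0., 2., 2.]]), np.array([[1., 1., 3., 3.]])))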
Example 2
    def get_cls_reg_ctr_targets(self, points, gt_bboxes, bbox_scale=0.15):
        """
            Compute regression, classification targets for points in multiple images.
            Args:
                points (Tensor): (1, 2, 37, 37). Position of each point on the original image.
                gt_bboxes (Tensor): Ground truth bboxes of each image, (B,4), in [tl_x, tl_y, br_x, br_y] format (top-left and bottom-right corners on the original image).
            Returns:
                cls_labels (Tensor): Labels. (B, 1, 37, 37)   0 or 1; 0 means background, 1 means inside the box.
                bbox_targets (Tensor): BBox targets. (B, 4, 37, 37)  only the foreground is considered; the background loss should be set to 0!
                centerness_targets (Tensor): (B, 1, 37, 37)  only the foreground is considered; the background loss should be set to 0!
        """
        B, _ = gt_bboxes.shape
        gt_bboxes = F.add_axis(gt_bboxes, axis=-1)
        gt_bboxes = F.add_axis(gt_bboxes, axis=-1)  # (B,4,1,1)
        # cls_labels
        # compute four bounds to decide whether each point falls inside the box;
        # the template is large, so shrink the bbox to bbox_scale of its size
        gap = (gt_bboxes[:, 2, ...] -
               gt_bboxes[:, 0, ...]) * (1 - bbox_scale) / 2  # margin trimmed from each side
        up_bound = points[:, 0, ...] > gt_bboxes[:, 0, ...] + gap
        left_bound = points[:, 1, ...] > gt_bboxes[:, 1, ...] + gap
        down_bound = points[:, 0, ...] < gt_bboxes[:, 2, ...] - gap
        right_bound = points[:, 1, ...] < gt_bboxes[:, 3, ...] - gap
        cls_labels = up_bound * left_bound * down_bound * right_bound
        cls_labels = F.add_axis(cls_labels, axis=1)  # (B, 1, 37, 37)
        cls_labels.requires_grad = False

        # bbox_targets
        # for every coordinate in points, compute its offset from the box corners
        # (computed for every point, so negative values appear outside the box)
        up_left = points - gt_bboxes[:, 0:2, ...]  # (B, 2, 37, 37) offset from the top-left corner
        bottom_right = gt_bboxes[:, 2:4, ...] - points
        bbox_targets = F.concat([up_left, bottom_right],
                                axis=1)  # (B, 4, 37, 37)
        bbox_targets.requires_grad = False

        # centerness_targets
        up_bottom = F.minimum(up_left[:, 0, ...],
                              bottom_right[:, 0, ...]) / F.maximum(
                                  up_left[:, 0, ...], bottom_right[:, 0, ...])
        left_right = F.minimum(up_left[:, 1, ...],
                               bottom_right[:, 1, ...]) / F.maximum(
                                   up_left[:, 1, ...], bottom_right[:, 1, ...])
        centerness_targets = F.sqrt(F.abs(up_bottom * left_right))
        centerness_targets = F.add_axis(centerness_targets,
                                        axis=1)  # (B,1,37,37)
        centerness_targets.requires_grad = False
        return cls_labels, bbox_targets, centerness_targets
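
The centerness target follows the FCOS-style formula centerness = sqrt(min(t, b) / max(t, b) * min(l, r) / max(l, r)). A scalar sketch in plain Python (hypothetical helper) shows it peaks at the box center and decays toward the edges:

import math

def centerness(top, bottom, left, right):
    # top/bottom/left/right: distances from a point to the four box edges
    return math.sqrt(abs(min(top, bottom) / max(top, bottom)
                         * min(left, right) / max(left, right)))

print(centerness(5, 5, 5, 5))  # 1.0: point at the box center
print(centerness(1, 9, 5, 5))  # ~0.33: point near one edge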
Example 3
    def get_center_offsets(self, featmap, stride):
        f_shp = featmap.shape
        fm_height, fm_width = f_shp[-2], f_shp[-1]

        shift_x = F.linspace(0, fm_width - 1, fm_width) * stride
        shift_y = F.linspace(0, fm_height - 1, fm_height) * stride

        # make the mesh grid of shift_x and shift_y
        mesh_shape = (fm_height, fm_width)
        broad_shift_x = shift_x.reshape(-1, shift_x.shape[0]).broadcast(*mesh_shape)
        broad_shift_y = shift_y.reshape(shift_y.shape[0], -1).broadcast(*mesh_shape)

        flatten_shift_x = F.add_axis(broad_shift_x.reshape(-1), 1)
        flatten_shift_y = F.add_axis(broad_shift_y.reshape(-1), 1)

        shifts = F.concat(
            [flatten_shift_x, flatten_shift_y, flatten_shift_x, flatten_shift_y,],
            axis=1)
        return shifts 
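
A NumPy sketch (hypothetical helper) of the same shift grid: each feature-map cell (y, x) maps to (x * stride, y * stride) on the input image, and the pair is duplicated so the shifts can be added to (xmin, ymin, xmax, ymax) anchors in one step:

import numpy as np

def center_shifts(fm_height, fm_width, stride):
    xs = np.arange(fm_width) * stride
    ys = np.arange(fm_height) * stride
    grid_x, grid_y = np.meshgrid(xs, ys)  # each (H, W)
    flat_x, flat_y = grid_x.reshape(-1), grid_y.reshape(-1)
    return np.stack([flat_x, flat_y, flat_x, flat_y], axis=1)  # (H*W, 4)

print(center_shifts(2, 2, 16))
# [[ 0  0  0  0]
#  [16  0 16  0]
#  [ 0 16  0 16]
#  [16 16 16 16]]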
Example 4
def get_focal_loss(
    logits: Tensor,
    labels: Tensor,
    ignore_label: int = -1,
    background: int = 0,
    alpha: float = 0.5,
    gamma: float = 0,
    norm_type: str = "fg",
) -> Tensor:
    r"""Focal Loss for Dense Object Detection:
    <https://arxiv.org/pdf/1708.02002.pdf>

    .. math::

        FL(p_t) = -\alpha_t(1-p_t)^\gamma \log(p_t)

    Args:
        logits (Tensor):
            the predicted logits with the shape of :math:`(B, A, C)`
        labels (Tensor):
            the assigned labels of boxes with shape of :math:`(B, A)`
        ignore_label (int):
            the value of ignore class. Default: -1
        background (int):
            the value of background class. Default: 0
        alpha (float):
            parameter to mitigate class imbalance. Default: 0.5
        gamma (float):
            parameter to mitigate easy/hard loss imbalance. Default: 0
        norm_type (str): how to normalize the loss; currently supports "fg" and "none":
            "fg": the loss is normalized by the number of foreground samples
            "none": no normalization

    Returns:
        the calculated focal loss.
    """
    class_range = F.arange(1, logits.shape[2] + 1)

    labels = F.add_axis(labels, axis=2)
    scores = F.sigmoid(logits)
    pos_part = (1 - scores)**gamma * layers.logsigmoid(logits)
    neg_part = scores**gamma * layers.logsigmoid(-logits)

    pos_loss = -(labels == class_range) * pos_part * alpha
    neg_loss = (-(labels != class_range) * (labels != ignore_label) *
                neg_part * (1 - alpha))
    loss = (pos_loss + neg_loss).sum()

    if norm_type == "fg":
        fg_mask = (labels != background) * (labels != ignore_label)
        return loss / F.maximum(fg_mask.sum(), 1)
    elif norm_type == "none":
        return loss
    else:
        raise NotImplementedError
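
To see what the gamma term buys, here is a scalar sketch of the focal factor FL(p_t) = -alpha * (1 - p_t)^gamma * log(p_t) in plain NumPy (binary case, with assumed alpha/gamma values rather than the defaults above): well-classified samples are sharply down-weighted.

import numpy as np

def focal_term(p_t, alpha=0.25, gamma=2.0):
    return -alpha * (1 - p_t) ** gamma * np.log(p_t)

print(focal_term(0.9))  # easy sample: ~0.00026, contributes almost nothing
print(focal_term(0.1))  # hard sample: ~0.47, dominates the loss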
Example 5
def box_overlap_opr(boxes1: Tensor, boxes2: Tensor) -> Tensor:
    """
    Given two lists of boxes of size N and M,
    compute the IoU (intersection over union)
    between __all__ N x M pairs of boxes.
    The box order must be (xmin, ymin, xmax, ymax).

    Args:
        boxes1, boxes2 (Tensor): contain N and M boxes, respectively.

    Returns:
        Tensor: IoU, sized [N,M].
    """
    box = boxes1
    gt = boxes2
    target_shape = (boxes1.shape[0], boxes2.shapeof()[0], 4)

    b_box = F.add_axis(boxes1, 1).broadcast(*target_shape)
    b_gt = F.add_axis(boxes2, 0).broadcast(*target_shape)

    iw = F.minimum(b_box[:, :, 2], b_gt[:, :, 2]) - F.maximum(
        b_box[:, :, 0], b_gt[:, :, 0])
    ih = F.minimum(b_box[:, :, 3], b_gt[:, :, 3]) - F.maximum(
        b_box[:, :, 1], b_gt[:, :, 1])
    inter = F.maximum(iw, 0) * F.maximum(ih, 0)

    area_box = (box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1])
    area_gt = (gt[:, 2] - gt[:, 0]) * (gt[:, 3] - gt[:, 1])

    area_target_shape = (box.shape[0], gt.shapeof()[0])

    b_area_box = F.add_axis(area_box, 1).broadcast(*area_target_shape)
    b_area_gt = F.add_axis(area_gt, 0).broadcast(*area_target_shape)

    union = b_area_box + b_area_gt - inter
    overlaps = F.maximum(inter / union, 0)

    return overlaps
Example 6
def get_cls_reg_ctr_targets(points, gt_bboxes, bbox_scale=0.25):
    """
        Compute regression, classification targets for points in multiple images.
        Args:
            points (Tensor): (1, 2, 19, 19).
            gt_bboxes (Tensor): Ground truth bboxes of each image, (B,4), in [tl_x, tl_y, br_x, br_y] format.
        Returns:
            cls_labels (Tensor): Labels. (B, 1, 19, 19)   0 or 1; 0 means background, 1 means inside the box.
            bbox_targets (Tensor): BBox targets. (B, 4, 19, 19)  only the foreground is considered; the background loss should be set to 0!
            centerness_targets (Tensor): (B, 1, 19, 19)  only the foreground is considered; the background loss should be set to 0!
    """
    gt_bboxes = F.add_axis(gt_bboxes, axis=-1)
    gt_bboxes = F.add_axis(gt_bboxes, axis=-1)  # (B,4,1,1)
    # cls_labels
    # compute four bounds to decide whether each point falls inside the box;
    # the template is large, so shrink the bbox to bbox_scale of its size
    gap = (gt_bboxes[:, 2, ...] - gt_bboxes[:, 0, ...]) * (1 - bbox_scale) / 2
    up_bound = points[:, 0, ...] > gt_bboxes[:, 0, ...] + gap
    left_bound = points[:, 1, ...] > gt_bboxes[:, 1, ...] + gap
    down_bound = points[:, 0, ...] < gt_bboxes[:, 2, ...] - gap
    right_bound = points[:, 1, ...] < gt_bboxes[:, 3, ...] - gap
    cls_labels = up_bound * left_bound * down_bound * right_bound
    cls_labels = F.add_axis(cls_labels, axis=1)  # (B,1,19,19)

    # bbox_targets
    # for every coordinate in points, compute its offset from the box corners
    # (computed for every point, so negative values appear outside the box)
    up_left = points - gt_bboxes[:, 0:2, ...]  # (B, 2, 19, 19)
    bottom_right = gt_bboxes[:, 2:4, ...] - points
    bbox_targets = F.concat([up_left, bottom_right], axis=1)  # (B, 4, 19, 19)

    # centerness_targets
    up_bottom = F.minimum(up_left[:, 0, ...],
                          bottom_right[:, 0, ...]) / F.maximum(
                              up_left[:, 0, ...], bottom_right[:, 0, ...])
    left_right = F.minimum(up_left[:, 1, ...],
                           bottom_right[:, 1, ...]) / F.maximum(
                               up_left[:, 1, ...], bottom_right[:, 1, ...])
    centerness_targets = F.sqrt(F.abs(up_bottom * left_right))
    return cls_labels, bbox_targets, centerness_targets
Example 7
    def forward(self, fpn_fms, rcnn_rois, im_info=None, gt_boxes=None):
        rcnn_rois, labels, bbox_targets = self.get_ground_truth(
            rcnn_rois, im_info, gt_boxes)

        fpn_fms = [fpn_fms[x] for x in self.in_features]
        pool_features = layers.roi_pool(
            fpn_fms,
            rcnn_rois,
            self.stride,
            self.pooling_size,
            self.pooling_method,
        )
        flatten_feature = F.flatten(pool_features, start_axis=1)
        roi_feature = F.relu(self.fc1(flatten_feature))
        roi_feature = F.relu(self.fc2(roi_feature))
        pred_cls = self.pred_cls(roi_feature)
        pred_delta = self.pred_delta(roi_feature)

        if self.training:
            # loss for classification
            loss_rcnn_cls = layers.softmax_loss(pred_cls, labels)
            # loss for regression
            pred_delta = pred_delta.reshape(-1, self.cfg.num_classes + 1, 4)

            vlabels = labels.reshape(-1, 1).broadcast((labels.shapeof(0), 4))
            pred_delta = F.indexing_one_hot(pred_delta, vlabels, axis=1)

            loss_rcnn_loc = layers.get_smooth_l1_loss(
                pred_delta,
                bbox_targets,
                labels,
                self.cfg.rcnn_smooth_l1_beta,
                norm_type="all",
            )
            loss_dict = {
                'loss_rcnn_cls': loss_rcnn_cls,
                'loss_rcnn_loc': loss_rcnn_loc
            }
            return loss_dict
        else:
            # slice from index 1 to drop the background class
            pred_scores = F.softmax(pred_cls, axis=1)[:, 1:]
            pred_delta = pred_delta[:, 4:].reshape(-1, 4)
            target_shape = (rcnn_rois.shapeof(0), self.cfg.num_classes, 4)
            # rois (N, 4) -> (N, 1, 4) -> (N, 80, 4) -> (N * 80, 4)
            base_rois = F.add_axis(rcnn_rois[:, 1:5],
                                   1).broadcast(target_shape).reshape(-1, 4)
            pred_bbox = self.box_coder.decode(base_rois, pred_delta)
            return pred_bbox, pred_scores
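
The F.indexing_one_hot gather in the training branch can be sketched in NumPy (hypothetical shapes): for each RoI, pick the 4 regression deltas of its assigned class.

import numpy as np

N, C = 3, 5                                # RoIs, classes incl. background
pred_delta = np.random.randn(N, C, 4)
labels = np.array([2, 0, 4])               # assigned class per RoI
picked = pred_delta[np.arange(N), labels]  # (N, 4)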
Example 8
def test_generator_batch(optical, sar, *, netG):
    netG.eval()
    cls_score, offsets, ctr_score = netG(
        sar, optical)  # [B,1,19,19]  [B,2,19,19]  [B,1,19,19]
    B, _, _, _ = cls_score.shape
    # optionally weight the classification score by the centerness score
    # cls_score = cls_score * ctr_score
    cls_score = cls_score.reshape(B, -1)  # [B,19*19]
    # find the max
    max_id = F.argmax(cls_score, axis=1)  # (B, )
    pred_box = get_box(netG.fm_ctr, offsets)  # (B,4,H,W)
    pred_box = pred_box.reshape(B, 4, -1)
    output = []
    for i in range(B):
        output.append(F.add_axis(pred_box[i, :, max_id[i]], axis=0))  # (1, 4)
    return F.concat(output, axis=0)  # [B,4]
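
The per-sample gather at the end can be sketched in NumPy (hypothetical shapes): flatten the score map, take the argmax, and pick the matching column of the box map.

import numpy as np

B = 2
scores = np.random.rand(B, 19 * 19)    # flattened score map
boxes = np.random.rand(B, 4, 19 * 19)  # one box per score-map cell
best = scores.argmax(axis=1)           # (B,)
picked = boxes[np.arange(B), :, best]  # (B, 4): best box per sample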
Example 9
    def forward(self, input_ids, token_type_ids=None):
        seq_length = input_ids.shape[1]

        if token_type_ids is None:
            token_type_ids = zeros_like(input_ids)

        position_ids = F.linspace(0, seq_length - 1,
                                  seq_length).astype(np.int32)
        position_ids = F.add_axis(position_ids, 0).broadcast(*input_ids.shape)
        words_embeddings = self.word_embeddings(input_ids)

        position_embeddings = self.position_embeddings(position_ids)
        token_type_embeddings = self.token_type_embeddings(token_type_ids)

        embeddings = words_embeddings + position_embeddings + token_type_embeddings
        embeddings = self.LayerNorm(embeddings)
        embeddings = self.dropout(embeddings)
        return embeddings
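
A NumPy sketch of the position-id broadcast above: a single 0..L-1 row is repeated for every sequence in the batch before the embedding lookup.

import numpy as np

input_ids = np.zeros((2, 5), dtype=np.int32)                   # (batch, seq_len)
position_ids = np.broadcast_to(np.arange(5), input_ids.shape)  # (batch, seq_len)
print(position_ids)  # [[0 1 2 3 4], [0 1 2 3 4]]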
Example 10
 def forward(self, fpn_fms, rcnn_rois, labels=None, bbox_targets=None):
     # stride: 64,32,16,8,4 -> 4, 8, 16, 32
     fpn_fms = fpn_fms[1:][::-1]
     stride = [4, 8, 16, 32]
     pool_features, rcnn_rois, labels, bbox_targets = roi_pool(
         fpn_fms, rcnn_rois, stride, (7, 7), 'roi_align', labels,
         bbox_targets)
     flatten_feature = F.flatten(pool_features, start_axis=1)
     roi_feature = F.relu(self.fc1(flatten_feature))
     roi_feature = F.relu(self.fc2(roi_feature))
     pred_cls = self.pred_cls(roi_feature)
     pred_delta = self.pred_delta(roi_feature)
     if self.training:
         # loss for regression
         labels = labels.astype(np.int32).reshape(-1)
          # collapse the multiple classes into one for indexing
         pos_masks = labels > 0
         pred_delta = pred_delta.reshape(-1, config.num_classes, 4)
         indexing_label = (labels * pos_masks).reshape(-1, 1)
         indexing_label = indexing_label.broadcast((labels.shapeof()[0], 4))
         pred_delta = F.indexing_one_hot(pred_delta, indexing_label, 1)
         localization_loss = smooth_l1_loss(pred_delta, bbox_targets,
                                            config.rcnn_smooth_l1_beta)
         localization_loss = localization_loss * pos_masks
         # loss for classification
         valid_masks = labels >= 0
         objectness_loss = softmax_loss(pred_cls, labels)
         objectness_loss = objectness_loss * valid_masks
         normalizer = 1.0 / (valid_masks.sum())
         loss_rcnn_cls = objectness_loss.sum() * normalizer
         loss_rcnn_loc = localization_loss.sum() * normalizer
         loss_dict = {}
         loss_dict['loss_rcnn_cls'] = loss_rcnn_cls
         loss_dict['loss_rcnn_loc'] = loss_rcnn_loc
         return loss_dict
     else:
         pred_scores = F.softmax(pred_cls)[:, 1:].reshape(-1, 1)
         pred_delta = pred_delta[:, 4:].reshape(-1, 4)
         target_shape = (rcnn_rois.shapeof()[0], config.num_classes - 1, 4)
         base_rois = F.add_axis(rcnn_rois[:, 1:5],
                                1).broadcast(target_shape).reshape(-1, 4)
         pred_bbox = restore_bbox(base_rois, pred_delta, True)
         pred_bbox = F.concat([pred_bbox, pred_scores], axis=1)
         return pred_bbox
Example 11
 def forward(self, fpn_fms, rcnn_rois, labels=None, bbox_targets=None):
     # stride: 64,32,16,8,4 -> 4, 8, 16, 32
     fpn_fms = fpn_fms[1:][::-1]
     stride = [4, 8, 16, 32]
     pool_features, rcnn_rois, labels, bbox_targets = roi_pool(
             fpn_fms, rcnn_rois, stride, (7, 7), 'roi_align',
             labels, bbox_targets)
     flatten_feature = F.flatten(pool_features, start_axis=1)
     roi_feature = F.relu(self.fc1(flatten_feature))
     roi_feature = F.relu(self.fc2(roi_feature))
     pred_emd_pred_cls_0 = self.emd_pred_cls_0(roi_feature)
     pred_emd_pred_delta_0 = self.emd_pred_delta_0(roi_feature)
     pred_emd_pred_cls_1 = self.emd_pred_cls_1(roi_feature)
     pred_emd_pred_delta_1 = self.emd_pred_delta_1(roi_feature)
     if self.training:
         loss0 = emd_loss(
                     pred_emd_pred_delta_0, pred_emd_pred_cls_0,
                     pred_emd_pred_delta_1, pred_emd_pred_cls_1,
                     bbox_targets, labels)
         loss1 = emd_loss(
                     pred_emd_pred_delta_1, pred_emd_pred_cls_1,
                     pred_emd_pred_delta_0, pred_emd_pred_cls_0,
                     bbox_targets, labels)
         loss = F.concat([loss0, loss1], axis=1)
         indices = F.argmin(loss, axis=1)
         loss_emd = F.indexing_one_hot(loss, indices, 1)
         loss_emd = loss_emd.sum()/loss_emd.shapeof()[0]
         loss_dict = {}
         loss_dict['loss_rcnn_emd'] = loss_emd
         return loss_dict
     else:
         pred_scores_0 = F.softmax(pred_emd_pred_cls_0)[:, 1:].reshape(-1, 1)
         pred_scores_1 = F.softmax(pred_emd_pred_cls_1)[:, 1:].reshape(-1, 1)
         pred_delta_0 = pred_emd_pred_delta_0[:, 4:].reshape(-1, 4)
         pred_delta_1 = pred_emd_pred_delta_1[:, 4:].reshape(-1, 4)
         target_shape = (rcnn_rois.shapeof()[0], config.num_classes - 1, 4)
         base_rois = F.add_axis(rcnn_rois[:, 1:5], 1).broadcast(target_shape).reshape(-1, 4)
         pred_bbox_0 = restore_bbox(base_rois, pred_delta_0, True)
         pred_bbox_1 = restore_bbox(base_rois, pred_delta_1, True)
         pred_bbox_0 = F.concat([pred_bbox_0, pred_scores_0], axis=1)
         pred_bbox_1 = F.concat([pred_bbox_1, pred_scores_1], axis=1)
          # interleave the two heads per RoI: [head0 roi1, head1 roi1, head0 roi2, ...]
         pred_bbox = F.concat((pred_bbox_0, pred_bbox_1), axis=1).reshape(-1,5)
         return pred_bbox
Example 12
def train_generator_batch(image, label, *, opt, netG, netloss):
    netG.train()
    B, T, _, H, W = image.shape
    # image
    image_S = image.reshape((B * T, -1, H, W))
    image_S = F.interpolate(image_S, scale_factor=[0.25, 0.25])
    image_S = F.interpolate(image_S, size=[H, W])
    image_S = image_S.reshape((B, T, -1, H, W))
    image_D = image - image_S
    # label
    label_S = label.reshape((B * T, -1, 4 * H, 4 * W))
    label_S = F.interpolate(label_S, scale_factor=[0.25, 0.25])
    label_S = F.interpolate(label_S, size=[4 * H, 4 * W])
    label_S = label_S.reshape((B, T, -1, 4 * H, 4 * W))
    label_D = label - label_S

    HR_G = []
    HR_D = []
    HR_S = []

    pre_S_hat = mge.tensor(
        np.zeros((B, hidden_channels, H, W), dtype=np.float32))
    pre_D_hat = F.zeros_like(pre_S_hat)
    pre_SD = F.zeros_like(pre_S_hat)

    imgHR, pre_SD, pre_S_hat, pre_D_hat, img_S, img_D = netG(
        image[:, 0, ...], image_S[:, 0, ...], image_D[:, 0, ...],
        image_S[:, 1, ...], image_D[:, 1, ...], pre_S_hat, pre_D_hat, pre_SD)
    HR_G.append(F.add_axis(imgHR, axis=1))
    HR_D.append(F.add_axis(img_D, axis=1))
    HR_S.append(F.add_axis(img_S, axis=1))
    for t in range(1, T):
        imgHR, pre_SD, pre_S_hat, pre_D_hat, img_S, img_D = netG(
            image[:, t, ...], image_S[:, t, ...], image_D[:, t, ...],
            image_S[:, t - 1, ...], image_D[:, t - 1, ...],
            pre_S_hat, pre_D_hat, pre_SD)
        HR_G.append(F.add_axis(imgHR, axis=1))
        HR_D.append(F.add_axis(img_D, axis=1))  # keep the S/D order of the first frame
        HR_S.append(F.add_axis(img_S, axis=1))

    HR_G = F.concat(HR_G, axis=1)
    HR_D = F.concat(HR_D, axis=1)
    HR_S = F.concat(HR_S, axis=1)
    # assert HR_G.shape == HR_D.shape and HR_D.shape == HR_S.shape # [B,T,C,H,W]
    loss = netloss(HR_G, HR_D, HR_S, label, label_D, label_S)
    opt.backward(loss)
    if dist.is_distributed():
        # do all reduce mean
        pass
    return loss
Example 13
    def forward(
        self,
        input_ids,
        token_type_ids=None,
        attention_mask=None,
        output_all_encoded_layers=True,
    ):
        if attention_mask is None:
            attention_mask = ones_like(input_ids)
        if token_type_ids is None:
            token_type_ids = zeros_like(input_ids)
        # We create a 3D attention mask from a 2D tensor mask.
        # Sizes are [batch_size, 1, 1, to_seq_length]
        # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
        # this attention mask is more simple than the triangular masking of causal attention
        # used in OpenAI GPT, we just need to prepare the broadcast dimension here.
        extended_attention_mask = F.add_axis(attention_mask, (1, 2))

        # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
        # masked positions, this operation will create a tensor which is 0.0 for
        # positions we want to attend and -10000.0 for masked positions.
        # Since we are adding it to the raw scores before the softmax, this is
        # effectively the same as removing these entirely.
        extended_attention_mask = extended_attention_mask.astype(
            next(self.parameters()).dtype)  # fp16 compatibility
        extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0

        embedding_output = self.embeddings(input_ids, token_type_ids)

        encoded_layers = self.encoder(
            embedding_output,
            extended_attention_mask,
            output_all_encoded_layers=output_all_encoded_layers,
        )

        sequence_output = encoded_layers[-1]
        pooled_output = self.pooler(sequence_output)
        if not output_all_encoded_layers:
            encoded_layers = encoded_layers[-1]
        return encoded_layers, pooled_output
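
The additive-mask trick described in the comments is easy to verify in NumPy: 1 becomes 0.0 (attend) and 0 becomes -10000.0 (effectively masked out after the softmax).

import numpy as np

mask = np.array([[1, 1, 1, 0]], dtype=np.float32)  # (batch, seq_len)
extended = mask[:, None, None, :]                  # (batch, 1, 1, seq_len)
additive = (1.0 - extended) * -10000.0
print(additive.reshape(-1))  # [-0. -0. -0. -10000.]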
Example 14
 def forward(self, now_LR, pre_h_SD):
     """
     now_LR: B,3,H,W
     pre_h_SD: B,48,H,W
     """
     batch, C, H, W = pre_h_SD.shape
     kernels = self.conv(now_LR)  # [B, k*k, H, W]
     batchwise_ans = []
     for idx in range(batch):
         kernel = kernels[idx]  # [k*k, H, W]
         kernel = F.dimshuffle(kernel, (1, 2, 0))  # [H, W , k*k]
         kernel = F.reshape(kernel, (H, W, 1, self.K, self.K, 1))
         kernel = F.broadcast_to(kernel, (C, H, W, 1, self.K, self.K, 1))
         batchwise_ans.append(
             F.local_conv2d(
                 F.add_axis(pre_h_SD[idx], 0), kernel, [1, 1], [1, 1],
                  [1, 1]))  # [1, C, H, W]; note: there is a known bug with the padding here
     similarity_matrix = F.concat(batchwise_ans, axis=0)  # [B,C,H,W]
     del batchwise_ans
     similarity_matrix = F.sigmoid(similarity_matrix)
     return F.multiply(pre_h_SD, similarity_matrix)
Example 15
    def forward(self, now_LR, pre_h_SD):
        """
            now_LR: B,3,H,W
            pre_h_SD: B,64,H,W
        """
        pad = self.K // 2
        batch, C, H, W = pre_h_SD.shape
        kernels = self.conv(now_LR)  # [B, k*k, H, W]
        # pad pre_h_SD along H and W
        similarity_matrix = F.zeros_like(pre_h_SD)
        pre_h_SD = add_H_W_Padding(pre_h_SD, margin=pad)
        for i in range(self.K):
            for j in range(self.K):
                # element-wise multiply
                kernel = kernels[:, i * self.K + j, :, :]  # [B, H, W]
                kernel = F.add_axis(kernel, axis=1)  # [B, 1 ,H, W]
                kernel = F.broadcast_to(kernel, [batch, C, H, W])
                corr = kernel * pre_h_SD[:, :, i:(H + i), j:(W + j)]
                similarity_matrix = similarity_matrix + corr  # [B, C, H, W]

        similarity_matrix = F.sigmoid(similarity_matrix)
        return F.multiply(pre_h_SD[:, :, pad:(H + pad), pad:(W + pad)],
                          similarity_matrix)
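
For a single channel, the unrolled loop above amounts to this NumPy sketch (hypothetical sizes): each output pixel is its K x K neighbourhood in the padded map weighted by that pixel's own predicted kernel.

import numpy as np

K, H, W = 3, 4, 4
pad = K // 2
feat = np.random.rand(H + 2 * pad, W + 2 * pad)  # padded feature map
kernels = np.random.rand(K * K, H, W)            # one K*K kernel per pixel
out = np.zeros((H, W))
for i in range(K):
    for j in range(K):
        out += kernels[i * K + j] * feat[i:H + i, j:W + j]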
Example 16
    def forward(self, x):
        if self.transform_input:
            x_ch0 = F.add_axis(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
            x_ch1 = F.add_axis(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
            x_ch2 = F.add_axis(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
            x = F.concat([x_ch0, x_ch1, x_ch2], 1)

        # N x 3 x 224 x 224
        x = self.conv1(x)
        # N x 64 x 112 x 112
        x = self.maxpool1(x)
        # N x 64 x 56 x 56
        x = self.conv2(x)
        # N x 64 x 56 x 56
        x = self.conv3(x)
        # N x 192 x 56 x 56
        x = self.maxpool2(x)

        # N x 192 x 28 x 28
        x = self.inception3a(x)
        # N x 256 x 28 x 28
        x = self.inception3b(x)
        # N x 480 x 28 x 28
        x = self.maxpool3(x)
        # N x 480 x 14 x 14
        x = self.inception4a(x)
        # N x 512 x 14 x 14
        if self.training and self.aux_logits:
            aux1 = self.aux1(x)

        x = self.inception4b(x)
        # N x 512 x 14 x 14
        x = self.inception4c(x)
        # N x 512 x 14 x 14
        x = self.inception4d(x)
        # N x 528 x 14 x 14
        if self.training and self.aux_logits:
            aux2 = self.aux2(x)

        x = self.inception4e(x)
        # N x 832 x 14 x 14
        x = self.maxpool4(x)
        # N x 832 x 7 x 7
        x = self.inception5a(x)
        # N x 832 x 7 x 7
        x = self.inception5b(x)
        # N x 1024 x 7 x 7
        x = self.avgpool(x)
        # N x 1024 x 1 x 1
        x = x.reshape(x.shape[0], -1)
        # N x 1024
        x = self.dropout(x)
        x = self.fc(x)
        # N x 1000 (num_classes)
        if self.training and self.aux_logits:
            return _GoogLeNetOuputs(x, aux2, aux1)
        return x
Example 17
def train_generator_batch(image, label, *, opt, netG, netloss):
    netG.train()
    B, T, _, H, W = image.shape
    HR_G = []

    # first frame
    pre_SD = mge.tensor(np.zeros((B, hidden_channels, H, W), dtype=np.float32))
    LR = F.concat([
        F.add_axis(image[:, 2, ...], axis=1),
        F.add_axis(image[:, 1, ...], axis=1), image[:, 0:3, ...]
    ],
                  axis=1)
    imgHR, pre_SD = netG(LR, pre_SD)
    # first frame result
    HR_G.append(F.add_axis(imgHR, axis=1))

    # second frame
    LR = F.concat([F.add_axis(image[:, 1, ...], axis=1), image[:, 0:4, ...]],
                  axis=1)
    imgHR, pre_SD = netG(LR, pre_SD)
    # second frame result
    HR_G.append(F.add_axis(imgHR, axis=1))

    for t in range(2, T - 2):
        imgHR, pre_SD = netG(image[:, t - 2:t + 3, ...], pre_SD)
        HR_G.append(F.add_axis(imgHR, axis=1))

    # T-2 frame
    LR = F.concat(
        [image[:, T - 4:T, ...],
         F.add_axis(image[:, -2, ...], axis=1)],
        axis=1)
    imgHR, pre_SD = netG(LR, pre_SD)
    # T-2 frame result
    HR_G.append(F.add_axis(imgHR, axis=1))

    # T-1 frame
    LR = F.concat([
        image[:, T - 3:T, ...],
        F.add_axis(image[:, -2, ...], axis=1),
        F.add_axis(image[:, -3, ...], axis=1)
    ],
                  axis=1)
    imgHR, pre_SD = netG(LR, pre_SD)
    # T-1 frame result
    HR_G.append(F.add_axis(imgHR, axis=1))

    HR_G = F.concat(HR_G, axis=1)
    # assert HR_G.shape == HR_D.shape and HR_D.shape == HR_S.shape # [B,T,C,H,W]
    loss = netloss(HR_G, label)
    opt.backward(loss)
    if dist.is_distributed():
        # do all reduce mean
        pass
    return loss
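
The edge handling above is frame mirroring: each target frame t reads the window [t-2, ..., t+2] with out-of-range indices reflected. A small index-only sketch (hypothetical T) reproduces the windows built above:

T = 6
for t in range(T):
    window = [min(abs(i), 2 * (T - 1) - i) for i in range(t - 2, t + 3)]
    print(t, window)  # t=0 -> [2, 1, 0, 1, 2]; t=5 -> [3, 4, 5, 4, 3]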
Example 18
def cascade_roi_target(rpn_rois, im_info, gt_boxes, pos_threshold=0.5, top_k=1):
    return_rois = []
    return_labels = []
    return_bbox_targets = []
    # get per image proposals and gt_boxes
    for bid in range(config.batch_per_gpu):
        gt_boxes_perimg = gt_boxes[bid, :im_info[bid, 5], :]
        batch_inds = mge.ones((gt_boxes_perimg.shapeof()[0], 1)) * bid
        #if config.proposal_append_gt:
        gt_rois = F.concat([batch_inds, gt_boxes_perimg[:, :4]], axis=1)
        batch_roi_mask = rpn_rois[:, 0] == bid
        batch_roi_inds = mask_to_inds(batch_roi_mask)
        all_rois = F.concat([rpn_rois.ai[batch_roi_inds], gt_rois], axis=0)
        overlaps_normal, overlaps_ignore = box_overlap_ignore_opr(
                all_rois[:, 1:5], gt_boxes_perimg)
        overlaps_normal, overlaps_normal_indices = F.argsort(overlaps_normal, descending=True)
        overlaps_ignore, overlaps_ignore_indices = F.argsort(overlaps_ignore, descending=True)
        # gt max and indices, ignore max and indices
        max_overlaps_normal = overlaps_normal[:, :top_k].reshape(-1)
        gt_assignment_normal = overlaps_normal_indices[:, :top_k].reshape(-1)
        max_overlaps_ignore = overlaps_ignore[:, :top_k].reshape(-1)
        gt_assignment_ignore = overlaps_ignore_indices[:, :top_k].reshape(-1)
        # cons masks
        ignore_assign_mask = (max_overlaps_normal < config.fg_threshold) * (
                max_overlaps_ignore > max_overlaps_normal)
        max_overlaps = max_overlaps_normal * (1 - ignore_assign_mask) + \
                max_overlaps_ignore * ignore_assign_mask
        gt_assignment = gt_assignment_normal * (1 - ignore_assign_mask) + \
                gt_assignment_ignore * ignore_assign_mask
        gt_assignment = gt_assignment.astype(np.int32)
        labels = gt_boxes_perimg.ai[gt_assignment, 4]
        fg_mask = (max_overlaps >= config.fg_threshold) * (1 - F.equal(labels, config.ignore_label))
        bg_mask = (max_overlaps < config.bg_threshold_high) * (
                max_overlaps >= config.bg_threshold_low)
        fg_mask = fg_mask.reshape(-1, top_k)
        bg_mask = bg_mask.reshape(-1, top_k)
        #pos_max = config.num_rois * config.fg_ratio
        #fg_inds_mask = _bernoulli_sample_masks(fg_mask[:, 0], pos_max, 1)
        #neg_max = config.num_rois - fg_inds_mask.sum()
        #bg_inds_mask = _bernoulli_sample_masks(bg_mask[:, 0], neg_max, 1)
        labels = labels * fg_mask.reshape(-1)
        #keep_mask = fg_inds_mask + bg_inds_mask
        #keep_inds = mask_to_inds(keep_mask)
        #keep_inds = keep_inds[:F.minimum(config.num_rois, keep_inds.shapeof()[0])]
        # labels
        labels = labels.reshape(-1, top_k)
        gt_assignment = gt_assignment.reshape(-1, top_k).reshape(-1)
        target_boxes = gt_boxes_perimg.ai[gt_assignment, :4]
        #rois = all_rois.ai[keep_inds]
        target_shape = (all_rois.shapeof()[0], top_k, all_rois.shapeof()[-1])
        target_rois = F.add_axis(all_rois, 1).broadcast(target_shape).reshape(-1, all_rois.shapeof()[-1])
        bbox_targets = bbox_transform_opr(target_rois[:, 1:5], target_boxes)
        if config.rcnn_bbox_normalize_targets:
            std_opr = mge.tensor(config.bbox_normalize_stds[None, :])
            mean_opr = mge.tensor(config.bbox_normalize_means[None, :])
            minus_opr = mean_opr / std_opr
            bbox_targets = bbox_targets / std_opr - minus_opr
        bbox_targets = bbox_targets.reshape(-1, top_k * 4)
        return_rois.append(all_rois)
        return_labels.append(labels)
        return_bbox_targets.append(bbox_targets)
    if config.batch_per_gpu == 1:
        return F.zero_grad(all_rois), F.zero_grad(labels), F.zero_grad(bbox_targets)
    else:
        return_rois = F.concat(return_rois, axis=0)
        return_labels = F.concat(return_labels, axis=0)
        return_bbox_targets = F.concat(return_bbox_targets, axis=0)
        return F.zero_grad(return_rois), F.zero_grad(return_labels), F.zero_grad(return_bbox_targets)
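
The normalization branch near the end is standard mean/std whitening of the regression targets, written so the subtraction reuses the divided mean; a NumPy sketch with assumed stats:

import numpy as np

stds = np.array([0.1, 0.1, 0.2, 0.2])  # assumed bbox_normalize_stds
means = np.zeros(4)                    # assumed bbox_normalize_means
bbox_targets = np.random.randn(8, 4)
minus = means / stds
normalized = bbox_targets / stds - minus  # == (bbox_targets - means) / stds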