Example #1
    def forward(self,
                It,
                S,
                D,
                pre_S,
                pre_D,
                pre_S_hat=None,
                pre_D_hat=None,
                pre_SD=None):
        B, _, H, W = It.shape
        if pre_S_hat is None:
            assert pre_D_hat is None and pre_SD is None
            pre_S_hat = megengine.tensor(
                np.zeros((B, self.hidden_channels, H, W), dtype=np.float32))
            pre_D_hat = F.zeros_like(pre_S_hat)
            pre_SD = F.zeros_like(pre_S_hat)

        # pre_SD = self.hsa(It, pre_SD) # auto select
        S = F.concat([pre_S, S, pre_S_hat, pre_SD], axis=1)
        S = self.pre_SD_S(S)
        D = F.concat([pre_D, D, pre_D_hat, pre_SD], axis=1)
        D = self.pre_SD_D(D)
        for i in range(self.blocknums):
            S, D = self.SDBlocks[i](S, D)
        pre_SD = self.conv_SD(S + D)
        S = self.convS(S)
        D = self.convD(D)
        I = self.convHR(F.concat([S, D], axis=1))
        return self.trans_HR(I), pre_SD, S, D, self.trans_S(S), self.trans_D(D)
def train_generator_batch(image, label, *, opt, netG, netloss):
    netG.train()
    B, T, _, H, W = image.shape
    # image
    image_S = image.reshape((B * T, -1, H, W))
    image_S = F.interpolate(image_S, scale_factor=[0.25, 0.25])
    image_S = F.interpolate(image_S, size=[H, W])
    image_S = image_S.reshape((B, T, -1, H, W))
    image_D = image - image_S
    # label
    label_S = label.reshape((B * T, -1, 4 * H, 4 * W))
    label_S = F.interpolate(label_S, scale_factor=[0.25, 0.25])
    label_S = F.interpolate(label_S, size=[4 * H, 4 * W])
    label_S = label_S.reshape((B, T, -1, 4 * H, 4 * W))
    label_D = label - label_S

    HR_G = []
    HR_D = []
    HR_S = []

    pre_S_hat = mge.tensor(
        np.zeros((B, hidden_channels, H, W), dtype=np.float32))
    pre_D_hat = F.zeros_like(pre_S_hat)
    pre_SD = F.zeros_like(pre_S_hat)

    imgHR, pre_SD, pre_S_hat, pre_D_hat, img_S, img_D = netG(
        image[:, 0, ...], image_S[:, 0, ...], image_D[:, 0, ...],
        image_S[:, 1, ...], image_D[:, 1, ...], pre_S_hat, pre_D_hat, pre_SD)
    HR_G.append(F.add_axis(imgHR, axis=1))
    HR_D.append(F.add_axis(img_D, axis=1))
    HR_S.append(F.add_axis(img_S, axis=1))
    for t in range(1, T):
        imgHR, pre_SD, pre_S_hat, pre_D_hat, img_S, img_D = netG(
            image[:, t, ...], image_S[:, t, ...], image_D[:, t, ...],
            image_S[:, t - 1, ...], image_D[:, t - 1, ...],
            pre_S_hat, pre_D_hat, pre_SD)
        HR_G.append(F.add_axis(imgHR, axis=1))
        HR_D.append(F.add_axis(img_D, axis=1))
        HR_S.append(F.add_axis(img_S, axis=1))

    HR_G = F.concat(HR_G, axis=1)
    HR_D = F.concat(HR_D, axis=1)
    HR_S = F.concat(HR_S, axis=1)
    # assert HR_G.shape == HR_D.shape and HR_D.shape == HR_S.shape # [B,T,C,H,W]
    loss = netloss(HR_G, HR_D, HR_S, label, label_D, label_S)
    opt.backward(loss)
    if dist.is_distributed():
        # do all reduce mean
        pass
    return loss
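
For context: the image_S / image_D pair fed to the network is a low-pass plus residual split of each frame, built by 4x down- then up-sampling and subtracting. A minimal sketch of just that decomposition (assuming MegEngine is installed; sizes are illustrative):

import numpy as np
import megengine as mge
import megengine.functional as F

frame = mge.tensor(np.random.rand(1, 3, 64, 64).astype(np.float32))
# low-frequency "structure": blur via 4x down- then up-sampling
frame_S = F.interpolate(F.interpolate(frame, scale_factor=0.25), size=[64, 64])
# high-frequency "detail": the residual
frame_D = frame - frame_S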
Example #3
def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45):
    # convert (cx, cy, w, h) boxes to corner format (x1, y1, x2, y2)
    box_corner = F.zeros_like(prediction)
    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_corner[:, :, :4]

    output = [None for _ in range(len(prediction))]
    for i, image_pred in enumerate(prediction):

        # If none are remaining => process next image
        if not image_pred.shape[0]:
            continue
        # Get score and class with highest confidence
        class_conf = F.max(image_pred[:, 5 : 5 + num_classes], 1, keepdims=True)
        class_pred = F.argmax(image_pred[:, 5 : 5 + num_classes], 1, keepdims=True)

        class_conf_squeeze = F.squeeze(class_conf)
        conf_mask = image_pred[:, 4] * class_conf_squeeze >= conf_thre
        detections = F.concat((image_pred[:, :5], class_conf, class_pred), 1)
        detections = detections[conf_mask]
        if not detections.shape[0]:
            continue

        nms_out_index = F.vision.nms(
            detections[:, :4], detections[:, 4] * detections[:, 5], nms_thre,
        )
        detections = detections[nms_out_index]
        if output[i] is None:
            output[i] = detections
        else:
            output[i] = F.concat((output[i], detections))

    return output
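
A hedged usage sketch for postprocess: judging from the corner conversion above, each box row is laid out as (cx, cy, w, h, objectness, per-class scores), so the input is [batch, num_boxes, 5 + num_classes]. The shapes and thresholds below are illustrative, not from the original repo:

import numpy as np
from megengine import Tensor

num_classes = 3
pred = Tensor(np.random.rand(1, 100, 5 + num_classes).astype(np.float32))
pred[:, :, 0:2] = pred[:, :, 0:2] * 320  # box centers inside a 320x320 image
pred[:, :, 2:4] = pred[:, :, 2:4] * 40   # box widths / heights
outputs = postprocess(pred, num_classes, conf_thre=0.3, nms_thre=0.45)
# outputs[i] is None or a [k, 7] tensor: x1, y1, x2, y2, obj, cls_conf, cls_idx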
Example #4
def jvp(inp, expr):
    # Jacobian-vector product via double backprop: the inner manager computes
    # the VJP inp.grad = J^T @ oup_grad; differentiating that w.r.t. oup_grad
    # with the outer manager yields oup_grad.grad = J @ 1 (gm.backward defaults
    # its incoming gradient to ones).
    with GradManager() as gm:
        with GradManager().attach([inp]) as gm2:
            oup = expr(inp)
            oup_grad = F.zeros_like(oup)
            gm.attach(oup_grad)
            gm2.backward(oup, oup_grad)
        gm.backward(inp.grad)
    return oup, oup_grad.grad
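
A quick sanity check (my reading, not from the original source): since gm.backward defaults its incoming gradient to ones, the returned oup_grad.grad is the JVP with an all-ones tangent, i.e. J @ 1. For an elementwise square that should equal 2 * x:

import numpy as np
import megengine as mge

x = mge.tensor(np.array([1.0, 2.0, 3.0], dtype=np.float32))
y, jv = jvp(x, lambda t: t ** 2)
# the Jacobian of t**2 is diag(2t), so J @ ones == 2 * x
print(jv)  # expected: [2. 4. 6.]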
Example #5
    def forward(self, input_ids, token_type_ids=None):
        seq_length = input_ids.shape[1]

        if token_type_ids is None:
            token_type_ids = F.zeros_like(input_ids)

        position_ids = F.linspace(0, seq_length - 1, seq_length).astype(np.int32)
        position_ids = F.broadcast_to(F.expand_dims(position_ids, 0), input_ids.shape)
        words_embeddings = self.word_embeddings(input_ids)

        position_embeddings = self.position_embeddings(position_ids)
        token_type_embeddings = self.token_type_embeddings(token_type_ids)

        embeddings = words_embeddings + position_embeddings + token_type_embeddings
        embeddings = self.LayerNorm(embeddings)
        embeddings = self.dropout(embeddings)
        return embeddings
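
The position ids above are just 0..seq_length-1 broadcast over the batch. A minimal standalone check of that construction (assuming MegEngine):

import numpy as np
import megengine.functional as F

seq_length, batch = 4, 2
position_ids = F.linspace(0, seq_length - 1, seq_length).astype(np.int32)  # [0 1 2 3]
position_ids = F.broadcast_to(F.expand_dims(position_ids, 0), (batch, seq_length))
print(position_ids.shape)  # (2, 4)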
Example #6
    def forward(
        self,
        input_ids,
        token_type_ids=None,
        attention_mask=None,
        output_all_encoded_layers=True,
    ):
        if attention_mask is None:
            attention_mask = F.ones_like(input_ids)
        if token_type_ids is None:
            token_type_ids = F.zeros_like(input_ids)
        # print('input_ids', input_ids.sum())
        # We create a 3D attention mask from a 2D tensor mask.
        # Sizes are [batch_size, 1, 1, to_seq_length]
        # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
        # this attention mask is simpler than the triangular mask used for causal
        # attention in OpenAI GPT; we just need to prepare the broadcast dimension here.
        # print('attention_mask', attention_mask.sum())
        extended_attention_mask = F.expand_dims(attention_mask, (1, 2))

        # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
        # masked positions, this operation will create a tensor which is 0.0 for
        # positions we want to attend and -10000.0 for masked positions.
        # Since we are adding it to the raw scores before the softmax, this is
        # effectively the same as removing these entirely.
        extended_attention_mask = extended_attention_mask.astype(
            next(self.parameters()).dtype
        )  # fp16 compatibility
        extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0

        embedding_output = self.embeddings(input_ids, token_type_ids)

        encoded_layers = self.encoder(
            embedding_output,
            extended_attention_mask,
            output_all_encoded_layers=output_all_encoded_layers,
        )

        sequence_output = encoded_layers[-1]
        pooled_output = self.pooler(sequence_output)
        if not output_all_encoded_layers:
            encoded_layers = encoded_layers[-1]
        return encoded_layers, pooled_output
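
The additive-mask trick in the middle of this forward is worth isolating: attended positions get bias 0, masked positions get -10000, so the softmax sends the latter to ~0. A standalone sketch (values illustrative):

import numpy as np
import megengine.functional as F
from megengine import Tensor

attention_mask = Tensor(np.array([[1, 1, 1, 0, 0]], dtype=np.float32))  # [batch, seq]
extended = F.expand_dims(attention_mask, (1, 2))  # [batch, 1, 1, seq]
bias = (1.0 - extended) * -10000.0                # 0 if attended, -1e4 if masked
scores = Tensor(np.zeros((1, 1, 1, 5), dtype=np.float32)) + bias
print(F.softmax(scores, axis=-1))  # masked positions get ~0 probability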
Example #7
def sample_labels(labels, num_samples, label_value, ignore_label=-1):
    """sample N labels with label value = sample_labels

    Args:
        labels(Tensor): shape of label is (N,)
        num_samples(int):
        label_value(int):

    Returns:
        label(Tensor): label after sampling
    """
    assert labels.ndim == 1, "Only tensor of dim 1 is supported."
    mask = (labels == label_value)
    num_valid = mask.sum()
    if num_valid <= num_samples:
        return labels

    # give each selected position a random score, then send the
    # (num_valid - num_samples) highest-scoring ones to ignore_label
    random_tensor = F.zeros_like(labels).astype("float32")
    random_tensor[mask] = uniform(size=num_valid)
    _, invalid_inds = F.topk(random_tensor, k=num_valid - num_samples, descending=True)

    labels[invalid_inds] = ignore_label
    return labels
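
A hedged usage sketch (assuming, as in the source module, that F is megengine.functional and uniform is megengine.random.uniform): with five entries equal to 1 and num_samples=2, three randomly chosen ones are flipped to ignore_label:

import numpy as np
from megengine import Tensor

labels = Tensor(np.array([1, 1, 0, 1, 1, 0, 1], dtype=np.int32))
sampled = sample_labels(labels, num_samples=2, label_value=1, ignore_label=-1)
print(sampled)  # exactly two entries remain 1; the surplus three became -1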
Example #8
    def forward(self, now_LR, pre_h_SD):
        """
            now_LR: B,3,H,W
            pre_h_SD: B,64,H,W
        """
        pad = self.K // 2
        batch, C, H, W = pre_h_SD.shape
        kernels = self.conv(now_LR)  # [B, k*k, H, W]
        # pad pre_h_SD along H and W
        similarity_matrix = F.zeros_like(pre_h_SD)
        pre_h_SD = add_H_W_Padding(pre_h_SD, margin=pad)
        for i in range(self.K):
            for j in range(self.K):
                # element-wise product with the shifted window
                kernel = kernels[:, i * self.K + j, :, :]  # [B, H, W]
                kernel = F.add_axis(kernel, axis=1)  # [B, 1 ,H, W]
                kernel = F.broadcast_to(kernel, [batch, C, H, W])
                corr = kernel * pre_h_SD[:, :, i:(H + i), j:(W + j)]
                similarity_matrix = similarity_matrix + corr  # [B, C, H, W]

        similarity_matrix = F.sigmoid(similarity_matrix)
        return F.multiply(pre_h_SD[:, :, pad:(H + pad), pad:(W + pad)],
                          similarity_matrix)
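
add_H_W_Padding is not shown in this snippet. Judging from how it is used (the loop reads shifted HxW windows out of the padded tensor), it zero-pads the last two dims by margin on each side; a hypothetical stand-in consistent with that usage:

import megengine.functional as F

def add_H_W_Padding(x, margin):
    # hypothetical helper: zero-pad the spatial dims (H, W) by `margin` per side
    b, c, h, w = x.shape
    out = F.zeros((b, c, h + 2 * margin, w + 2 * margin), dtype=x.dtype)
    out[:, :, margin:margin + h, margin:margin + w] = x
    return out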
Example #9
    def forward(self, image, im_info, gt_boxes=None):
        image = self.preprocess_image(image)
        features = self.backbone(image)
        features = [features[f] for f in self.in_features]

        box_logits, box_offsets, box_ctrness = self.head(features)

        box_logits_list = [
            _.transpose(0, 2, 3, 1).reshape(image.shape[0], -1,
                                            self.cfg.num_classes)
            for _ in box_logits
        ]
        box_offsets_list = [
            _.transpose(0, 2, 3, 1).reshape(image.shape[0], -1, 4)
            for _ in box_offsets
        ]
        box_ctrness_list = [
            _.transpose(0, 2, 3, 1).reshape(image.shape[0], -1, 1)
            for _ in box_ctrness
        ]

        anchors_list = self.anchor_generator(features)

        all_level_box_logits = F.concat(box_logits_list, axis=1)
        all_level_box_offsets = F.concat(box_offsets_list, axis=1)
        all_level_box_ctrness = F.concat(box_ctrness_list, axis=1)

        if self.training:
            gt_labels, gt_offsets, gt_ctrness = self.get_ground_truth(
                anchors_list,
                gt_boxes,
                im_info[:, 4].astype(np.int32),
            )

            all_level_box_logits = all_level_box_logits.reshape(
                -1, self.cfg.num_classes)
            all_level_box_offsets = all_level_box_offsets.reshape(-1, 4)
            all_level_box_ctrness = all_level_box_ctrness.flatten()

            gt_labels = gt_labels.flatten()
            gt_offsets = gt_offsets.reshape(-1, 4)
            gt_ctrness = gt_ctrness.flatten()

            valid_mask = gt_labels >= 0
            fg_mask = gt_labels > 0
            num_fg = fg_mask.sum()
            sum_ctr = gt_ctrness[fg_mask].sum()
            # add detach() to avoid syncing across ranks in backward
            num_fg = layers.all_reduce_mean(num_fg).detach()
            sum_ctr = layers.all_reduce_mean(sum_ctr).detach()

            gt_targets = F.zeros_like(all_level_box_logits)
            gt_targets[fg_mask, gt_labels[fg_mask] - 1] = 1

            loss_cls = layers.sigmoid_focal_loss(
                all_level_box_logits[valid_mask],
                gt_targets[valid_mask],
                alpha=self.cfg.focal_loss_alpha,
                gamma=self.cfg.focal_loss_gamma,
            ).sum() / F.maximum(num_fg, 1)

            loss_bbox = (layers.iou_loss(
                all_level_box_offsets[fg_mask],
                gt_offsets[fg_mask],
                box_mode="ltrb",
                loss_type=self.cfg.iou_loss_type,
            ) * gt_ctrness[fg_mask]).sum() / F.maximum(
                sum_ctr, 1e-5) * self.cfg.loss_bbox_weight

            loss_ctr = layers.binary_cross_entropy(
                all_level_box_ctrness[fg_mask],
                gt_ctrness[fg_mask],
            ).sum() / F.maximum(num_fg, 1)

            total = loss_cls + loss_bbox + loss_ctr
            loss_dict = {
                "total_loss": total,
                "loss_cls": loss_cls,
                "loss_bbox": loss_bbox,
                "loss_ctr": loss_ctr,
            }
            self.cfg.losses_keys = list(loss_dict.keys())
            return loss_dict
        else:
            # multi-batch testing is currently not supported
            assert image.shape[0] == 1

            all_level_anchors = F.concat(anchors_list, axis=0)
            pred_boxes = self.point_coder.decode(all_level_anchors,
                                                 all_level_box_offsets[0])
            pred_boxes = pred_boxes.reshape(-1, 4)

            scale_w = im_info[0, 1] / im_info[0, 3]
            scale_h = im_info[0, 0] / im_info[0, 2]
            pred_boxes = pred_boxes / F.concat(
                [scale_w, scale_h, scale_w, scale_h], axis=0)
            clipped_boxes = layers.get_clipped_boxes(
                pred_boxes, im_info[0, 2:4]).reshape(-1, 4)
            pred_score = F.sqrt(
                F.sigmoid(all_level_box_logits) *
                F.sigmoid(all_level_box_ctrness))[0]
            return pred_score, clipped_boxes
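
The gt_targets block above is a scatter-style one-hot encoding: labels are 1-based (0 = background), so class c lights up column c - 1 of the matching row. Isolated, with illustrative shapes:

import numpy as np
import megengine.functional as F
from megengine import Tensor

num_classes = 4
gt_labels = Tensor(np.array([0, 2, 1, 0, 4], dtype=np.int32))  # 0 = background
logits = Tensor(np.zeros((5, num_classes), dtype=np.float32))

fg_mask = gt_labels > 0
gt_targets = F.zeros_like(logits)
gt_targets[fg_mask, gt_labels[fg_mask] - 1] = 1  # one-hot rows for foreground anchors
print(gt_targets)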
Example #10
    def forward(self, image, im_info, gt_boxes=None):
        image = self.preprocess_image(image)
        features = self.backbone(image)
        features = [features[f] for f in self.in_features]

        box_logits, box_offsets = self.head(features)

        box_logits_list = [
            _.transpose(0, 2, 3, 1).reshape(image.shape[0], -1,
                                            self.cfg.num_classes)
            for _ in box_logits
        ]
        box_offsets_list = [
            _.transpose(0, 2, 3, 1).reshape(image.shape[0], -1, 4)
            for _ in box_offsets
        ]

        anchors_list = self.anchor_generator(features)

        all_level_box_logits = F.concat(box_logits_list, axis=1)
        all_level_box_offsets = F.concat(box_offsets_list, axis=1)
        all_level_anchors = F.concat(anchors_list, axis=0)

        if self.training:
            gt_labels, gt_offsets = self.get_ground_truth(
                all_level_anchors,
                gt_boxes,
                im_info[:, 4].astype(np.int32),
            )

            all_level_box_logits = all_level_box_logits.reshape(
                -1, self.cfg.num_classes)
            all_level_box_offsets = all_level_box_offsets.reshape(-1, 4)

            gt_labels = gt_labels.flatten()
            gt_offsets = gt_offsets.reshape(-1, 4)

            valid_mask = gt_labels >= 0
            fg_mask = gt_labels > 0
            num_fg = fg_mask.sum()

            gt_targets = F.zeros_like(all_level_box_logits)
            gt_targets[fg_mask, gt_labels[fg_mask] - 1] = 1

            loss_cls = layers.sigmoid_focal_loss(
                all_level_box_logits[valid_mask],
                gt_targets[valid_mask],
                alpha=self.cfg.focal_loss_alpha,
                gamma=self.cfg.focal_loss_gamma,
            ).sum() / F.maximum(num_fg, 1)

            loss_bbox = layers.smooth_l1_loss(
                all_level_box_offsets[fg_mask],
                gt_offsets[fg_mask],
                beta=self.cfg.smooth_l1_beta,
            ).sum() / F.maximum(num_fg, 1) * self.cfg.loss_bbox_weight

            total = loss_cls + loss_bbox
            loss_dict = {
                "total_loss": total,
                "loss_cls": loss_cls,
                "loss_bbox": loss_bbox,
            }
            self.cfg.losses_keys = list(loss_dict.keys())
            return loss_dict
        else:
            # multi-batch testing is currently not supported
            assert image.shape[0] == 1

            pred_boxes = self.box_coder.decode(all_level_anchors,
                                               all_level_box_offsets[0])
            pred_boxes = pred_boxes.reshape(-1, 4)

            scale_w = im_info[0, 1] / im_info[0, 3]
            scale_h = im_info[0, 0] / im_info[0, 2]
            pred_boxes = pred_boxes / F.concat(
                [scale_w, scale_h, scale_w, scale_h], axis=0)
            clipped_boxes = layers.get_clipped_boxes(
                pred_boxes, im_info[0, 2:4]).reshape(-1, 4)
            pred_score = F.sigmoid(all_level_box_logits)[0]
            return pred_score, clipped_boxes
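
The final rescaling in both detectors divides predicted boxes by (resized / original) scale factors to map them back to original-image coordinates. My reading of the im_info layout, inferred from the indexing above and not confirmed against the repo, is (resized_h, resized_w, orig_h, orig_w, num_gt) per row:

import numpy as np
import megengine.functional as F
from megengine import Tensor

# assumed row layout: (resized_h, resized_w, orig_h, orig_w, num_gt)
im_info = Tensor(np.array([[800.0, 800.0, 400.0, 600.0, 0.0]], dtype=np.float32))
scale_w = im_info[0, 1] / im_info[0, 3]  # 800 / 600
scale_h = im_info[0, 0] / im_info[0, 2]  # 800 / 400
boxes = Tensor(np.array([[120.0, 200.0, 600.0, 760.0]], dtype=np.float32))
boxes = boxes / F.concat([scale_w, scale_h, scale_w, scale_h], axis=0)
print(boxes)  # back in original-image coordinates: [[ 90. 100. 450. 380.]]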