    def forward(self, input):
        x, low_level_features = self.xception_features(input)
        x1 = self.aspp1(x)
        x2 = self.aspp2(x)
        x3 = self.aspp3(x)
        x4 = self.aspp4(x)
        x5 = self.global_avg_pool(x)
        x5 = F.interpolate(x5, size=x4.size()[2:], mode='bilinear', align_corners=True)

        x = torch.cat((x1, x2, x3, x4, x5), dim=1)

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = F.interpolate(x, size=(int(math.ceil(input.size()[-2]/4)),
                                int(math.ceil(input.size()[-1]/4))), mode='bilinear', align_corners=True)

        low_level_features = self.conv2(low_level_features)
        low_level_features = self.bn2(low_level_features)
        low_level_features = self.relu(low_level_features)


        x = torch.cat((x, low_level_features), dim=1)
        x = self.last_conv(x)
        x = F.interpolate(x, size=input.size()[2:], mode='bilinear', align_corners=True)

        return x
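The final interpolate call restores the prediction to the input resolution; a tiny standalone check of that step (the shapes below are hypothetical):

import torch
import torch.nn.functional as F

input = torch.randn(1, 3, 513, 513)
x = torch.randn(1, 21, 33, 33)          # hypothetical coarse logits (output stride ~16)
x = F.interpolate(x, size=input.size()[2:], mode='bilinear', align_corners=True)
print(x.shape)                           # torch.Size([1, 21, 513, 513]) -- matches the input H, W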
    def forward(self, x):
        x, skip = x
        x = F.interpolate(x, scale_factor=2, mode='nearest')
        if skip is not None:
            x = torch.cat([x, skip], dim=1)
        x = self.block(x)
        return x
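The shape bookkeeping of the upsample-and-concatenate step above, as a standalone sketch with hypothetical shapes (the trailing self.block is omitted):

import torch
import torch.nn.functional as F

x = torch.randn(1, 256, 16, 16)       # coarse features
skip = torch.randn(1, 64, 32, 32)     # higher-resolution skip features
x = F.interpolate(x, scale_factor=2, mode='nearest')
x = torch.cat([x, skip], dim=1)
print(x.shape)                         # torch.Size([1, 320, 32, 32])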
Example #3
    def forward(self, x):
        """
        Arguments:
            x (list[Tensor]): feature maps for each feature level.
        Returns:
            results (tuple[Tensor]): feature maps after FPN layers.
                They are ordered from highest to lowest resolution.
        """
        last_inner = getattr(self, self.inner_blocks[-1])(x[-1])
        results = []
        results.append(getattr(self, self.layer_blocks[-1])(last_inner))
        for feature, inner_block, layer_block in zip(
            x[:-1][::-1], self.inner_blocks[:-1][::-1], self.layer_blocks[:-1][::-1]
        ):
            inner_top_down = F.interpolate(last_inner, scale_factor=2, mode="nearest")
            inner_lateral = getattr(self, inner_block)(feature)
            # TODO use size instead of scale to make it robust to different sizes
            # inner_top_down = F.upsample(last_inner, size=inner_lateral.shape[-2:],
            # mode='bilinear', align_corners=False)
            last_inner = inner_lateral + inner_top_down
            results.insert(0, getattr(self, layer_block)(last_inner))

        if self.top_blocks is not None:
            last_results = self.top_blocks(results[-1])
            results.extend(last_results)

        return tuple(results)
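A self-contained sketch of the same top-down merge, independent of the class above and with hypothetical channel sizes, showing the expected resolutions (highest first):

import torch
import torch.nn as nn
import torch.nn.functional as F

in_channels = [256, 512, 1024, 2048]   # assumed C2..C5 channels
out_channels = 256
inner = [nn.Conv2d(c, out_channels, 1) for c in in_channels]
layer = [nn.Conv2d(out_channels, out_channels, 3, padding=1) for _ in in_channels]

feats = [torch.randn(1, c, 64 // 2 ** i, 64 // 2 ** i) for i, c in enumerate(in_channels)]
last_inner = inner[-1](feats[-1])
results = [layer[-1](last_inner)]
for f, inn, lay in zip(feats[:-1][::-1], inner[:-1][::-1], layer[:-1][::-1]):
    top_down = F.interpolate(last_inner, scale_factor=2, mode="nearest")
    last_inner = inn(f) + top_down
    results.insert(0, lay(last_inner))
print([tuple(r.shape) for r in results])  # (1, 256, 64, 64) ... (1, 256, 8, 8)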
Example #4
    def make_score_map(self, img, mode='sigmoid'):
        """
        """
        img = img/255
        # The offset is inserted so that the final size of the score map matches
        # the search image. To know more see "How to overlay the search img with
        # the score map" in Trello/Report. It is half of the dimension of the
        # Smallest Class Equivalent of the Ref image.
        offset = (((self.ref.shape[0] + 1)//4)*4 - 1)//2
        img_mean = img.mean()
        img_padded = np.pad(img, ((offset, offset), (offset, offset), (0, 0)),
                            mode='constant', constant_values=img_mean)
        img_padded = numpy_to_torch_var(img_padded, device)
        srch_emb = self.net.get_embedding(img_padded)
        score_map = self.net.match_corr(self.ref_emb, srch_emb)
        dimx = score_map.shape[-1]
        dimy = score_map.shape[-2]
        score_map = score_map.view(-1, dimy, dimx)
        if mode == 'sigmoid':
            score_map = sigmoid(score_map)
        elif mode == 'norm':
            score_map = score_map - score_map.min()
            score_map = score_map/score_map.max()
        score_map = score_map.unsqueeze(0)
        # We upscale 4 times, because the total stride of the network is 4
        score_map = F.interpolate(score_map, scale_factor=4, mode='bilinear',
                                  align_corners=False)

        score_map = score_map.cpu()
        score_map = torch_var_to_numpy(score_map)

        return score_map
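A quick numeric check of the offset formula above, assuming a 127x127 reference image (the value 127 is only an example):

ref_h = 127                                  # hypothetical self.ref.shape[0]
offset = (((ref_h + 1) // 4) * 4 - 1) // 2   # ((128 // 4) * 4 - 1) // 2 = 127 // 2
print(offset)                                # 63 -> the search image is padded by 63 px on each side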
    def forward(self, input):
        x, low_level_feat = self.backbone(input)
        x = self.aspp(x)
        x = self.decoder(x, low_level_feat)
        x = F.interpolate(x, size=input.size()[2:], mode='bilinear', align_corners=True)

        return x
    def forward(self, x):
        x, skip = x

        x = F.interpolate(x, scale_factor=2, mode='nearest')
        skip = self.skip_conv(skip)

        x = x + skip
        return x
    def forward(self, x, low_level_feat):
        low_level_feat = self.conv1(low_level_feat)
        low_level_feat = self.bn1(low_level_feat)
        low_level_feat = self.relu(low_level_feat)

        x = F.interpolate(x, size=low_level_feat.size()[2:], mode='bilinear', align_corners=True)
        x = torch.cat((x, low_level_feat), dim=1)
        x = self.last_conv(x)

        return x
    def forward(self, x):
        x1 = self.aspp1(x)
        x2 = self.aspp2(x)
        x3 = self.aspp3(x)
        x4 = self.aspp4(x)
        x5 = self.global_avg_pool(x)
        x5 = F.interpolate(x5, size=x4.size()[2:], mode='bilinear', align_corners=True)
        x = torch.cat((x1, x2, x3, x4, x5), dim=1)

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        return x
Example #9
    def generate(self, target_layer):
        fmaps = self._find(self.fmap_pool, target_layer)
        grads = self._find(self.grad_pool, target_layer)
        weights = self._compute_grad_weights(grads)

        gcam = torch.mul(fmaps, weights).sum(dim=1, keepdim=True)
        gcam = F.relu(gcam)

        gcam = F.interpolate(
            gcam, self.image_shape, mode="bilinear", align_corners=False
        )

        B, C, H, W = gcam.shape
        gcam = gcam.view(B, -1)
        gcam -= gcam.min(dim=1, keepdim=True)[0]
        gcam /= gcam.max(dim=1, keepdim=True)[0]
        gcam = gcam.view(B, C, H, W)

        return gcam
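The reshape/min/max block above is a per-image min-max normalization; an equivalent sketch using amin/amax over the spatial dimensions (dummy tensor for illustration only):

import torch

gcam = torch.rand(2, 1, 224, 224)
shifted = gcam - gcam.amin(dim=(2, 3), keepdim=True)
normalized = shifted / shifted.amax(dim=(2, 3), keepdim=True)  # each map now spans [0, 1]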
    def forward(self, x):
        c5, c4, c3, c2, _ = x

        p5 = self.conv1(c5)
        p4 = self.p4([p5, c4])
        p3 = self.p3([p4, c3])
        p2 = self.p2([p3, c2])

        s5 = self.s5(p5)
        s4 = self.s4(p4)
        s3 = self.s3(p3)
        s2 = self.s2(p2)

        x = s5 + s4 + s3 + s2

        x = self.dropout(x)
        x = self.final_conv(x)

        x = F.interpolate(x, scale_factor=4, mode='bilinear', align_corners=True)
        return x
    def forward(self, x):

        features = self._get(x)
        x = self.psp(features)
        x = self.conv(x)
        if self.dropout_factor:
            x = self.dropout(x)
        x = self.final_conv(x)
        x = F.interpolate(
            x,
            scale_factor=self.downsample_factor,
            mode='bilinear',
            align_corners=True
        )

        if self.training and self.aux_output:
            aux = self.aux(features)
            x = [x, aux]

        return x
Example #12
import numpy as np
import scipy.misc
import torch
import matplotlib.pyplot as plt
from torchvision import transforms

# import torch.functional as F
import torch.nn.functional as F

eps = np.finfo(np.float64).eps

plt.rcParams['figure.figsize'] = 10, 10


'''
Affine crop testing
'''

img_t = F.interpolate(transforms.ToTensor()(scipy.misc.face()).unsqueeze(0), size=(768, 768), mode='bilinear')

theta = torch.from_numpy(np.array([[[0.2, 0.0, 0.2], [0.0, 0.2, 0.3]]]))

grid = F.affine_grid(theta, torch.Size((1, 3, 768, 768)))
grid.size()

plt.rcParams['figure.figsize'] = 8, 8

fig, axis = plt.subplots(nrows=1, ncols=2)

axis[0].imshow(grid[0, :, :, 0])
axis[0].set_title('x')

axis[1].imshow(grid[0, :, :, 1])
axis[1].set_title('y')
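The affine grid built above is normally consumed by F.grid_sample; a minimal continuation of the same script might look like this (note the grid is cast to float to match the image dtype):

warped = F.grid_sample(img_t, grid.float(), align_corners=False)
plt.imshow(warped[0].permute(1, 2, 0))
plt.title('warped crop')
plt.show()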
Example #13
    def forward(self, x):
        return F.interpolate(x, size=self.size, scale_factor=self.scale_factor,
                             mode=self.mode, align_corners=self.align_corners)
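A minimal module this forward could belong to, for context (the class below is a sketch; the original __init__ is not shown on this page):

import torch.nn as nn
import torch.nn.functional as F

class Interpolate(nn.Module):
    def __init__(self, size=None, scale_factor=None, mode='nearest', align_corners=None):
        super().__init__()
        self.size = size
        self.scale_factor = scale_factor
        self.mode = mode
        self.align_corners = align_corners

    def forward(self, x):
        return F.interpolate(x, size=self.size, scale_factor=self.scale_factor,
                             mode=self.mode, align_corners=self.align_corners)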
Example #14
def random_resize(images, min_size=288, max_size=448):
    new_size = random.sample(list(range(min_size, max_size + 1, 32)), 1)[0]
    images = F.interpolate(images, size=new_size, mode="nearest")
    return images
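A small usage sketch for random_resize on a dummy batch, assuming the function above is in scope (it also needs `random` and `F` imported); the batch shape here is just an example:

import random
import torch
import torch.nn.functional as F

imgs = torch.randn(4, 3, 416, 416)
imgs = random_resize(imgs)
print(imgs.shape[-2:])  # one of 288, 320, ..., 448 on both sides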
    def add_overlay(self, tag, embed, img, alpha=0.8, cmap='inferno', add_ref=None):
        """ Adds to the summary the images of the input image (ref or search)
        overlayed with the corresponding embedding or correlation map. It expect
        tensors of with dimensions [C x H x W] or [B x C x H x W] if the tensor
        has a batch dimension it takes the FIRST ELEMENT of the batch. The image
        is displayed as fusion of the input image in grayscale and the overlay
        in the chosen color_map, this fusion is controlled by the alpha factor.
        In the case of the embeddings, since there are multiple feature
        channels, we show each of them individually in a grid.
        OBS: The colors represent relative values, where the peak color corresponds
        to the maximum value in any given channel, so no direct value comparisons
        can be made between epochs, only the relative distribution of neighboring
        pixel values, (which should be enough, since we are mosly interested
        in finding the maximum of a given correlation map)

        Args:
            tag: (str) The string identifying the image in tensorboard, images
                with the same tag are grouped together with a slider, and are
                indexed by epoch.
            embed: (torch.Tensor) The tensor containing the embedding of an
                input (ref or search image) or a correlation map (the final
                output). The shape should be [B, C, H, W] or [B, H, W] for the
                case of the correlation map.
            img: (torch.Tensor) The image on top of which the embed is going
                to be overlaid. Reference image embeddings should be overlaid
                on top of reference images and search image embeddings as well
                as the correlation maps should be overlaid on top of the search
                images.
            alpha: (float) A mixing variable; it controls how much of the final
                image corresponds to the grayscale input image and how much
                corresponds to the overlay. Alpha = 0 means there is no
                overlay in the final image, only the input image. Conversely,
                Alpha = 1 means there is only overlay. Adjust this value so
                you can distinctly see the overlay details while still seeing
                where it is in relation to the original image.
            cmap: (str) The name of the colormap to be used with the overlay.
                The colormaps are defined in the colormaps.py module, but values
                include 'viridis' (greenish blue) and 'inferno' (yellowish red).
            add_ref: (torch.Tensor) Optional. An additional reference image that
                will be plotted to the side of the other images. Useful when
                plotting correlation maps, because it lets the user see both
                the search image and the reference that is used as the target.

        ``Example``
            >>> summ_maker = SummaryMaker(os.path.join(exp_dir, 'tensorboard'), params,
                                           model.upscale_factor)
            ...
            >>> embed_ref = model.get_embedding(ref_img_batch)
            >>> embed_srch = model.get_embedding(search_batch)
            >>> output_batch = model.match_corr(embed_ref, embed_srch)
            >>> batch_index = 0
            >>> summ_maker.add_overlay("Ref_image_{}".format(tbx_index), embed_ref[batch_index], ref_img_batch[batch_index], cmap='inferno')
            >>> summ_maker.add_overlay("Search_image_{}".format(tbx_index), embed_srch[batch_index], search_batch[batch_index], cmap='inferno')
            >>> summ_maker.add_overlay("Correlation_map_{}".format(tbx_index), output_batch[batch_index], search_batch[batch_index], cmap='inferno')
        """
        # TODO Add numbers in the final image to the feature channels.
        # TODO Add the color bar showing the progression of values.
        # If minibatch is given, take only the first image
        # TODO let the user select the image? Loop on all images?
        if len(embed.shape) == 4:
            embed = embed[0]
        if len(img.shape) == 4:
            img = img[0]
        # Normalize the image.
        img = img - img.min()
        img = img/img.max()
        embed = cm.apply_cmap(embed, cmap=cmap)
        # Get grayscale version of image by taking the weighted average of the channels
        # as described in https://www.cs.virginia.edu/~vicente/recognition/notebooks/image_processing_lab.html#2.-Converting-to-Grayscale
        R,G,B = img
        img_gray = 0.21 * R + 0.72 * G + 0.07 * B
        # Get the upscaled size of the embedding, so as to take into account
        # the network's downscale caused by the stride.
        upsc_size = (embed.shape[-1] - 1) * self.up_factor + 1
        embed = F.interpolate(embed, upsc_size, mode='bilinear',
                              align_corners=False)
        # Pad the embedding with zeros to match the image dimensions. We pad
        # all 4 corners equally to keep the embedding centered.
        tot_pad = img.shape[-1] - upsc_size
        # Sanity check 1. The amount of padding must be equal on all sides, so
        # the total padding on any dimension must be an even integer.
        assert tot_pad % 2 == 0, "The embed or image dimensions are incorrect."
        pad = int(tot_pad/2)
        embed = F.pad(embed, (pad, pad, pad, pad), 'constant', 0)
        # Sanity check 2: the size of the embedding in the (H, W) dimensions
        # matches the size of the image.
        assert embed.shape[-2:] == img.shape[-2:], ("The embedding overlay "
                                                    "and image dimensions "
                                                    "do not agree.")
        final_imgs = alpha * embed + (1-alpha) * img_gray
        # The embedding_channel (or feature channel) dimension is treated like
        # a batch dimension, so the grid shows each individual embedding
        # overlaid with the input image. The original image is also shown.
        # If add_ref is used, the ref image is the first to be shown.
        img = img.unsqueeze(0)
        final_imgs = torch.cat((img, final_imgs))
        if add_ref is not None:
            # Pads the image if necessary
            pad = int((img.shape[-1] - add_ref.shape[-1])//2)
            add_ref = F.pad(add_ref, (pad, pad, pad, pad), 'constant', 0)
            add_ref = add_ref.unsqueeze(0)
            final_imgs = torch.cat((add_ref, final_imgs))
        final_imgs = make_grid(final_imgs, nrow=6)
        self.writer_val.add_image(tag, final_imgs, self.epoch)
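A quick numeric check of the upscaling and padding arithmetic used above, assuming an embedding of width 17, an upscale factor of 4, and a 127-pixel-wide input image (all values hypothetical):

embed_w, up_factor, img_w = 17, 4, 127
upsc_size = (embed_w - 1) * up_factor + 1   # 65
tot_pad = img_w - upsc_size                 # 62, even, so it splits equally
pad = tot_pad // 2                          # 31 px on every side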
Example #16
    def forward(self, x):
        x = F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
        x = self.conv(x)
        return x
    def forward(self, x_small, x_big):
        x_small = self.up(x_small)
        x_small = F.interpolate(x_small, size=x_big.size()[2:], mode='bilinear', align_corners=True)
        x = torch.cat([x_big, x_small], dim=1)
        x = self.conv(x)
        return x
Example #18
    def loss(self,
             cls_scores,
             bbox_preds,
             centernesses,
             cof_preds,
             feat_masks,
             track_feats,
             track_feats_ref,
             gt_bboxes,
             gt_labels,
             img_metas,
             cfg,
             gt_bboxes_ignore=None,
             gt_masks_list=None,
             ref_bboxes_list=None,
             gt_pids_list=None):
        assert len(cls_scores) == len(bbox_preds) == len(centernesses)
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        all_level_points = self.get_points(featmap_sizes, bbox_preds[0].dtype,
                                           bbox_preds[0].device)
        labels, bbox_targets, label_list, bbox_targets_list, gt_inds = self.fcos_target(
            all_level_points, gt_bboxes, gt_labels)

        # decode detection and groundtruth
        det_bboxes = []
        det_targets = []
        num_levels = len(bbox_preds)

        for img_id in range(len(img_metas)):
            bbox_pred_list = [
                bbox_preds[i][img_id].permute(1, 2, 0).reshape(-1, 4).detach()
                for i in range(num_levels)
            ]
            bbox_target_list = bbox_targets_list[img_id]

            bboxes = []
            targets = []
            for i in range(len(bbox_pred_list)):
                bbox_pred = bbox_pred_list[i]
                bbox_target = bbox_target_list[i]
                points = all_level_points[i]
                bboxes.append(distance2bbox(points, bbox_pred))
                targets.append(distance2bbox(points, bbox_target))

            bboxes = torch.cat(bboxes, dim=0)
            targets = torch.cat(targets, dim=0)

            det_bboxes.append(bboxes)
            det_targets.append(targets)
        gt_masks = []
        for i in range(len(gt_labels)):
            gt_label = gt_labels[i]
            gt_masks.append(
                torch.from_numpy(
                    np.array(gt_masks_list[i][:gt_label.shape[0]],
                             dtype=np.float32)).to(gt_label.device))

        num_imgs = cls_scores[0].size(0)
        # flatten cls_scores, bbox_preds and centerness
        flatten_cls_scores = [
            cls_score.permute(0, 2, 3, 1).reshape(-1, self.cls_out_channels)
            for cls_score in cls_scores
        ]
        flatten_bbox_preds = [
            bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
            for bbox_pred in bbox_preds
        ]
        flatten_centerness = [
            centerness.permute(0, 2, 3, 1).reshape(-1)
            for centerness in centernesses
        ]
        flatten_cls_scores = torch.cat(flatten_cls_scores)
        flatten_bbox_preds = torch.cat(flatten_bbox_preds)
        flatten_centerness = torch.cat(flatten_centerness)
        flatten_labels = torch.cat(labels)
        flatten_bbox_targets = torch.cat(bbox_targets)
        # repeat points to align with bbox_preds
        flatten_points = torch.cat(
            [points.repeat(num_imgs, 1) for points in all_level_points])

        pos_inds = flatten_labels.nonzero().reshape(-1)
        num_pos = len(pos_inds)
        loss_cls = self.loss_cls(flatten_cls_scores,
                                 flatten_labels,
                                 avg_factor=num_pos +
                                 num_imgs)  # avoid num_pos being 0

        pos_bbox_preds = flatten_bbox_preds[pos_inds]
        pos_centerness = flatten_centerness[pos_inds]

        if num_pos > 0:
            pos_bbox_targets = flatten_bbox_targets[pos_inds]
            pos_centerness_targets = self.centerness_target(pos_bbox_targets)
            pos_points = flatten_points[pos_inds]
            pos_decoded_bbox_preds = distance2bbox(pos_points, pos_bbox_preds)
            pos_decoded_target_preds = distance2bbox(pos_points,
                                                     pos_bbox_targets)
            # centerness weighted iou loss
            loss_bbox = self.loss_bbox(pos_decoded_bbox_preds,
                                       pos_decoded_target_preds,
                                       weight=pos_centerness_targets,
                                       avg_factor=pos_centerness_targets.sum())
            loss_centerness = self.loss_centerness(pos_centerness,
                                                   pos_centerness_targets)
        else:
            loss_bbox = pos_bbox_preds.sum()
            loss_centerness = pos_centerness.sum()

        ##########mask loss#################
        flatten_cls_scores1 = [
            cls_score.permute(0, 2, 3, 1).reshape(num_imgs, -1,
                                                  self.cls_out_channels)
            for cls_score in cls_scores
        ]
        flatten_cls_scores1 = torch.cat(flatten_cls_scores1, dim=1)

        flatten_cof_preds = [
            cof_pred.permute(0, 2, 3, 1).reshape(cof_pred.shape[0], -1, 32 * 4)
            for cof_pred in cof_preds
        ]

        loss_mask = 0
        loss_match = 0
        match_acc = 0
        n_total = 0
        flatten_cof_preds = torch.cat(flatten_cof_preds, dim=1)

        for i in range(num_imgs):
            labels = torch.cat(
                [labels_level.flatten() for labels_level in label_list[i]])
            bbox_dt = det_bboxes[i] / 2
            bbox_dt = bbox_dt.detach()
            pos_inds = (labels > 0).nonzero().view(-1)
            cof_pred = flatten_cof_preds[i][pos_inds]
            img_mask = feat_masks[i]
            mask_h = img_mask.shape[1]
            mask_w = img_mask.shape[2]
            idx_gt = gt_inds[i]
            bbox_dt = bbox_dt[pos_inds, :4]

            area = (bbox_dt[:, 2] - bbox_dt[:, 0]) * (bbox_dt[:, 3] -
                                                      bbox_dt[:, 1])
            bbox_dt = bbox_dt[area > 1.0, :]
            idx_gt = idx_gt[area > 1.0]
            cof_pred = cof_pred[area > 1.0]
            if bbox_dt.shape[0] == 0:
                loss_mask += area.sum() * 0
                continue

            bbox_gt = gt_bboxes[i]
            cls_score = flatten_cls_scores1[i, pos_inds, labels[pos_inds] -
                                            1].sigmoid().detach()
            cls_score = cls_score[area > 1.0]
            ious = bbox_overlaps(bbox_gt[idx_gt] / 2, bbox_dt, is_aligned=True)
            weighting = cls_score * ious
            weighting = weighting / (torch.sum(weighting) +
                                     0.0001) * len(weighting)

            ###################track####################
            bboxes = ref_bboxes_list[i]
            amplitude = 0.05
            random_offsets = bboxes.new_empty(bboxes.shape[0], 4).uniform_(
                -amplitude, amplitude)
            # before jittering
            cxcy = (bboxes[:, 2:4] + bboxes[:, :2]) / 2
            wh = (bboxes[:, 2:4] - bboxes[:, :2]).abs()
            # after jittering
            new_cxcy = cxcy + wh * random_offsets[:, :2]
            new_wh = wh * (1 + random_offsets[:, 2:])
            # xywh to xyxy
            new_x1y1 = (new_cxcy - new_wh / 2)
            new_x2y2 = (new_cxcy + new_wh / 2)
            new_bboxes = torch.cat([new_x1y1, new_x2y2], dim=1)
            # clip bboxes
            # print(bbox_dt.shape)
            track_feat_i = self.extract_box_feature_center_single(
                track_feats[i], bbox_dt * 2)
            track_box_ref = self.extract_box_feature_center_single(
                track_feats_ref[i], new_bboxes)

            gt_pids = gt_pids_list[i]
            cur_ids = gt_pids[idx_gt]
            prod = torch.mm(track_feat_i, torch.transpose(track_box_ref, 0, 1))
            m = prod.size(0)
            dummy = torch.zeros(m, 1, device=torch.cuda.current_device())

            prod_ext = torch.cat([dummy, prod], dim=1)
            loss_match += cross_entropy(prod_ext, cur_ids)
            n_total += len(idx_gt)
            match_acc += accuracy(prod_ext, cur_ids) * len(idx_gt)

            gt_mask = F.interpolate(gt_masks[i].unsqueeze(0),
                                    scale_factor=0.5,
                                    mode='bilinear',
                                    align_corners=False).squeeze(0)

            shape = np.minimum(feat_masks[i].shape, gt_mask.shape)
            gt_mask_new = gt_mask.new_zeros(gt_mask.shape[0], mask_h, mask_w)
            gt_mask_new[:gt_mask.shape[0], :shape[1], :
                        shape[2]] = gt_mask[:gt_mask.
                                            shape[0], :shape[1], :shape[2]]
            gt_mask_new = gt_mask_new.gt(0.5).float()

            gt_mask_new = torch.index_select(gt_mask_new, 0,
                                             idx_gt).permute(1, 2,
                                                             0).contiguous()

            #######spp###########################
            img_mask1 = img_mask.permute(1, 2, 0)
            pos_masks00 = torch.sigmoid(img_mask1 @ cof_pred[:, 0:32].t())
            pos_masks01 = torch.sigmoid(img_mask1 @ cof_pred[:, 32:64].t())
            pos_masks10 = torch.sigmoid(img_mask1 @ cof_pred[:, 64:96].t())
            pos_masks11 = torch.sigmoid(img_mask1 @ cof_pred[:, 96:128].t())
            pred_masks = torch.stack(
                [pos_masks00, pos_masks01, pos_masks10, pos_masks11], dim=0)
            pred_masks = self.crop_cuda(pred_masks, bbox_dt)
            gt_mask_crop = self.crop_gt_cuda(gt_mask_new, bbox_dt)
            # pred_masks, gt_mask_crop = crop_split(pos_masks00, pos_masks01, pos_masks10, pos_masks11, bbox_dt,
            #                                       gt_mask_new)
            pre_loss = F.binary_cross_entropy(pred_masks,
                                              gt_mask_crop,
                                              reduction='none')

            pos_get_csize = center_size(bbox_dt)
            gt_box_width = pos_get_csize[:, 2]
            gt_box_height = pos_get_csize[:, 3]
            pre_loss = pre_loss.sum(dim=(
                0, 1)) / gt_box_width / gt_box_height / pos_get_csize.shape[0]
            loss_mask += torch.sum(pre_loss * weighting.detach())

        loss_mask = loss_mask / num_imgs
        loss_match = loss_match / num_imgs
        match_acc = match_acc / n_total
        if loss_mask == 0:
            loss_mask = bbox_dt[:, 0].sum() * 0

        return dict(loss_cls=loss_cls,
                    loss_bbox=loss_bbox,
                    loss_centerness=loss_centerness,
                    loss_mask=loss_mask,
                    loss_match=loss_match,
                    match_acc=match_acc)
Example #19
    def forward(self, feats, feats_x, flag_train=True):
        # return multi_apply(self.forward_single, feats, self.scales)
        cls_scores = []
        bbox_preds = []
        centernesses = []
        cof_preds = []
        feat_masks = []
        track_feats = []
        track_feats_ref = []
        count = 0
        for x, x_f, scale, stride in zip(feats, feats_x, self.scales,
                                         self.strides):
            cls_feat = x
            reg_feat = x
            track_feat = x
            track_feat_f = x_f

            for cls_layer in self.cls_convs:
                cls_feat = cls_layer(cls_feat)

            for reg_layer in self.reg_convs:
                reg_feat = reg_layer(reg_feat)

            if count < 3:
                for track_layer in self.track_convs:
                    track_feat = track_layer(track_feat)
                track_feat = F.interpolate(track_feat,
                                           scale_factor=(2**count),
                                           mode='bilinear',
                                           align_corners=False)
                track_feats.append(track_feat)
                if flag_train:
                    for track_layer in self.track_convs:
                        track_feat_f = track_layer(track_feat_f)
                    track_feat_f = F.interpolate(track_feat_f,
                                                 scale_factor=(2**count),
                                                 mode='bilinear',
                                                 align_corners=False)
                    track_feats_ref.append(track_feat_f)
            # scale the bbox_pred of different level
            # float to avoid overflow when enabling FP16
            bbox_pred = scale(self.fcos_reg(reg_feat))

            cls_feat = self.feat_align(cls_feat, bbox_pred)
            cls_score = self.fcos_cls(cls_feat)
            centerness = self.fcos_centerness(reg_feat)
            centernesses.append(centerness)
            cls_scores.append(cls_score)
            bbox_preds.append(bbox_pred.float() * stride)

            ######## COEFFICIENTS ###############
            cof_pred = self.sip_cof(cls_feat)
            cof_preds.append(cof_pred)

            ############contextual#######################
            if count < 3:
                feat_up = F.interpolate(reg_feat,
                                        scale_factor=(2**count),
                                        mode='bilinear',
                                        align_corners=False)
                feat_masks.append(feat_up)
            count = count + 1
        # ################contextual enhanced##################
        feat_masks = torch.cat(feat_masks, dim=1)
        feat_masks = self.relu(
            self.sip_mask_lat(self.relu(self.sip_mask_lat0(feat_masks))))
        feat_masks = F.interpolate(feat_masks,
                                   scale_factor=4,
                                   mode='bilinear',
                                   align_corners=False)

        track_feats = torch.cat(track_feats, dim=1)
        track_feats = self.sipmask_track(track_feats)
        if flag_train:
            track_feats_ref = torch.cat(track_feats_ref, dim=1)
            track_feats_ref = self.sipmask_track(track_feats_ref)
            return cls_scores, bbox_preds, centernesses, cof_preds, feat_masks, track_feats, track_feats_ref
        else:
            return cls_scores, bbox_preds, centernesses, cof_preds, feat_masks, track_feats, track_feats
def validation(model, val_loader, epoch, writer):
    # set evaluate mode
    model.eval()

    total_correct, total_label = 0, 0
    total_correct_hb, total_label_hb = 0, 0
    total_correct_fb, total_label_fb = 0, 0
    hist = np.zeros((args.num_classes, args.num_classes))
    hist_hb = np.zeros((args.hbody_cls, args.hbody_cls))
    hist_fb = np.zeros((args.fbody_cls, args.fbody_cls))

    # Iterate over data.
    bar = Bar('Processing {}'.format('val'), max=len(val_loader))
    bar.check_tty = False
    for idx, batch in enumerate(val_loader):
        image, target, hlabel, flabel, _ = batch
        image, target, hlabel, flabel = image.cuda(), target.cuda(
        ), hlabel.cuda(), flabel.cuda()
        with torch.no_grad():
            h, w = target.size(1), target.size(2)
            outputs = model(image)
            outputs = gather(outputs, 0, dim=0)
            preds = F.interpolate(input=outputs[0][-1],
                                  size=(h, w),
                                  mode='bilinear',
                                  align_corners=True)
            preds_hb = F.interpolate(input=outputs[1][-1],
                                     size=(h, w),
                                     mode='bilinear',
                                     align_corners=True)
            preds_fb = F.interpolate(input=outputs[2][-1],
                                     size=(h, w),
                                     mode='bilinear',
                                     align_corners=True)
            if idx % 50 == 0:
                img_vis = inv_preprocess(image, num_images=args.save_num)
                label_vis = decode_predictions(target.int(),
                                               num_images=args.save_num,
                                               num_classes=args.num_classes)
                pred_vis = decode_predictions(torch.argmax(preds, dim=1),
                                              num_images=args.save_num,
                                              num_classes=args.num_classes)

                # visual grids
                img_grid = torchvision.utils.make_grid(
                    torch.from_numpy(img_vis.transpose(0, 3, 1, 2)))
                label_grid = torchvision.utils.make_grid(
                    torch.from_numpy(label_vis.transpose(0, 3, 1, 2)))
                pred_grid = torchvision.utils.make_grid(
                    torch.from_numpy(pred_vis.transpose(0, 3, 1, 2)))
                writer.add_image('val_images', img_grid,
                                 epoch * len(val_loader) + idx + 1)
                writer.add_image('val_labels', label_grid,
                                 epoch * len(val_loader) + idx + 1)
                writer.add_image('val_preds', pred_grid,
                                 epoch * len(val_loader) + idx + 1)

            # pixelAcc
            correct, labeled = batch_pix_accuracy(preds.data, target)
            correct_hb, labeled_hb = batch_pix_accuracy(preds_hb.data, hlabel)
            correct_fb, labeled_fb = batch_pix_accuracy(preds_fb.data, flabel)
            # mIoU
            hist += fast_hist(preds, target, args.num_classes)
            hist_hb += fast_hist(preds_hb, hlabel, args.hbody_cls)
            hist_fb += fast_hist(preds_fb, flabel, args.fbody_cls)

            total_correct += correct
            total_correct_hb += correct_hb
            total_correct_fb += correct_fb
            total_label += labeled
            total_label_hb += labeled_hb
            total_label_fb += labeled_fb
            pixAcc = 1.0 * total_correct / (np.spacing(1) + total_label)
            IoU = round(np.nanmean(per_class_iu(hist)) * 100, 2)
            pixAcc_hb = 1.0 * total_correct_hb / (np.spacing(1) +
                                                  total_label_hb)
            IoU_hb = round(np.nanmean(per_class_iu(hist_hb)) * 100, 2)
            pixAcc_fb = 1.0 * total_correct_fb / (np.spacing(1) +
                                                  total_label_fb)
            IoU_fb = round(np.nanmean(per_class_iu(hist_fb)) * 100, 2)
            # plot progress
            bar.suffix = '{} / {} | pixAcc: {pixAcc:.4f}, mIoU: {IoU:.4f} |' \
                         'pixAcc_hb: {pixAcc_hb:.4f}, mIoU_hb: {IoU_hb:.4f} |' \
                         'pixAcc_fb: {pixAcc_fb:.4f}, mIoU_fb: {IoU_fb:.4f}'.format(idx + 1, len(val_loader),
                                                                                    pixAcc=pixAcc, IoU=IoU,
                                                                                    pixAcc_hb=pixAcc_hb, IoU_hb=IoU_hb,
                                                                                    pixAcc_fb=pixAcc_fb, IoU_fb=IoU_fb)
            bar.next()

    print('\n per class iou part: {}'.format(per_class_iu(hist) * 100))
    print('per class iou hb: {}'.format(per_class_iu(hist_hb) * 100))
    print('per class iou fb: {}'.format(per_class_iu(hist_fb) * 100))

    mIoU = round(np.nanmean(per_class_iu(hist)) * 100, 2)
    mIoU_hb = round(np.nanmean(per_class_iu(hist_hb)) * 100, 2)
    mIoU_fb = round(np.nanmean(per_class_iu(hist_fb)) * 100, 2)

    writer.add_scalar('val_pixAcc', pixAcc, epoch)
    writer.add_scalar('val_mIoU', mIoU, epoch)
    writer.add_scalar('val_pixAcc_hb', pixAcc_hb, epoch)
    writer.add_scalar('val_mIoU_hb', mIoU_hb, epoch)
    writer.add_scalar('val_pixAcc_fb', pixAcc_fb, epoch)
    writer.add_scalar('val_mIoU_fb', mIoU_fb, epoch)
    bar.finish()

    return pixAcc, mIoU
Example #21
    def _shortcut(self, x):
        if self.upsample:
            x = F.interpolate(x, scale_factor=2, mode='nearest')
        if self.learned_sc:
            x = self.conv1x1(x)
        return x
Example #22
    def __unpool(self, input):
        _, _, H, W = input.shape
        return F.interpolate(input, mode='bilinear', scale_factor=2, align_corners=True)
Example #23
def train(hyp, opt, device, tb_writer=None, wandb=None):
    logger.info(f'Hyperparameters {hyp}')
    save_dir, epochs, batch_size, total_batch_size, weights, rank = \
        Path(opt.save_dir), opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank

    # Directories
    wdir = save_dir / 'weights'
    wdir.mkdir(parents=True, exist_ok=True)  # make dir
    last = wdir / 'last.pt'
    best = wdir / 'best.pt'
    results_file = save_dir / 'results.txt'

    # Save run settings
    with open(save_dir / 'hyp.yaml', 'w') as f:
        yaml.dump(hyp, f, sort_keys=False)
    with open(save_dir / 'opt.yaml', 'w') as f:
        yaml.dump(vars(opt), f, sort_keys=False)

    # Configure
    plots = not opt.evolve  # create plots
    cuda = device.type != 'cpu'
    init_seeds(2 + rank)
    with open(opt.data) as f:
        data_dict = yaml.load(f, Loader=yaml.FullLoader)  # data dict
    with torch_distributed_zero_first(rank):
        check_dataset(data_dict)  # check
    train_path = data_dict['train']
    test_path = data_dict['val']
    nc = 1 if opt.single_cls else int(data_dict['nc'])  # number of classes
    names = ['item'] if opt.single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
    assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data)  # check

    # Model
    pretrained = weights.endswith('.pt')
    if pretrained:
        with torch_distributed_zero_first(rank):
            attempt_download(weights)  # download if not found locally
        ckpt = torch.load(weights, map_location=device)  # load checkpoint
        if hyp.get('anchors'):
            ckpt['model'].yaml['anchors'] = round(hyp['anchors'])  # force autoanchor
        model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc).to(device)  # create
        exclude = ['anchor'] if opt.cfg or hyp.get('anchors') else []  # exclude keys
        state_dict = ckpt['model'].float().state_dict()  # to FP32
        state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude)  # intersect
        model.load_state_dict(state_dict, strict=False)  # load
        logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights))  # report
    else:
        model = Model(opt.cfg, ch=3, nc=nc).to(device)  # create

    # Freeze
    freeze = []  # parameter names to freeze (full or partial)
    for k, v in model.named_parameters():
        v.requires_grad = True  # train all layers
        if any(x in k for x in freeze):
            print('freezing %s' % k)
            v.requires_grad = False

    # Optimizer
    nbs = 64  # nominal batch size
    accumulate = max(round(nbs / total_batch_size), 1)  # accumulate loss before optimizing
    hyp['weight_decay'] *= total_batch_size * accumulate / nbs  # scale weight_decay
    logger.info(f"Scaled weight_decay = {hyp['weight_decay']}")

    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in model.named_modules():
        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
            pg2.append(v.bias)  # biases
        if isinstance(v, nn.BatchNorm2d):
            pg0.append(v.weight)  # no decay
        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
            pg1.append(v.weight)  # apply decay

    if opt.adam:
        optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
    else:
        optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)

    optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']})  # add pg1 with weight_decay
    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
    logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
    del pg0, pg1, pg2

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
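    # Note: in YOLOv5, one_cycle(y1, y2, steps) returns a lambda ramping
    # cosine-style from y1 to y2 over `steps` epochs, roughly
    # ((1 - cos(x * pi / steps)) / 2) * (y2 - y1) + y1.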
    lf = one_cycle(1, hyp['lrf'], epochs)  # cosine 1->hyp['lrf']
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    # plot_lr_scheduler(optimizer, scheduler, epochs)

    # Logging
    if rank in [-1, 0] and wandb and wandb.run is None:
        opt.hyp = hyp  # add hyperparameters
        wandb_run = wandb.init(config=opt, resume="allow",
                               project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem,
                               name=save_dir.stem,
                               id=ckpt.get('wandb_id') if 'ckpt' in locals() else None)
    loggers = {'wandb': wandb}  # loggers dict

    # Resume
    start_epoch, best_fitness = 0, 0.0
    if pretrained:
        # Optimizer
        if ckpt['optimizer'] is not None:
            optimizer.load_state_dict(ckpt['optimizer'])
            best_fitness = ckpt['best_fitness']

        # Results
        if ckpt.get('training_results') is not None:
            with open(results_file, 'w') as file:
                file.write(ckpt['training_results'])  # write results.txt

        # Epochs
        start_epoch = ckpt['epoch'] + 1
        if opt.resume:
            assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (weights, epochs)
        if epochs < start_epoch:
            logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
                        (weights, ckpt['epoch'], epochs))
            epochs += ckpt['epoch']  # finetune additional epochs

        del ckpt, state_dict

    # Image sizes
    gs = int(model.stride.max())  # grid size (max stride)
    nl = model.model[-1].nl  # number of detection layers (used for scaling hyp['obj'])
    imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size]  # verify imgsz are gs-multiples

    # DP mode
    if cuda and rank == -1 and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # SyncBatchNorm
    if opt.sync_bn and cuda and rank != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        logger.info('Using SyncBatchNorm()')

    # EMA
    ema = ModelEMA(model) if rank in [-1, 0] else None

    # DDP mode
    if cuda and rank != -1:
        model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank)

    # Trainloader
    dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt,
                                            hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank,
                                            world_size=opt.world_size, workers=opt.workers,
                                            image_weights=opt.image_weights, quad=opt.quad)
    mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
    nb = len(dataloader)  # number of batches
    assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1)

    # Process 0
    if rank in [-1, 0]:
        ema.updates = start_epoch * nb // accumulate  # set EMA updates
        testloader = create_dataloader(test_path, imgsz_test, total_batch_size, gs, opt,  # testloader
                                       hyp=hyp, cache=opt.cache_images and not opt.notest, rect=True,
                                       rank=-1, world_size=opt.world_size, workers=opt.workers, pad=0.5)[0]

        if not opt.resume:
            labels = np.concatenate(dataset.labels, 0)
            c = torch.tensor(labels[:, 0])  # classes
            # cf = torch.bincount(c.long(), minlength=nc) + 1.  # frequency
            # model._initialize_biases(cf.to(device))
            if plots:
                plot_labels(labels, save_dir, loggers)
                if tb_writer:
                    tb_writer.add_histogram('classes', c, 0)

            # Anchors
            if not opt.noautoanchor:
                check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)

    # Model parameters
    hyp['box'] *= 3. / nl  # scale to layers
    hyp['cls'] *= nc / 80. * 3. / nl  # scale to classes and layers
    hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl  # scale to image size and layers
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # iou loss ratio (obj_loss = 1.0 or iou)
    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc  # attach class weights
    model.names = names

    # Start training
    t0 = time.time()
    nw = max(round(hyp['warmup_epochs'] * nb), 1000)  # number of warmup iterations, max(3 epochs, 1k iterations)
    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
    maps = np.zeros(nc)  # mAP per class
    results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
    scheduler.last_epoch = start_epoch - 1  # do not move
    scaler = amp.GradScaler(enabled=cuda)
    logger.info('Image sizes %g train, %g test\n'
                'Using %g dataloader workers\nLogging results to %s\n'
                'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, save_dir, epochs))
    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
        model.train()

        # Update image weights (optional)
        if opt.image_weights:
            # Generate indices
            if rank in [-1, 0]:
                cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc  # class weights
                iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw)  # image weights
                dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n)  # rand weighted idx
            # Broadcast if DDP
            if rank != -1:
                indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int()
                dist.broadcast(indices, 0)
                if rank != 0:
                    dataset.indices = indices.cpu().numpy()

        # Update mosaic border
        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders

        mloss = torch.zeros(4, device=device)  # mean losses
        if rank != -1:
            dataloader.sampler.set_epoch(epoch)
        pbar = enumerate(dataloader)
        logger.info(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'total', 'targets', 'img_size'))
        if rank in [-1, 0]:
            pbar = tqdm(pbar, total=nb)  # progress bar
        optimizer.zero_grad()
        for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device, non_blocking=True).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0

            # Warmup
            if ni <= nw:
                xi = [0, nw]  # x interp
                # model.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
                accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round())
                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])

            # Multi-scale
            if opt.multi_scale:
                sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
                    imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Forward
            with amp.autocast(enabled=cuda):
                pred = model(imgs)  # forward
                loss, loss_items = compute_loss(pred, targets.to(device), model)  # loss scaled by batch_size
                if rank != -1:
                    loss *= opt.world_size  # gradient averaged between devices in DDP mode
                if opt.quad:
                    loss *= 4.

            # Backward
            scaler.scale(loss).backward()

            # Optimize
            if ni % accumulate == 0:
                scaler.step(optimizer)  # optimizer.step
                scaler.update()
                optimizer.zero_grad()
                if ema:
                    ema.update(model)

            # Print
            if rank in [-1, 0]:
                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
                mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
                s = ('%10s' * 2 + '%10.4g' * 6) % (
                    '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1])
                pbar.set_description(s)

                # Plot
                if plots and ni < 3:
                    f = save_dir / f'train_batch{ni}.jpg'  # filename
                    Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start()
                    # if tb_writer:
                    #     tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
                    #     tb_writer.add_graph(model, imgs)  # add model to tensorboard
                elif plots and ni == 3 and wandb:
                    wandb.log({"Mosaics": [wandb.Image(str(x), caption=x.name) for x in save_dir.glob('train*.jpg')]})

            # end batch ------------------------------------------------------------------------------------------------
        # end epoch ----------------------------------------------------------------------------------------------------

        # Scheduler
        lr = [x['lr'] for x in optimizer.param_groups]  # for tensorboard
        scheduler.step()

        # DDP process 0 or single-GPU
        if rank in [-1, 0]:
            # mAP
            if ema:
                ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride', 'class_weights'])
            final_epoch = epoch + 1 == epochs
            if not opt.notest or final_epoch:  # Calculate mAP
                results, maps, times = test.test(opt.data,
                                                 batch_size=total_batch_size,
                                                 imgsz=imgsz_test,
                                                 model=ema.ema,
                                                 single_cls=opt.single_cls,
                                                 dataloader=testloader,
                                                 save_dir=save_dir,
                                                 plots=plots and final_epoch,
                                                 log_imgs=opt.log_imgs if wandb else 0)

            # Write
            with open(results_file, 'a') as f:
                f.write(s + '%10.4g' * 7 % results + '\n')  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
            if len(opt.name) and opt.bucket:
                os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name))

            # Log
            tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss',  # train loss
                    'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
                    'val/box_loss', 'val/obj_loss', 'val/cls_loss',  # val loss
                    'x/lr0', 'x/lr1', 'x/lr2']  # params
            for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
                if tb_writer:
                    tb_writer.add_scalar(tag, x, epoch)  # tensorboard
                if wandb:
                    wandb.log({tag: x})  # W&B

            # Update best mAP
            fi = fitness(np.array(results).reshape(1, -1))  # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
            if fi > best_fitness:
                best_fitness = fi

            # Save model
            save = (not opt.nosave) or (final_epoch and not opt.evolve)
            if save:
                with open(results_file, 'r') as f:  # create checkpoint
                    ckpt = {'epoch': epoch,
                            'best_fitness': best_fitness,
                            'training_results': f.read(),
                            'model': ema.ema,
                            'optimizer': None if final_epoch else optimizer.state_dict(),
                            'wandb_id': wandb_run.id if wandb else None}

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fi:
                    torch.save(ckpt, best)
                del ckpt
        # end epoch ----------------------------------------------------------------------------------------------------
    # end training

    if rank in [-1, 0]:
        # Strip optimizers
        final = best if best.exists() else last  # final model
        for f in [last, best]:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
        if opt.bucket:
            os.system(f'gsutil cp {final} gs://{opt.bucket}/weights')  # upload

        # Plots
        if plots:
            plot_results(save_dir=save_dir)  # save as results.png
            if wandb:
                files = ['results.png', 'precision_recall_curve.png', 'confusion_matrix.png']
                wandb.log({"Results": [wandb.Image(str(save_dir / f), caption=f) for f in files
                                       if (save_dir / f).exists()]})
                if opt.log_artifacts:
                    wandb.log_artifact(artifact_or_path=str(final), type='model', name=save_dir.stem)

        # Test best.pt
        logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
        if opt.data.endswith('coco.yaml') and nc == 80:  # if COCO
            for conf, iou, save_json in ([0.25, 0.45, False], [0.001, 0.65, True]):  # speed, mAP tests
                results, _, _ = test.test(opt.data,
                                          batch_size=total_batch_size,
                                          imgsz=imgsz_test,
                                          conf_thres=conf,
                                          iou_thres=iou,
                                          model=attempt_load(final, device).half(),
                                          single_cls=opt.single_cls,
                                          dataloader=testloader,
                                          save_dir=save_dir,
                                          save_json=save_json,
                                          plots=False)

    else:
        dist.destroy_process_group()

    wandb.run.finish() if wandb and wandb.run else None
    torch.cuda.empty_cache()
    return results
def geo_lng(dataloader, args):
    mappings = pickle.load(open('util_files/country_lang_mappings.pkl', 'rb'))
    iso3_to_lang = mappings['iso3_to_lang']
    # Country to iso3 mappings that are missing
    missing = {
        'South+Korea': 'KOR',
        'North+Korea': 'PRK',
        'Laos': 'LAO',
        'Caribbean+Netherlands': 'BES',
        'St.+Lucia': 'LCA',
        'East+Timor': 'TLS',
        'Democratic+Republic+of+Congo': 'COD',
        'Swaziland': 'SWZ',
        'Cape+Verde': 'CPV',
        'C%C3%B4te+d%C2%B4Ivoire': 'CIV',
        'Ivory+Coast': 'CIV',
        'Channel+Islands': 'GBR'
    }

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model = models.alexnet(pretrained=True).to(device)
    new_classifier = nn.Sequential(*list(model.classifier.children())[:-1])
    model.classifier = new_classifier

    with_country = dataloader.dataset.with_country

    country_with_langs = {}
    country_with_imgs = {
    }  # for each country, first list is tourist second is local
    lang_counts = {}

    detecter = fasttext.load_model('util_files/lid.176.bin')
    lang_dict = {}
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    for i, (data, target) in enumerate(tqdm(dataloader)):
        if data is None:
            continue
        this_tags = [
            tag['label'] for tag in target[0] if len(tag['label']) >= 3
        ]
        if len(this_tags) > 0:
            srcz = []
            conf = []
            for tag in this_tags:
                classify = detecter.predict(tag)
                srcz.append(classify[0][0][9:])
                conf.append(classify[1][0])

            # Pick out the most common language
            commons = Counter(srcz).most_common()
            the_src = commons[0][0]
            # If the most common language is English, look at the second most common language
            # since people oftentimes use English even when it's not their native language
            if the_src == 'en' and len(commons) > 1:
                the_src_maybe = commons[1][0]
                words = [
                    i for i in range(len(srcz)) if srcz[i] == the_src_maybe
                ]
                # If this second most common language has been classified with more than .5
                # probability, then choose this language for the image
                for word in words:
                    if conf[word] > .5:
                        the_src = the_src_maybe
            if the_src in lang_counts.keys():
                lang_counts[the_src] += 1
            else:
                lang_counts[the_src] = 1

            country = target[2][0]
            iso3 = None
            local = None
            try:
                iso3 = pycountry.countries.search_fuzzy(
                    country.replace('+', ' '))[0].alpha_3
            except LookupError:
                iso3 = missing[country]
            try:
                country_info = CountryInfo(country.replace('+', ' ')).info()
            except KeyError:
                country_info = {}
            country_name = country.split('+')
            if 'name' in country_info.keys():
                country_name += country_info['name']
            if 'nativeName' in country_info.keys():
                country_name += country_info['nativeName']

            # When distinguishing tourists from locals, we also look at the content of the tags,
            # leaving an image categorized as 'unknown' when we cannot tell whether it is tourist or local

            # Local: in a local language, country's name isn't a tag, and 'travel' isn't a tag
            # Tourist: in a non-local language, or 'travel' is a tag
            try:
                if the_src in iso3_to_lang[iso3] and len(
                        set(country_name)
                        & set(this_tags)) == 0 and 'travel' not in this_tags:
                    local = 1
                elif the_src not in iso3_to_lang[iso3] or 'travel' in this_tags:
                    local = 0
            except KeyError:
                print("This iso3 can't be found in iso3_to_lang: {}".format(
                    iso3))

            if country not in country_with_langs.keys():
                country_with_langs[country] = []
                country_with_imgs[country] = [[], []]
            country_with_langs[country].append(the_src)
            if local is not None:
                if len(country_with_imgs[country][local]) < 500:
                    data = normalize(data).to(device)
                    big_data = F.interpolate(data.unsqueeze(0),
                                             size=224,
                                             mode='bilinear').to(device)
                    this_features = model.forward(big_data)
                    country_with_imgs[country][local].append(
                        (this_features.data.cpu().numpy(), target[3]))

    info = {}
    info['lang_counts'] = lang_counts
    info['country_with_langs'] = country_with_langs
    info['country_with_imgs'] = country_with_imgs

    pickle.dump(info, open("results/{}/geo_lng.pkl".format(args.folder), "wb"))
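# Hedged sketch (not part of the original code): the language-selection
# heuristic used in geo_lng() above, isolated into a standalone helper.
# The helper name pick_language and its defaults are assumptions; it expects
# the same lid.176.bin fastText model loaded with fasttext.load_model().
def pick_language(tags, detector, conf_threshold=0.5):
    from collections import Counter
    langs, confs = [], []
    for tag in tags:
        labels, probs = detector.predict(tag)
        langs.append(labels[0][len('__label__'):])  # strip the '__label__' prefix
        confs.append(probs[0])
    commons = Counter(langs).most_common()
    best = commons[0][0]
    # If English wins, fall back to the runner-up language when it was
    # predicted with more than conf_threshold probability for some tag.
    if best == 'en' and len(commons) > 1:
        runner_up = commons[1][0]
        if any(c > conf_threshold for lang, c in zip(langs, confs) if lang == runner_up):
            best = runner_up
    return best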
def geo_tag_region(dataloader, args):
    # map from a region name to a list whose value at index i is the count of category i
    region_tags = {}
    tag_to_region_features = {}
    categories = dataloader.dataset.categories

    if not os.path.exists("results/{}/geo_ctr.pkl".format(args.folder)):
        print('running geo_ctr_region() first to get necessary info...')
        geo_ctr_region(dataloader, args)

    counts = pickle.load(
        open("results/{}/geo_ctr.pkl".format(args.folder), "rb"))
    id_to_region = counts['id_to_region']

    # get name of regions
    unique_regions = list(set(id_to_region.values()))

    # Extracts features from model pretrained on ImageNet
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model = models.alexnet(pretrained=True).to(device)
    new_classifier = nn.Sequential(*list(model.classifier.children())[:-1])
    model.classifier = new_classifier
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    region_features = {}
    for region in unique_regions:
        region_features[region] = []

    for cat in range(len(categories)):
        tag_to_region_features[cat] = copy.deepcopy(region_features)

    for i, (data, target) in enumerate(tqdm(dataloader)):
        if data is None:
            continue
        region_name = id_to_region[target[3]]
        anns = target[0]
        filepath = target[3]
        this_categories = list(
            set([categories.index(ann['label']) for ann in anns]))

        if region_name not in region_tags.keys():
            region_tags[region_name] = np.zeros(len(categories))
        this_features = None
        for cat in this_categories:
            if len(tag_to_region_features[cat][region_name]) < 500:
                data = normalize(data).to(device)
                big_data = F.interpolate(data.unsqueeze(0),
                                         size=224,
                                         mode='bilinear').to(device)
                this_features = model.forward(big_data)
                break
        for cat in this_categories:
            if this_features is not None and len(
                    tag_to_region_features[cat][region_name]) < 500:
                tag_to_region_features[cat][region_name].append(
                    (this_features.data.cpu().numpy(), filepath))
        for ann in anns:
            region_tags[region_name][categories.index(ann['label'])] += 1
    info_stats = {}
    info_stats['region_tags'] = region_tags
    info_stats['tag_to_region_features'] = tag_to_region_features
    pickle.dump(info_stats,
                open("results/{}/geo_tag.pkl".format(args.folder), "wb"))
Beispiel #26
0
    def get_bboxes_single(self,
                          cls_scores,
                          bbox_preds,
                          centernesses,
                          cof_preds,
                          feat_mask,
                          mlvl_points,
                          img_shape,
                          ori_shape,
                          scale_factor,
                          cfg,
                          rescale=False):
        assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)
        mlvl_bboxes = []
        mlvl_scores = []
        mlvl_centerness = []
        mlvl_cofs = []
        for cls_score, bbox_pred, cof_pred, centerness, points in zip(
                cls_scores, bbox_preds, cof_preds, centernesses, mlvl_points):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
            scores = cls_score.permute(1, 2, 0).reshape(
                -1, self.cls_out_channels).sigmoid()
            centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid()

            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
            cof_pred = cof_pred.permute(1, 2, 0).reshape(-1, 32 * 4)

            nms_pre = cfg.get('nms_pre', -1)
            if nms_pre > 0 and scores.shape[0] > nms_pre:
                max_scores, _ = (scores * centerness[:, None]).max(dim=1)
                _, topk_inds = max_scores.topk(nms_pre)
                points = points[topk_inds, :]
                bbox_pred = bbox_pred[topk_inds, :]
                cof_pred = cof_pred[topk_inds, :]
                scores = scores[topk_inds, :]
                centerness = centerness[topk_inds]
            bboxes = distance2bbox(points, bbox_pred, max_shape=img_shape)
            mlvl_cofs.append(cof_pred)
            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)
            mlvl_centerness.append(centerness)
        mlvl_bboxes = torch.cat(mlvl_bboxes)
        mlvl_cofs = torch.cat(mlvl_cofs)

        if rescale:
            mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
        mlvl_scores = torch.cat(mlvl_scores)
        padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
        mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
        mlvl_centerness = torch.cat(mlvl_centerness)

        mlvl_scores = mlvl_scores * mlvl_centerness.view(-1, 1)
        det_bboxes, det_labels, det_cofs = self.fast_nms(
            mlvl_bboxes,
            mlvl_scores[:, 1:].transpose(1, 0).contiguous(),
            mlvl_cofs,
            cfg,
            iou_threshold=0.5)
        masks = []
        if det_bboxes.shape[0] > 0:
            scale = 2
            #####spp########################
            img_mask1 = feat_mask.permute(1, 2, 0)
            pos_masks00 = torch.sigmoid(img_mask1 @ det_cofs[:, 0:32].t())
            pos_masks01 = torch.sigmoid(img_mask1 @ det_cofs[:, 32:64].t())
            pos_masks10 = torch.sigmoid(img_mask1 @ det_cofs[:, 64:96].t())
            pos_masks11 = torch.sigmoid(img_mask1 @ det_cofs[:, 96:128].t())
            if rescale:
                pos_masks = torch.stack(
                    [pos_masks00, pos_masks01, pos_masks10, pos_masks11],
                    dim=0)
                pos_masks = self.crop_cuda(
                    pos_masks, det_bboxes[:, :4] *
                    det_bboxes.new_tensor(scale_factor) / scale)
                # pos_masks = crop_split(pos_masks00, pos_masks01, pos_masks10, pos_masks11, det_bboxes * det_bboxes.new_tensor(scale_factor) / scale)
            else:
                pos_masks = torch.stack(
                    [pos_masks00, pos_masks01, pos_masks10, pos_masks11],
                    dim=0)
                pos_masks = self.crop_cuda(pos_masks,
                                           det_bboxes[:, :4] / scale)
                # pos_masks = crop_split(pos_masks00, pos_masks01, pos_masks10, pos_masks11, det_bboxes / scale)
            pos_masks = pos_masks.permute(2, 0, 1)
            if rescale:
                masks = F.interpolate(pos_masks.unsqueeze(0),
                                      scale_factor=scale / scale_factor,
                                      mode='bilinear',
                                      align_corners=False).squeeze(0)
            else:
                masks = F.interpolate(pos_masks.unsqueeze(0),
                                      scale_factor=scale,
                                      mode='bilinear',
                                      align_corners=False).squeeze(0)
            masks.gt_(0.5)

        return det_bboxes, det_labels, masks
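# Hedged shape sketch (illustrative, not original code): in the mask assembly
# above, each detection carries 4 groups of 32 coefficients; every group is
# linearly combined with the 32-channel mask feature map before cropping.
import torch
H, W, C, N = 96, 96, 32, 5                   # assumed feature size, channels, detections
feat_mask_demo = torch.rand(C, H, W)
det_cofs_demo = torch.rand(N, 4 * C)
img_mask_demo = feat_mask_demo.permute(1, 2, 0)                          # (H, W, C)
masks_demo = torch.sigmoid(img_mask_demo @ det_cofs_demo[:, 0:32].t())   # (H, W, N)
print(masks_demo.shape)                      # torch.Size([96, 96, 5])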
 def __upsample_cat(self, p2, p3, p4, p5):
     h, w = p2.size()[2:]
     p3 = F.interpolate(p3, size=(h, w), mode='bilinear', align_corners=False)
     p4 = F.interpolate(p4, size=(h, w), mode='bilinear', align_corners=False)
     p5 = F.interpolate(p5, size=(h, w), mode='bilinear', align_corners=False)
     return torch.cat([p2, p3, p4, p5], dim=1)
Beispiel #28
0
def resize(
    img: Tensor,
    size: List[int],
    interpolation: str = "bilinear",
    max_size: Optional[int] = None,
    antialias: Optional[bool] = None,
) -> Tensor:
    _assert_image_tensor(img)

    if not isinstance(size, (int, tuple, list)):
        raise TypeError("Got inappropriate size arg")
    if not isinstance(interpolation, str):
        raise TypeError("Got inappropriate interpolation arg")

    if interpolation not in ["nearest", "bilinear", "bicubic"]:
        raise ValueError("This interpolation mode is unsupported with Tensor input")

    if isinstance(size, tuple):
        size = list(size)

    if isinstance(size, list):
        if len(size) not in [1, 2]:
            raise ValueError(
                f"Size must be an int or a 1 or 2 element tuple/list, not a {len(size)} element tuple/list"
            )
        if max_size is not None and len(size) != 1:
            raise ValueError(
                "max_size should only be passed if size specifies the length of the smaller edge, "
                "i.e. size should be an int or a sequence of length 1 in torchscript mode."
            )

    if antialias is None:
        antialias = False

    if antialias and interpolation not in ["bilinear", "bicubic"]:
        raise ValueError("Antialias option is supported for bilinear and bicubic interpolation modes only")

    _, h, w = get_dimensions(img)

    if isinstance(size, int) or len(size) == 1:  # specified size only for the smallest edge
        short, long = (w, h) if w <= h else (h, w)
        requested_new_short = size if isinstance(size, int) else size[0]

        new_short, new_long = requested_new_short, int(requested_new_short * long / short)

        if max_size is not None:
            if max_size <= requested_new_short:
                raise ValueError(
                    f"max_size = {max_size} must be strictly greater than the requested "
                    f"size for the smaller edge size = {size}"
                )
            if new_long > max_size:
                new_short, new_long = int(max_size * new_short / new_long), max_size

        new_w, new_h = (new_short, new_long) if w <= h else (new_long, new_short)

        if (w, h) == (new_w, new_h):
            return img

    else:  # specified both h and w
        new_w, new_h = size[1], size[0]

    img, need_cast, need_squeeze, out_dtype = _cast_squeeze_in(img, [torch.float32, torch.float64])

    # Define align_corners to avoid warnings
    align_corners = False if interpolation in ["bilinear", "bicubic"] else None

    img = interpolate(img, size=[new_h, new_w], mode=interpolation, align_corners=align_corners, antialias=antialias)

    if interpolation == "bicubic" and out_dtype == torch.uint8:
        img = img.clamp(min=0, max=255)

    img = _cast_squeeze_out(img, need_cast=need_cast, need_squeeze=need_squeeze, out_dtype=out_dtype)

    return img
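# Hedged worked example (numbers are illustrative) of the smaller-edge /
# max_size arithmetic in resize() above when size is a single int:
h, w = 300, 500
requested_new_short, max_size = 200, 320
short, long = (w, h) if w <= h else (h, w)             # short=300, long=500
new_short = requested_new_short                        # 200
new_long = int(new_short * long / short)               # 333
if max_size is not None and new_long > max_size:       # 333 > 320
    new_short, new_long = int(max_size * new_short / new_long), max_size  # 192, 320
new_w, new_h = (new_short, new_long) if w <= h else (new_long, new_short)
print(new_h, new_w)                                    # 192 320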
 def forward(self, x):
     return F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
Beispiel #30
0
    test_dir = '/workspace/test_dir'
    filenames = [
        join(test_dir, x) for x in listdir(test_dir) if is_image_file(x)
    ]
    filenames.sort()
    output_dir = join(test_dir, "outputs")
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    bg_scale = 2
    for fn in filenames:
        img = Image.open(fn).convert("RGB")
        input_data = T.ToTensor()(img).unsqueeze(0).cuda()
        input_data = opt.bg_valid_blur(input_data)
        bg_upblur = F.interpolate(input_data,
                                  scale_factor=bg_scale,
                                  mode='bilinear',
                                  align_corners=False)
        bg_upblur = opt.bg_base_blur(bg_upblur)

        b, c, h, w = input_data.shape
        ph0, ph1, pw0, pw1 = [0] * 4
        min_sz = 256
        if h < min_sz or w < min_sz:

            ph0 = 0 if h > min_sz else (min_sz - h) // 2
            pw0 = 0 if w > min_sz else (min_sz - w) // 2

            ph1 = ph0
            pw1 = pw0
            if h + ph0 + ph1 < min_sz:
                ph1 += min_sz - h - ph0 - ph1
def _upsample(x, size):
    return F.interpolate(x, size=size, mode='bilinear', align_corners=True)
)

print(len(dataloader.dataset))
data = iter(dataloader)
for i in range(len(dataloader.dataset)):
    """Saves a generated sample from the validation set"""
    img = next(data)
    # print(img.unsqueeze(0).size())
    X1 = img.repeat(opt.style_dim, 1, 1, 1)
    X1 = Variable(X1.type(Tensor))
    # Get random style codes
    s_code = np.random.uniform(-1, 1, (opt.style_dim, opt.style_dim))
    s_code = Variable(Tensor(s_code))
    # Generate samples
    c_code_1, _ = Enc1(X1)
    X12 = Dec2(c_code_1, s_code)
    # Concatenate samples horizontally
    name = dataloader.dataset.files[i].split("/")[-1]

    for i, sample in enumerate(X12):
        tmp_name = name.split(".")[0] + "_" + str(i) + "." + name.split(
            ".")[-1]

        sample = F.interpolate(sample.unsqueeze(0),
                               size=(480, 640),
                               mode='bicubic')
        # sample = transform.resize(sample.unsqueeze(0), (480, 640))
        save_image(sample,
                   opt.output_location + "/" + tmp_name,
                   normalize=True)
    def _calculate_localization_map(self, inputs, labels=None):
        """
        Calculate localization map for all inputs with Grad-CAM.
        Args:
            inputs (list of tensor(s)): the input clips.
            labels (Optional[tensor]): labels of the current input clips.
        Returns:
            localization_maps (list of ndarray(s)): the localization map for
                each corresponding input.
            preds (tensor): shape (n_instances, n_class). Model predictions for `inputs`.
        """
        assert len(inputs) == len(
            self.target_layers
        ), "Must register the same number of target layers as the number of input pathways."
        input_clone = [inp.clone() for inp in inputs]
        preds = self.model(input_clone)

        if labels is None:
            score = torch.max(preds, dim=-1)[0]
        else:
            if labels.ndim == 1:
                labels = labels.unsqueeze(-1)
            score = torch.gather(preds, dim=1, index=labels)

        self.model.zero_grad()
        score = torch.sum(score)
        score.backward()
        localization_maps = []
        for i, inp in enumerate(inputs):
            _, _, T, H, W = inp.size()

            gradients = self.gradients[self.target_layers[i]]
            activations = self.activations[self.target_layers[i]]
            B, C, Tg, _, _ = gradients.size()

            weights = torch.mean(gradients.view(B, C, Tg, -1), dim=3)

            weights = weights.view(B, C, Tg, 1, 1)
            localization_map = torch.sum(
                weights * activations, dim=1, keepdim=True
            )
            localization_map = F.relu(localization_map)
            localization_map = F.interpolate(
                localization_map,
                size=(T, H, W),
                mode="trilinear",
                align_corners=False,
            )
            localization_map_min, localization_map_max = (
                torch.min(localization_map.view(B, -1), dim=-1, keepdim=True)[
                    0
                ],
                torch.max(localization_map.view(B, -1), dim=-1, keepdim=True)[
                    0
                ],
            )
            localization_map_min = torch.reshape(
                localization_map_min, shape=(B, 1, 1, 1, 1)
            )
            localization_map_max = torch.reshape(
                localization_map_max, shape=(B, 1, 1, 1, 1)
            )
            # Normalize the localization map.
            localization_map = (localization_map - localization_map_min) / (
                localization_map_max - localization_map_min + 1e-6
            )
            localization_map = localization_map.data

            localization_maps.append(localization_map)

        return localization_maps, preds
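# Hedged sketch (an assumption, not the original class code): one common way
# to populate the self.activations / self.gradients dicts consumed above is
# to register forward and backward hooks on the chosen target layers.
def register_gradcam_hooks(model, target_layer_names, activations, gradients):
    modules = dict(model.named_modules())
    for name in target_layer_names:
        layer = modules[name]

        def save_activation(module, inp, out, name=name):
            activations[name] = out.detach()

        def save_gradient(module, grad_in, grad_out, name=name):
            gradients[name] = grad_out[0].detach()

        layer.register_forward_hook(save_activation)
        layer.register_full_backward_hook(save_gradient)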
Beispiel #34
0
    def forward(self,
                input,
                label,
                step=0,
                alpha=-1,
                content_input=None,
                style_layer_begin=0,
                style_layer_end=-1):
        out_act = lambda x: x

        if style_layer_end == -1:  #content_input layer IDs go from 0 to (step). The local numbering is reversed so that, at 128x128 when step=5, id=0 <==> (step-5), id=1 <==> (step-4) etc.
            style_layer_end = step + 1

        style_layer_end = min(step + 1, style_layer_end)

        if style_layer_begin == -1 or style_layer_begin >= style_layer_end:
            return content_input

        assert input is not None

        # Label is reserved for future use. Make None if not in use. #label = self.label_embed(label)
        if label is not None:
            input = torch.cat([input, label], 1)

        batchN = input.size()[0]

        if args.stylefc > 0:
            input = self.z_preprocess[0](input)

        for anb in self.adanorm_blocks:
            anb.update(input)

        # For 3 levels of coarseness and the 2 resnet-block layers, in both the generator and generator_running. Since the layers are just appended to the global list without explicit indices, the resulting index numbers are ad hoc, but at the moment they are deterministic, as follows:
        def layers_for_block_depth(d, holder):
            # Generator layers start from 0 and running-Generator from 17, or vice versa. For both, do the same styling.
            network_offset = int(len(holder.adanorm_blocks) / 2)  #17
            return [d * 2,
                    d * 2 + 1]  #, d*2+network_offset, d*2+network_offset+1]

        # The first conv call will start from a constant content_input defined as a class-level var in AdaNorm
        if content_input is None:
            out = torch.ones(512, 4, 4).to(device=input.device).repeat(
                batchN, 1, 1, 1)
        else:
            out = content_input

        block_offset = 0

        for i in range(style_layer_begin, style_layer_end):
            if i > 0 and i not in Generator.supportBlockPoints:
                if args.upsampling != 'bilinear':
                    upsample = F.upsample(out, scale_factor=2)
                else:
                    upsample = F.interpolate(out,
                                             align_corners=False,
                                             scale_factor=2,
                                             mode='bilinear')
            else:
                upsample = out

            out = upsample

            if i == 0 or not self.use_layer_noise:
                out = self.progression[i](out)
            else:
                out = self.progression[i].conv[0][0](out)
                out = self.progression[i].conv[0][1](out)
                out = self.noise[i][0](out)
                out = self.progression[i].conv[0][2](out)  #act
                if args.upsampling != 'bilinear':
                    out = self.progression[i].conv[0][3](out)  #Blur
                out = self.progression[i].conv[1](out)
                out = self.noise[i][1](out)
                out = self.progression[i].conv[2](out)  #act
                out = self.progression[i].conv[3](out)

        if style_layer_end == step + 1:  # The final layer is ALWAYS either to_rgb layer, or a mixture of 2 to-rgb_layers!
            out = out_act(self.to_rgb[step](out))

            if style_layer_end > 1 and 0 <= alpha < 1:
                skip_rgb = out_act(self.to_rgb[step - 1](upsample))
                if args.gnn:
                    channelwise_std = skip_rgb.std((0, 2, 3), keepdim=True)
                    channelwise_mean = skip_rgb.mean((0, 2, 3), keepdim=True)

                    out_std = out.std(dim=(0, 2, 3), keepdim=True)
                    out_mean = out.mean(dim=(0, 2, 3), keepdim=True)

                    skip_rgb = (skip_rgb - channelwise_mean) * (
                        out_std / channelwise_std) + out_mean

                out = (1 - alpha) * skip_rgb + alpha * out

        return out
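# Hedged sketch (illustrative values): the fade-in used above blends the RGB
# output of the previous resolution (skip_rgb, after upsampling) with the new
# block's RGB output using the transition weight alpha.
import torch
alpha = 0.3
out_demo = torch.rand(4, 3, 128, 128)        # new-resolution RGB
skip_rgb_demo = torch.rand(4, 3, 128, 128)   # upsampled previous-resolution RGB
blended = (1 - alpha) * skip_rgb_demo + alpha * out_demo  # alpha ramps 0 -> 1 during the transition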
Beispiel #35
0
def resize(image, size):
    image = F.interpolate(image.unsqueeze(0), size=size,
                          mode="nearest").squeeze(0)
    return image
Beispiel #36
0
    def forward(self, x):
        b, t, c, h, w = x.size()
        if self.hr_in:
            assert h % (self.upscale**2) == 0 and w % (self.upscale**2) == 0, (
                'The height and width must be multiple of {}.'.format(
                    self.upscale**2))
        else:
            assert h % self.upscale == 0 and w % self.upscale == 0, (
                'The height and width must be multiple of {}.'.format(
                    self.upscale))

        x_center = x[:, self.center_frame_idx, :, :, :].contiguous()

        # extract features for each frame
        # L1
        if self.with_predeblur:
            feat_l1 = self.conv_1x1(self.predeblur(x.view(-1, c, h, w)))
            if self.hr_in:
                h, w = h // self.upscale, w // self.upscale
        else:
            feat_l1 = self.lrelu(self.conv_first(x.view(-1, c, h, w)))

        feat_l1 = self.feature_extraction(feat_l1)
        # L2
        feat_l2 = self.lrelu(self.conv_l2_1(feat_l1))
        feat_l2 = self.lrelu(self.conv_l2_2(feat_l2))
        # L3
        feat_l3 = self.lrelu(self.conv_l3_1(feat_l2))
        feat_l3 = self.lrelu(self.conv_l3_2(feat_l3))

        feat_l1 = feat_l1.view(b, t, -1, h, w)
        feat_l2 = feat_l2.view(b, t, -1, h // 2, w // 2)
        feat_l3 = feat_l3.view(b, t, -1, h // 4, w // 4)

        # PCD alignment
        ref_feat_l = [  # reference feature list
            feat_l1[:, self.center_frame_idx, :, :, :].clone(),
            feat_l2[:, self.center_frame_idx, :, :, :].clone(),
            feat_l3[:, self.center_frame_idx, :, :, :].clone()
        ]
        aligned_feat = []
        for i in range(t):
            nbr_feat_l = [  # neighboring feature list
                feat_l1[:, i, :, :, :].clone(), feat_l2[:, i, :, :, :].clone(),
                feat_l3[:, i, :, :, :].clone()
            ]
            aligned_feat.append(self.pcd_align(nbr_feat_l, ref_feat_l))
        aligned_feat = torch.stack(aligned_feat, dim=1)  # (b, t, c, h, w)

        if not self.with_tsa:
            aligned_feat = aligned_feat.view(b, -1, h, w)
        feat = self.fusion(aligned_feat)

        out = self.reconstruction(feat)

        if self.add_rrdb:
            out = self.RRDB(out)

        for i in range(self.n_upscale):
            upconv = getattr(self, f'upconv{i+1}')
            out = self.lrelu(self.pixel_shuffle(upconv(out)))
        out = self.lrelu(self.conv_hr(out))
        out = self.conv_last(out)
        if self.hr_in:
            base = x_center
        else:
            base = F.interpolate(x_center,
                                 scale_factor=self.upscale,
                                 mode='bilinear',
                                 align_corners=False)
        out += base
        return out
Beispiel #37
0
    def forward(self, conv_out, output_switch=None, seg_size=None):

        output_dict = {k: None for k in output_switch.keys()}

        conv5 = conv_out[-1]
        input_size = conv5.size()
        ppm_out = [conv5]
        roi = [] # fake rois, just used for pooling
        for i in range(input_size[0]): # batch size
            roi.append(torch.Tensor([i, 0, 0, input_size[3], input_size[2]]).view(1, -1)) # b, x0, y0, x1, y1
        roi = torch.cat(roi, dim=0).type_as(conv5)
        ppm_out = [conv5]
        for pool_scale, pool_conv in zip(self.ppm_pooling, self.ppm_conv):
            ppm_out.append(pool_conv(F.interpolate(
                pool_scale(conv5, roi.detach()),
                (input_size[2], input_size[3]),
                mode='bilinear', align_corners=False)))
        ppm_out = torch.cat(ppm_out, 1)
        f = self.ppm_last_conv(ppm_out)

        if output_switch['scene']: # scene
            output_dict['scene'] = self.scene_head(f)

        if output_switch['object'] or output_switch['part'] or output_switch['material']:
            fpn_feature_list = [f]
            for i in reversed(range(len(conv_out) - 1)):
                conv_x = conv_out[i]
                conv_x = self.fpn_in[i](conv_x) # lateral branch

                f = F.interpolate(
                    f, size=conv_x.size()[2:], mode='bilinear', align_corners=False) # top-down branch
                f = conv_x + f

                fpn_feature_list.append(self.fpn_out[i](f))
            fpn_feature_list.reverse() # [P2 - P5]

            # material
            if output_switch['material']:
                output_dict['material'] = self.material_head(fpn_feature_list[0])

            if output_switch['object'] or output_switch['part']:
                output_size = fpn_feature_list[0].size()[2:]
                fusion_list = [fpn_feature_list[0]]
                for i in range(1, len(fpn_feature_list)):
                    fusion_list.append(F.interpolate(
                        fpn_feature_list[i],
                        output_size,
                        mode='bilinear', align_corners=False))
                fusion_out = torch.cat(fusion_list, 1)
                x = self.conv_fusion(fusion_out)

                if output_switch['object']: # object
                    output_dict['object'] = self.object_head(x)
                if output_switch['part']:
                    output_dict['part'] = self.part_head(x)

        if self.use_softmax:  # is True during inference
            # inference scene
            x = output_dict['scene']
            x = x.squeeze(3).squeeze(2)
            x = F.softmax(x, dim=1)
            output_dict['scene'] = x

            # inference object, material
            for k in ['object', 'material']:
                x = output_dict[k]
                x = F.interpolate(x, size=seg_size, mode='bilinear', align_corners=False)
                x = F.softmax(x, dim=1)
                output_dict[k] = x

            # inference part
            x = output_dict['part']
            x = F.interpolate(x, size=seg_size, mode='bilinear', align_corners=False)
            part_pred_list, head = [], 0
            for idx_part, object_label in enumerate(broden_dataset.object_with_part):
                n_part = len(broden_dataset.object_part[object_label])
                _x = F.interpolate(x[:, head: head + n_part], size=seg_size, mode='bilinear', align_corners=False)
                _x = F.softmax(_x, dim=1)
                part_pred_list.append(_x)
                head += n_part
            output_dict['part'] = part_pred_list

        else:   # Training
            # object, scene, material
            for k in ['object', 'scene', 'material']:
                if output_dict[k] is None:
                    continue
                x = output_dict[k]
                x = F.log_softmax(x, dim=1)
                if k == "scene":  # for scene
                    x = x.squeeze(3).squeeze(2)
                output_dict[k] = x
            if output_dict['part'] is not None:
                part_pred_list, head = [], 0
                for idx_part, object_label in enumerate(broden_dataset.object_with_part):
                    n_part = len(broden_dataset.object_part[object_label])
                    x = output_dict['part'][:, head: head + n_part]
                    x = F.log_softmax(x, dim=1)
                    part_pred_list.append(x)
                    head += n_part
                output_dict['part'] = part_pred_list

        return output_dict
def geo_tag(dataloader, args):
    # redirect to geo_tag_gps if dataset is of gps form:
    if (dataloader.dataset.geography_info_type == "GPS_LABEL"):
        print("redirecting to geo_tag_gps()...")
        return geo_tag_gps(dataloader, args)
    elif (dataloader.dataset.geography_info_type == "STRING_FORMATTED_LABEL"
          and dataloader.dataset.geography_label_string_type
          == "REGION_LABEL"):
        print("redirecting to geo_tag_region()...")
        return geo_tag_region(dataloader, args)
    country_tags = {}
    tag_to_subregion_features = {}
    categories = dataloader.dataset.categories
    iso3_to_subregion = pickle.load(
        open('util_files/iso3_to_subregion_mappings.pkl', 'rb'))
    unique_subregions = set(list(iso3_to_subregion.values()))

    # Extracts features from model pretrained on ImageNet
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model = models.alexnet(pretrained=True).to(device)
    new_classifier = nn.Sequential(*list(model.classifier.children())[:-1])
    model.classifier = new_classifier
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    subregion_features = {}
    for subregion in unique_subregions:
        subregion_features[subregion] = []
    for cat in range(len(categories)):
        tag_to_subregion_features[cat] = copy.deepcopy(subregion_features)
    for i, (data, target) in enumerate(tqdm(dataloader)):
        if data is None:
            continue
        country = target[2][0]
        anns = target[0]
        filepath = target[3]
        this_categories = list(
            set([categories.index(ann['label']) for ann in anns]))
        subregion = iso3_to_subregion[country_to_iso3(country)]
        if country not in country_tags.keys():
            country_tags[country] = np.zeros(len(categories))
        this_features = None
        for cat in this_categories:
            if len(tag_to_subregion_features[cat][subregion]) < 500:
                data = normalize(data).to(device)
                big_data = F.interpolate(data.unsqueeze(0),
                                         size=224,
                                         mode='bilinear').to(device)
                this_features = model.forward(big_data)
                break
        for cat in this_categories:
            country_tags[country][cat] += 1
            if this_features is not None and len(
                    tag_to_subregion_features[cat][subregion]) < 500:
                tag_to_subregion_features[cat][subregion].append(
                    (this_features.data.cpu().numpy(), filepath))

    info_stats = {}
    info_stats['country_tags'] = country_tags
    info_stats['tag_to_subregion_features'] = tag_to_subregion_features
    pickle.dump(info_stats,
                open("results/{}/geo_tag.pkl".format(args.folder), "wb"))
 def forward(self, x):
     x = self.block(x)
     if self.upsample:
         x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
     return x
Beispiel #40
0
def downsample(masks):
    masks = F.interpolate(masks, scale_factor=1/2, mode="bilinear",
                          align_corners=True, recompute_scale_factor=True)
    m = masks >= 0  # .5
    masks[m] = 1
    masks[~m] = 0
    return masks
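# Hedged note (assumption): for 0/1 masks every bilinearly interpolated value
# is >= 0, so the comparison above marks everything as foreground; the trailing
# "# .5" hints that a 0.5 threshold was intended. A sketch of that variant:
def downsample_binarized(masks, threshold=0.5):
    masks = F.interpolate(masks, scale_factor=1/2, mode="bilinear",
                          align_corners=True, recompute_scale_factor=True)
    return (masks >= threshold).float()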
Beispiel #41
0
def plot_heatmap(image, gt, label, model, layer):
    """
    Plot the class activation heat map over the original transformed input image.
    Args:
        image: 1x3x512x512 pytorch tensor representing the NIH CXR; its size should be consistent with cfg.CROP_SIZE
        gt: ground-truth label tensor for the image
        label: user-supplied label you wish to get the class activation map for; must be in CLASS_NAMES
        model: densenet121 trained on NIH CXR data
        layer: which layer's heat map to extract
    Returns:
        preds: pandas DataFrame of per-finding predicted probabilities and ground truth,
            sorted by predicted probability
    """
    label_index = -1
    for i in range(14):
        if CLASS_NAMES[i] == label:
            label_index = i
            break
    assert label_index != -1
    res, fea = model(image)
    choosen = fea[layer]
    choosen = torch.sum(choosen, dim=1).unsqueeze(0)
    size = choosen.size()[2:]
    # bilinear interpolation to restore the original spatial size

    choosen = F.interpolate(choosen, list(size), mode="bilinear")
    raw_cam = choosen.detach()
    # create predictions for label of interest and all labels
    list_res = res[1][0].data.tolist()
    predx = ['%.3f' % elem for elem in list_res]

    fig, (showcxr, heatmap) = plt.subplots(ncols=2, figsize=(14, 5))

    hmap = sns.heatmap(raw_cam.squeeze(),
                       cmap='viridis',
                       alpha=0.3,  # whole heatmap is translucent
                       zorder=2, square=True, vmin=-5, vmax=5
                       )
    cxr = image.squeeze(0).permute(1,2,0).cpu().numpy()
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    # cxr = cxr * std + mean
    cxr = np.clip(cxr, 0, 1)
    LABEL = label
    # print cxr.size()
    hmap.imshow(cxr,
                aspect=hmap.get_aspect(),
                extent=hmap.get_xlim() + hmap.get_ylim(),
                zorder=1)  # put the map under the heatmap
    hmap.axis('off')
    hmap.set_title("P(" + LABEL + ")=" + str(predx[label_index]))

    showcxr.imshow(cxr)
    showcxr.axis('off')
    showcxr.set_title("Heat Map")
    plt.show()

    preds_concat = pd.concat([pd.Series(CLASS_NAMES), pd.Series(predx), pd.Series(gt.numpy().astype(bool)[0])], axis=1)
    preds = pd.DataFrame(data=preds_concat)
    preds.columns = ["Finding", "Predicted Probability", "Ground Truth"]
    preds.set_index("Finding", inplace=True)
    preds.sort_values(by='Predicted Probability', inplace=True, ascending=False)
    return preds
 def __upsample_add(self, x, y):
     return F.interpolate(x, size=y.size()[2:], mode='bilinear', align_corners=False) + y
Beispiel #43
0
    def lincomb_mask_loss(self,
                          pos,
                          idx_t,
                          loc_data,
                          mask_data,
                          priors,
                          proto_data,
                          masks,
                          gt_box_t,
                          inst_data,
                          interpolation_mode='bilinear'):
        mask_h = proto_data.size(1)
        mask_w = proto_data.size(2)

        process_gt_bboxes = cfg.mask_proto_normalize_emulate_roi_pooling or cfg.mask_proto_crop

        if cfg.mask_proto_remove_empty_masks:
            # Make sure to store a copy of this because we edit it to get rid of all-zero masks
            pos = pos.clone()

        loss_m = 0
        loss_d = 0  # Coefficient diversity loss

        for idx in range(mask_data.size(0)):
            with torch.no_grad():
                downsampled_masks = F.interpolate(
                    masks[idx].unsqueeze(0), (mask_h, mask_w),
                    mode=interpolation_mode,
                    align_corners=False).squeeze(0)
                downsampled_masks = downsampled_masks.permute(1, 2,
                                                              0).contiguous()

                if cfg.mask_proto_binarize_downsampled_gt:
                    downsampled_masks = downsampled_masks.gt(0.5).float()

                if cfg.mask_proto_remove_empty_masks:
                    # Get rid of gt masks that are so small they get downsampled away
                    very_small_masks = (downsampled_masks.sum(dim=(0, 1)) <=
                                        0.0001)
                    for i in range(very_small_masks.size(0)):
                        if very_small_masks[i]:
                            pos[idx, idx_t[idx] == i] = 0

                if cfg.mask_proto_reweight_mask_loss:
                    # Ensure that the gt is binary
                    if not cfg.mask_proto_binarize_downsampled_gt:
                        bin_gt = downsampled_masks.gt(0.5).float()
                    else:
                        bin_gt = downsampled_masks

                    gt_foreground_norm = bin_gt / (
                        torch.sum(bin_gt, dim=(0, 1), keepdim=True) + 0.0001)
                    gt_background_norm = (1 - bin_gt) / (torch.sum(
                        1 - bin_gt, dim=(0, 1), keepdim=True) + 0.0001)

                    mask_reweighting = gt_foreground_norm * cfg.mask_proto_reweight_coeff + gt_background_norm
                    mask_reweighting *= mask_h * mask_w

            cur_pos = pos[idx]
            pos_idx_t = idx_t[idx, cur_pos]

            if process_gt_bboxes:
                # Note: this is in point-form
                pos_gt_box_t = gt_box_t[idx, cur_pos]

            if pos_idx_t.size(0) == 0:
                continue

            proto_masks = proto_data[idx]
            proto_coef = mask_data[idx, cur_pos, :]

            if cfg.mask_proto_coeff_diversity_loss:
                if inst_data is not None:
                    div_coeffs = inst_data[idx, cur_pos, :]
                else:
                    div_coeffs = proto_coef

                loss_d += self.coeff_diversity_loss(div_coeffs, pos_idx_t)

            # If we have over the allowed number of masks, select a random sample
            old_num_pos = proto_coef.size(0)
            if old_num_pos > cfg.masks_to_train:
                perm = torch.randperm(proto_coef.size(0))
                select = perm[:cfg.masks_to_train]

                proto_coef = proto_coef[select, :]
                pos_idx_t = pos_idx_t[select]

                if process_gt_bboxes:
                    pos_gt_box_t = pos_gt_box_t[select, :]

            num_pos = proto_coef.size(0)
            mask_t = downsampled_masks[:, :, pos_idx_t]

            # Size: [mask_h, mask_w, num_pos]
            pred_masks = proto_masks @ proto_coef.t()
            pred_masks = cfg.mask_proto_mask_activation(pred_masks)

            if cfg.mask_proto_double_loss:
                if cfg.mask_proto_mask_activation == activation_func.sigmoid:
                    pre_loss = F.binary_cross_entropy(torch.clamp(
                        pred_masks, 0, 1),
                                                      mask_t,
                                                      reduction='sum')
                else:
                    pre_loss = F.smooth_l1_loss(pred_masks,
                                                mask_t,
                                                reduction='sum')

                loss_m += cfg.mask_proto_double_loss_alpha * pre_loss

            if cfg.mask_proto_crop:
                pred_masks = crop(pred_masks, pos_gt_box_t)

            if cfg.mask_proto_mask_activation == activation_func.sigmoid:
                pre_loss = F.binary_cross_entropy(torch.clamp(
                    pred_masks, 0, 1),
                                                  mask_t,
                                                  reduction='none')
            else:
                pre_loss = F.smooth_l1_loss(pred_masks,
                                            mask_t,
                                            reduction='none')

            if cfg.mask_proto_normalize_mask_loss_by_sqrt_area:
                gt_area = torch.sum(mask_t, dim=(0, 1), keepdim=True)
                pre_loss = pre_loss / (torch.sqrt(gt_area) + 0.0001)

            if cfg.mask_proto_reweight_mask_loss:
                pre_loss = pre_loss * mask_reweighting[:, :, pos_idx_t]

            if cfg.mask_proto_normalize_emulate_roi_pooling:
                weight = mask_h * mask_w if cfg.mask_proto_crop else 1
                pos_get_csize = center_size(pos_gt_box_t)
                gt_box_width = pos_get_csize[:, 2] * mask_w
                gt_box_height = pos_get_csize[:, 3] * mask_h
                pre_loss = pre_loss.sum(
                    dim=(0, 1)) / gt_box_width / gt_box_height * weight

            # If the number of masks was limited, scale the loss accordingly
            if old_num_pos > num_pos:
                pre_loss *= old_num_pos / num_pos

            loss_m += torch.sum(pre_loss)

        losses = {'M': loss_m * cfg.mask_alpha / mask_h / mask_w}

        if cfg.mask_proto_coeff_diversity_loss:
            losses['D'] = loss_d

        return losses
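# Hedged shape sketch (illustrative): the core of the lincomb mask loss above
# assembles per-detection masks as a linear combination of shared prototypes.
import torch
mask_h, mask_w, k, num_pos = 138, 138, 32, 7
proto_masks_demo = torch.rand(mask_h, mask_w, k)   # prototype masks
proto_coef_demo = torch.rand(num_pos, k)           # per-detection coefficients
pred_masks_demo = torch.sigmoid(proto_masks_demo @ proto_coef_demo.t())
print(pred_masks_demo.shape)                       # torch.Size([138, 138, 7])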
Beispiel #44
0
def upsample(x):
    """Upsample input tensor by a factor of 2
    """
    return F.interpolate(x, scale_factor=2, mode="nearest")
Beispiel #45
0
def train(args, train_loader, disp_net, pose_exp_net, optimizer, epoch_size, logger, train_writer):
    global n_iter, device
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter(precision=4)
    w1, w2, w3 = args.photo_loss_weight, args.mask_loss_weight, args.smooth_loss_weight

    # switch to train mode
    disp_net.train()
    pose_exp_net.train()

    end = time.time()
    logger.train_bar.update(0)

    for i, (tgt_img, ref_imgs, intrinsics, intrinsics_inv) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        tgt_img = tgt_img.to(device)
        ref_imgs = [img.to(device) for img in ref_imgs]
        intrinsics = intrinsics.to(device)
        intrinsics_inv = intrinsics_inv.to(device)

        # compute output
        disparities = disp_net(tgt_img)
        depth = [1/disp for disp in disparities]
        explainability_mask, pose = pose_exp_net(tgt_img, ref_imgs)

        loss_1 = photometric_reconstruction_loss(tgt_img, ref_imgs,
                                                 intrinsics, intrinsics_inv,
                                                 depth, explainability_mask, pose,
                                                 args.rotation_mode, args.padding_mode)
        if w2 > 0:
            loss_2 = explainability_loss(explainability_mask)
        else:
            loss_2 = 0
        loss_3 = smooth_loss(depth)

        loss = w1*loss_1 + w2*loss_2 + w3*loss_3

        if i > 0 and n_iter % args.print_freq == 0:
            train_writer.add_scalar('photometric_error', loss_1.item(), n_iter)
            if w2 > 0:
                train_writer.add_scalar('explainability_loss', loss_2.item(), n_iter)
            train_writer.add_scalar('disparity_smoothness_loss', loss_3.item(), n_iter)
            train_writer.add_scalar('total_loss', loss.item(), n_iter)

        if args.training_output_freq > 0 and n_iter % args.training_output_freq == 0:

            train_writer.add_image('train Input', tensor2array(tgt_img[0]), n_iter)

            for k, scaled_depth in enumerate(depth):
                train_writer.add_image('train Dispnet Output Normalized {}'.format(k),
                                       tensor2array(disparities[k][0], max_value=None, colormap='bone'),
                                       n_iter)
                train_writer.add_image('train Depth Output Normalized {}'.format(k),
                                       tensor2array(1/disparities[k][0], max_value=None),
                                       n_iter)
                b, _, h, w = scaled_depth.size()
                downscale = tgt_img.size(2)/h

                tgt_img_scaled = F.interpolate(tgt_img, (h, w), mode='area')
                ref_imgs_scaled = [F.interpolate(ref_img, (h, w), mode='area') for ref_img in ref_imgs]

                intrinsics_scaled = torch.cat((intrinsics[:, 0:2]/downscale, intrinsics[:, 2:]), dim=1)
                intrinsics_scaled_inv = torch.cat((intrinsics_inv[:, :, 0:2]*downscale, intrinsics_inv[:, :, 2:]), dim=2)

                # log warped images along with explainability mask
                for j,ref in enumerate(ref_imgs_scaled):
                    ref_warped = inverse_warp(ref, scaled_depth[:,0], pose[:,j],
                                              intrinsics_scaled, intrinsics_scaled_inv,
                                              rotation_mode=args.rotation_mode,
                                              padding_mode=args.padding_mode)[0]
                    train_writer.add_image('train Warped Outputs {} {}'.format(k,j),
                                           tensor2array(ref_warped),
                                           n_iter)
                    train_writer.add_image('train Diff Outputs {} {}'.format(k,j),
                                           tensor2array(0.5*(tgt_img_scaled[0] - ref_warped).abs()),
                                           n_iter)
                    if explainability_mask[k] is not None:
                        train_writer.add_image('train Exp mask Outputs {} {}'.format(k,j),
                                               tensor2array(explainability_mask[k][0,j], max_value=1, colormap='bone'),
                                               n_iter)

        # record loss and EPE
        losses.update(loss.item(), args.batch_size)

        # compute gradient and do Adam step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        with open(args.save_path/args.log_full, 'a') as csvfile:
            writer = csv.writer(csvfile, delimiter='\t')
            writer.writerow([loss.item(), loss_1.item(), loss_2.item() if w2 > 0 else 0, loss_3.item()])
        logger.train_bar.update(i+1)
        if i % args.print_freq == 0:
            logger.train_writer.write('Train: Time {} Data {} Loss {}'.format(batch_time, data_time, losses))
        if i >= epoch_size - 1:
            break

        n_iter += 1

    return losses.avg[0]
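# Hedged example (numbers are illustrative): the intrinsics rescaling above
# divides the first two rows of K by the image downscale factor, i.e. fx, fy,
# cx, cy are all divided by `downscale` while the last row stays [0, 0, 1].
import torch
K = torch.tensor([[[500., 0., 320.],
                   [0., 500., 240.],
                   [0., 0., 1.]]])
downscale = 2.0
K_scaled = torch.cat((K[:, 0:2] / downscale, K[:, 2:]), dim=1)
# K_scaled: fx=250, fy=250, cx=160, cy=120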
 def split_feats(self, feats):
     return (F.interpolate(feats['p2'], scale_factor=0.5, mode='bilinear'),
             feats['p3'],
             feats['p4'],
             feats['p5'],
             F.interpolate(feats['p6'], size=feats['p5'].shape[-2:], mode='bilinear'))