Example #1
    def execute(self, inputs):
        assert len(inputs) == len(self.in_channels)
        outs = []
        outs.append(inputs[0])
        for i in range(1, len(inputs)):
            outs.append(
                nn.interpolate(inputs[i], scale_factor=2**i, mode='bilinear'))
        out = jt.contrib.concat(outs, dim=1)
        '''
        if out.requires_grad and self.with_checkpoint:
            out = checkpoint(self.reduction_conv, out)
        else:
            out = self.reduction_conv(out)
        '''
        out = self.reduction_conv(out)
        outs = [out]
        for i in range(1, self.num_level):
            outs.append(
                nn.pool(out, kernel_size=2**i, stride=2**i, op=self.pooling))
        outputs = []
        if self.share_conv:
            for i in range(self.num_level):
                outputs.append(self.fpn_conv(outs[i]))
        else:
            for i in range(self.num_level):
                if not outs[i].is_stop_grad() and self.with_checkpoint:
                    tmp_out = checkpoint(self.fpn_conv[i], outs[i])
                else:
                    tmp_out = self.fpn_conv[i](outs[i])
                outputs.append(tmp_out)
        return tuple(outputs)
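A minimal sketch of the upsample-concat-pool pattern above, with made-up shapes and channel counts (not taken from the original module):

import jittor as jt
from jittor import nn

# Three pyramid levels at 32x32, 16x16 and 8x8 (hypothetical sizes).
feats = [jt.randn(1, 16, 32, 32), jt.randn(1, 16, 16, 16), jt.randn(1, 16, 8, 8)]

# Upsample every coarser level to the resolution of feats[0] and concatenate channels.
ups = [feats[0]]
for i in range(1, len(feats)):
    ups.append(nn.interpolate(feats[i], scale_factor=2**i, mode='bilinear'))
fused = jt.concat(ups, dim=1)  # (1, 48, 32, 32)

# Pool the fused map back down to rebuild the lower-resolution levels.
pyramid = [fused]
for i in range(1, len(feats)):
    pyramid.append(nn.pool(fused, kernel_size=2**i, stride=2**i, op='maximum'))
# pyramid shapes: (1, 48, 32, 32), (1, 48, 16, 16), (1, 48, 8, 8)
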
Example #2
    def semantic_segmentation_loss(self,
                                   segment_data,
                                   mask_t,
                                   class_t,
                                   interpolation_mode='bilinear'):
        # Note num_classes here is without the background class so cfg.num_classes-1
        batch_size, num_classes, mask_h, mask_w = segment_data.shape
        loss_s = 0

        for idx in range(batch_size):
            cur_segment = segment_data[idx]
            cur_class_t = class_t[idx]

            with jt.no_grad():
                downsampled_masks = nn.interpolate(
                    mask_t[idx].unsqueeze(0), (mask_h, mask_w),
                    mode=interpolation_mode,
                    align_corners=False).squeeze(0)
                downsampled_masks = (downsampled_masks > 0.5).float()

                # Construct Semantic Segmentation
                segment_t = jt.zeros_like(cur_segment)
                segment_t.stop_grad()
                for obj_idx in range(downsampled_masks.shape[0]):
                    segment_t[cur_class_t[obj_idx]] = jt.maximum(
                        segment_t[cur_class_t[obj_idx]],
                        downsampled_masks[obj_idx])

            loss_s += nn.BCEWithLogitsLoss(size_average=False)(cur_segment,
                                                               segment_t)

        return loss_s / mask_h / mask_w * cfg.semantic_segmentation_alpha
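A toy sketch (assumed sizes and class indices) of how the semantic target above is assembled: every downsampled instance mask is max-merged into the channel of its ground-truth class.

import jittor as jt

num_classes, mask_h, mask_w = 3, 4, 4
inst_masks = (jt.random([2, mask_h, mask_w]) > 0.5).float()  # two binary instance masks (stand-ins)
inst_classes = jt.array([1, 2])                              # class index of each instance (stand-ins)

segment_t = jt.zeros((num_classes, mask_h, mask_w))
for obj_idx in range(inst_masks.shape[0]):
    c = int(inst_classes[obj_idx].item())
    # Each class channel is the pixelwise maximum over all of its instance masks.
    segment_t[c] = jt.maximum(segment_t[c], inst_masks[obj_idx])
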
Example #3
    def execute(self, img):
        
        # img assumed to be a pytorch BGR image with channel order [n, h, w, c]
        if cfg.preserve_aspect_ratio:
            _, h, w, _ = img.size()
            img_size = Resize.calc_size_preserve_ar(w, h, cfg.max_size)
            img_size = (img_size[1], img_size[0]) # Pytorch needs h, w
        else:
            img_size = (cfg.max_size, cfg.max_size)

        img = img.permute(0, 3, 1, 2)
        img = nn.interpolate(img, img_size, mode='bilinear', align_corners=False)

        if self.transform.normalize:
            img = (img - self.mean) / self.std
        elif self.transform.subtract_means:
            img = (img - self.mean)
        elif self.transform.to_float:
            img = img / 255
        
        if self.transform.channel_order != 'RGB':
            raise NotImplementedError
        
        img = img[:, [2, 1, 0], :, :]

        # Return value is in channel order [n, c, h, w] and RGB
        return img
Example #4
    def execute(self, x):
        """
        Arguments:
            x (list[Tensor]): feature maps for each feature level.
        Returns:
            results (tuple[Tensor]): feature maps after FPN layers.
                They are ordered from highest resolution first.
        """
        last_inner = getattr(self, self.inner_blocks[-1])(x[-1])
        results = []
        results.append(getattr(self, self.layer_blocks[-1])(last_inner))
        for feature, inner_block, layer_block in zip(
                x[:-1][::-1], self.inner_blocks[:-1][::-1],
                self.layer_blocks[:-1][::-1]):
            if not inner_block:
                continue
            inner_top_down = nn.interpolate(last_inner,
                                            scale_factor=2,
                                            mode="nearest")
            inner_lateral = getattr(self, inner_block)(feature)
            # TODO use size instead of scale to make it robust to different sizes
            # inner_top_down = F.upsample(last_inner, size=inner_lateral.shape[-2:],
            # mode='bilinear', align_corners=False)
            last_inner = inner_lateral + inner_top_down
            results.insert(0, getattr(self, layer_block)(last_inner))

        if isinstance(self.top_blocks, LastLevelP6P7):
            last_results = self.top_blocks(x[-1], results[-1])
            results.extend(last_results)
        elif isinstance(self.top_blocks, LastLevelMaxPool):
            last_results = self.top_blocks(results[-1])
            results.extend(last_results)
        return tuple(results)
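A minimal sketch of the top-down step above, with assumed shapes: the coarser map is upsampled by 2 with nearest-neighbour interpolation and added to the lateral projection of the finer level.

import jittor as jt
from jittor import nn

last_inner = jt.randn(1, 256, 8, 8)       # coarser FPN feature (assumed shape)
inner_lateral = jt.randn(1, 256, 16, 16)  # lateral projection of the finer level (assumed shape)

inner_top_down = nn.interpolate(last_inner, scale_factor=2, mode="nearest")
last_inner = inner_lateral + inner_top_down  # (1, 256, 16, 16)
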
Example #5
    def create_grid(samples, scale_factor, img_file):
        """
        utility function to create a grid of GAN samples

        :param samples: generated samples for storing
        :param scale_factor: factor for upscaling the image
        :param img_file: name of file to write
        :return: None (saves a file)
        """
        # from torchvision.utils import save_image
        # from torch.nn.functional import interpolate

        # upsample the image
        if scale_factor > 1:
            # samples = interpolate(samples, scale_factor=scale_factor)
            samples = nn.interpolate(samples,
                                     scale_factor=scale_factor,
                                     mode='nearest')

        # save the images:
        # save_image(samples, img_file, nrow=int(np.sqrt(len(samples))),
        #            normalize=True, scale_each=True, pad_value=128, padding=1)
        # print(samples)
        jt.save_image_my(samples,
                         img_file,
                         nrow=int(np.sqrt(len(samples))),
                         normalize=True,
                         scale_each=True,
                         pad_value=128,
                         padding=1)
Example #6
def interpolate(input,
                size=None,
                scale_factor=None,
                mode="nearest",
                align_corners=None):
    if input.numel() > 0:
        return nn.interpolate(input, size, scale_factor, mode, align_corners)

    def _check_size_scale_factor(dim):
        if size is None and scale_factor is None:
            raise ValueError("either size or scale_factor should be defined")
        if size is not None and scale_factor is not None:
            raise ValueError(
                "only one of size or scale_factor should be defined")
        if (scale_factor is not None and isinstance(scale_factor, tuple)
                and len(scale_factor) != dim):
            raise ValueError("scale_factor shape must match input shape. "
                             "Input is {}D, scale_factor size is {}".format(
                                 dim, len(scale_factor)))

    def _output_size(dim):
        _check_size_scale_factor(dim)
        if size is not None:
            return size
        scale_factors = _ntuple(dim)(scale_factor)
        # math.floor might return float in py2.7
        return [
            int(math.floor(input.size(i + 2) * scale_factors[i]))
            for i in range(dim)
        ]

    output_shape = tuple(_output_size(2))
    output_shape = input.shape[:-2] + output_shape
    return _NewEmptyTensorOp()(input, output_shape)
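For an empty input the wrapper above only computes the target shape by hand; as a rough illustration, an assumed (0, 256, 14, 14) input with scale_factor=2 yields (0, 256, 28, 28):

import math

input_shape = (0, 256, 14, 14)  # empty batch (assumed)
scale_factor = 2
output_hw = [int(math.floor(input_shape[i + 2] * scale_factor)) for i in range(2)]
print(tuple(input_shape[:-2]) + tuple(output_hw))  # (0, 256, 28, 28)
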
Example #7
    def execute(self, style, noise, step=0, alpha=-1, mixing_range=(-1,-1)):
        out = noise[0]
        if len(style) < 2:
            inject_index = [len(self.progression) + 1]
        else:
            inject_index = sorted(random.sample(list(range(step)), len(style) - 1))
        crossover = 0

        for i, (conv, to_rgb) in enumerate(zip(self.progression, self.to_rgb)):
            if mixing_range == (-1, -1):
                if crossover < len(inject_index) and i > inject_index[crossover]:
                    crossover = min(crossover + 1, len(style))
                style_step = style[crossover]
            else:
                if mixing_range[0] <= i <= mixing_range[1]:
                    style_step = style[1]
                else:
                    style_step = style[0]

            if i > 0 and step > 0:
                out_prev = out

            out = conv(out, style_step, noise[i])
            
            if i == step:
                out = to_rgb(out)

                if i > 0 and 0 <= alpha < 1:
                    skip_rgb = self.to_rgb[i - 1](out_prev)
                    skip_rgb = nn.interpolate(skip_rgb, scale_factor=2, mode='nearest') # F
                    out = (1 - alpha) * skip_rgb + alpha * out
                break
        return out
Example #8
    def compute_prediction(self, original_image):
        """
        Arguments:
            original_image (np.ndarray): an image as returned by OpenCV

        Returns:
            prediction (BoxList): the detected objects. Additional information
                of the detection properties can be found in the fields of
                the BoxList via `prediction.fields()`
        """
        # apply pre-processing to image
        image = self.transforms(original_image)
        # convert to an ImageList, padded so that it is divisible by
        # cfg.DATALOADER.SIZE_DIVISIBILITY
        image_list = to_image_list(image,
                                   self.cfg.DATALOADER.SIZE_DIVISIBILITY)
        # compute predictions

        with jt.no_grad():
            predictions = self.model(image_list)

        # always single image is passed at a time
        prediction = predictions[0]

        # reshape prediction (a BoxList) into the original image size
        height, width = original_image.shape[:-1]
        input_w, input_h = prediction.size

        prediction = prediction.resize((width, height))

        if prediction.has_field("mask"):
            # if we have masks, paste the masks in the right position
            # in the image, as defined by the bounding boxes
            masks = prediction.get_field("mask")

            if masks.ndim == 3:
                # resize masks

                stride_mask = float(prediction.get_field('stride').item())
                h = math.ceil(masks.shape[1] * stride_mask * height / input_h)
                w = math.ceil(masks.shape[2] * stride_mask * width / input_w)
                mask_th = prediction.get_field('mask_th')
                masks = nn.interpolate(X=masks.unsqueeze(1).float(),
                                       size=(h, w),
                                       mode="bilinear",
                                       align_corners=False) > mask_th
                masks = masks[:, :, :height, :width]

                #masks = masks.unsqueeze(1)
                prediction.add_field("mask", masks)
            else:
                # always single image is passed at a time
                masks = self.masker([masks], [prediction])[0]

                prediction.add_field("mask", masks)

        return prediction
Example #9
    def upsample_cat(self, p1, p2, p3, p4):
        p1 = nn.interpolate(p1,
                            size=(self.numAngle, self.numRho),
                            mode='bilinear',
                            align_corners=True)
        p2 = nn.interpolate(p2,
                            size=(self.numAngle, self.numRho),
                            mode='bilinear',
                            align_corners=True)
        p3 = nn.interpolate(p3,
                            size=(self.numAngle, self.numRho),
                            mode='bilinear',
                            align_corners=True)
        p4 = nn.interpolate(p4,
                            size=(self.numAngle, self.numRho),
                            mode='bilinear',
                            align_corners=True)
        return jt.concat([p1, p2, p3, p4], dim=1)
Example #10
    def execute(self, convouts: List[jt.Var]):
        """
        Args:
            - convouts (list): A list of convouts for the corresponding layers in in_channels.
        Returns:
            - A list of FPN convouts in the same order as x with extra downsample layers if requested.
        """

        out = []
        x = jt.zeros((1, ))
        for i in range(len(convouts)):
            out.append(x)

        # For backward compatibility, the conv layers are stored in reverse but the input and output are
        # given in the correct order. Thus, use j=-i-1 for the input and output and i for the conv layers.
        j = len(convouts)
        for lat_layer in self.lat_layers.layers.values():
            j -= 1

            if j < len(convouts) - 1:
                _, _, h, w = convouts[j].shape
                #print('hh',(h,w),x.shape[-2:])
                x = nn.interpolate(x,
                                   size=(h, w),
                                   mode=self.interpolation_mode,
                                   align_corners=False)
                # x = interpolate(x, size=(h, w), mode=self.interpolation_mode, align_corners=False)

            x = x + lat_layer(convouts[j])
            out[j] = x

        # This janky second loop is here because jtScript.
        j = len(convouts)
        for pred_layer in self.pred_layers.layers.values():
            j -= 1
            out[j] = pred_layer(out[j])

            if self.relu_pred_layers:
                out[j] = nn.relu(out[j])

        cur_idx = len(out)

        # In the original paper, this takes care of P6
        if self.use_conv_downsample:
            for downsample_layer in self.downsample_layers.layers.values():
                out.append(downsample_layer(out[-1]))
        else:
            for idx in range(self.num_downsample):
                # Note: this is an untested alternative to out.append(out[-1][:, :, ::2, ::2]). Thanks jtScript.
                out.append(nn.pool(out[-1], 1, stride=2, op='maximum'))

        if self.relu_downsample_layers:
            for idx in range(len(out) - cur_idx):
                out[idx] = nn.relu(out[idx + cur_idx])

        return out
Example #11
def enforce_size(img, targets, masks, num_crowds, new_w, new_h):
    """ Ensures that the image is the given size without distorting aspect ratio. """
    with jt.no_grad():
        _, h, w = img.size()

        if h == new_h and w == new_w:
            return img, targets, masks, num_crowds

        # Resize the image so that it fits within new_w, new_h
        w_prime = new_w
        h_prime = h * new_w / w

        if h_prime > new_h:
            w_prime *= new_h / h_prime
            h_prime = new_h

        w_prime = int(w_prime)
        h_prime = int(h_prime)

        # Do all the resizing
        img = nn.interpolate(img.unsqueeze(0), (h_prime, w_prime),
                             mode='bilinear',
                             align_corners=False)
        img.squeeze_(0)

        # Act like each object is a color channel
        masks = nn.interpolate(masks.unsqueeze(0), (h_prime, w_prime),
                               mode='bilinear',
                               align_corners=False)
        masks.squeeze_(0)

        # Scale bounding boxes (this will put them in the top left corner in the case of padding)
        targets[:, [0, 2]] *= (w_prime / new_w)
        targets[:, [1, 3]] *= (h_prime / new_h)

        # Finally, pad everything to be the new_w, new_h
        pad_dims = (0, new_w - w_prime, 0, new_h - h_prime)
        img = nn.pad(img, pad_dims, mode='constant', value=0)
        masks = nn.pad(masks, pad_dims, mode='constant', value=0)

        return img, targets, masks, num_crowds
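A worked example of the resize-then-pad arithmetic above, with assumed numbers: a 300x600 (h x w) image fit into 400x400 keeps its aspect ratio and is padded at the bottom only.

h, w, new_h, new_w = 300, 600, 400, 400

w_prime = new_w          # 400
h_prime = h * new_w / w  # 200.0, already <= new_h, so no second shrink
if h_prime > new_h:
    w_prime *= new_h / h_prime
    h_prime = new_h

w_prime, h_prime = int(w_prime), int(h_prime)        # 400, 200
pad_dims = (0, new_w - w_prime, 0, new_h - h_prime)  # (0, 0, 0, 200): pad 200 px at the bottom
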
Example #12
def scale_img(img, ratio=1.0, same_shape=False, gs=32):  # img(16,3,256,416)
    # scales img(bs,3,y,x) by ratio constrained to gs-multiple
    if ratio == 1.0:
        return img
    else:
        h, w = img.shape[2:]
        s = (int(h * ratio), int(w * ratio))  # new size
        img = nn.interpolate(img, size=s, mode='bilinear',
                             align_corners=False)  # resize
        if not same_shape:  # pad/crop img
            h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)]
        return nn.pad(img, [0, w - s[1], 0, h - s[0]],
                      value=0.447)  # value = imagenet mean
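A worked example of the gs-multiple arithmetic above, with assumed numbers: a (bs, 3, 256, 416) batch scaled by ratio=0.5 with gs=32 is resized to 128x208 and then padded 16 px on the right.

import math

h, w, ratio, gs = 256, 416, 0.5, 32
s = (int(h * ratio), int(w * ratio))                       # (128, 208) resized size
h2, w2 = [math.ceil(x * ratio / gs) * gs for x in (h, w)]  # (128, 224) gs-multiple target
pad = [0, w2 - s[1], 0, h2 - s[0]]                         # [0, 16, 0, 0]
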
Example #13
    def forward_for_mask(self, boxlists, pixel_embed):
        N, dim, m_h, m_w = pixel_embed.shape
        new_boxlists = []
        stride = self.fpn_strides[0] / self.mask_scale_factor
        for im in range(N):
            boxlist = boxlists[im]
            boxes = boxlist.bbox
            input_w, input_h = boxlist.size
            proposal_embed = boxlist.get_field('proposal_embed')
            if proposal_embed.shape[0] == 0:
                new_boxlist = BoxList(boxes, boxlist.size, mode="xyxy")
                new_boxlist.add_field("labels", boxlist.get_field("labels"))
                new_boxlist.add_field("scores", boxlist.get_field("scores"))
                new_boxlist.add_field('mask', jt.array([]))
                if self.post_process_masks:
                    new_boxlist.add_field('stride', jt.array([1]))
                    new_boxlist.add_field('mask_th', jt.array([0.0]))
                else:
                    new_boxlist.add_field('stride', jt.array([stride]))
                    new_boxlist.add_field('mask_th', jt.array([self.mask_th]))

                new_boxlists.append(new_boxlist)
                continue

            mask_boxes = boxes / stride
            box_masks = boxes_to_masks(mask_boxes, m_h, m_w)
            proposal_margin = boxlist.get_field('proposal_margin')
            mask_prob = self.compute_mask_prob(pixel_embed[im], proposal_embed,
                                               proposal_margin, mask_boxes)
            masks = mask_prob * box_masks.float()

            if self.post_process_masks:
                masks = nn.interpolate(X=masks.unsqueeze(1).float(),
                                       scale_factor=stride,
                                       mode="bilinear",
                                       align_corners=False) > self.mask_th
                masks = masks[:, 0, :input_h, :input_w]
            new_boxlist = BoxList(boxes, boxlist.size, mode="xyxy")
            new_boxlist.add_field('mask', masks)
            new_boxlist.add_field("labels", boxlist.get_field("labels"))
            new_boxlist.add_field("scores", boxlist.get_field("scores"))
            if self.post_process_masks:
                new_boxlist.add_field('stride', jt.array([1]))
                new_boxlist.add_field('mask_th', jt.array([0.0]))
            else:
                new_boxlist.add_field('stride', jt.array([stride]))
                new_boxlist.add_field('mask_th', jt.array([self.mask_th]))

            new_boxlists.append(new_boxlist)

        return new_boxlists
Example #14
    def __progressive_down_sampling(self, real_batch, depth, alpha):
        """
        private helper for down_sampling the original images in order to facilitate the
        progressive growing of the layers.

        :param real_batch: batch of real samples
        :param depth: depth at which training is going on
        :param alpha: current value of the fade-in alpha
        :return: real_samples => modified real batch of samples
        """

        # from torch.nn import AvgPool2d
        # from torch.nn.functional import interpolate

        if self.structure == 'fixed':
            return real_batch

        # down_sample the real_batch for the given depth
        down_sample_factor = int(np.power(2, self.depth - depth - 1))
        prior_down_sample_factor = max(int(np.power(2, self.depth - depth)), 0)

        # ds_real_samples = AvgPool2d(down_sample_factor)(real_batch)
        ds_real_samples = nn.Pool(down_sample_factor)(real_batch)

        if depth > 0:
            # prior_ds_real_samples = interpolate(AvgPool2d(prior_down_sample_factor)(real_batch), scale_factor=2)
            prior_ds_real_samples = nn.interpolate(
                nn.Pool(prior_down_sample_factor)(real_batch),
                scale_factor=2,
                mode='nearest')
        else:
            prior_ds_real_samples = ds_real_samples

        # real samples are a combination of ds_real_samples and prior_ds_real_samples
        real_samples = (alpha * ds_real_samples) + (
            (1 - alpha) * prior_ds_real_samples)

        # return the so computed real_samples
        return real_samples
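A minimal sketch of the fade-in blend above with made-up shapes (nn.Pool is used with its default pooling op, as in the code above): the batch downsampled to the current depth is mixed with a blurrier prior-resolution version, weighted by alpha.

import jittor as jt
from jittor import nn

real_batch = jt.randn(2, 3, 32, 32)  # assumed batch
alpha = 0.3

ds = nn.Pool(2)(real_batch)                                                      # 16x16 at the current depth
prior = nn.interpolate(nn.Pool(4)(real_batch), scale_factor=2, mode='nearest')   # 8x8 upsampled back to 16x16
real_samples = alpha * ds + (1 - alpha) * prior                                  # (2, 3, 16, 16)
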
Example #15
    def lincomb_mask_loss(self, pos, idx_t, loc_data, mask_data, priors, proto_data, masks, gt_box_t, score_data, inst_data, labels, interpolation_mode='bilinear'):
        mask_h = proto_data.shape[1]
        mask_w = proto_data.shape[2]


        process_gt_bboxes = cfg.mask_proto_normalize_emulate_roi_pooling or cfg.mask_proto_crop

        if cfg.mask_proto_remove_empty_masks:
            # Make sure to store a copy of this because we edit it to get rid of all-zero masks
            pos = pos.clone()

        loss_m = 0
        loss_d = 0 # Coefficient diversity loss

        maskiou_t_list = []
        maskiou_net_input_list = []
        label_t_list = []

        for idx in range(mask_data.shape[0]):
            with jt.no_grad():
                downsampled_masks = nn.interpolate(masks[idx].unsqueeze(0), (mask_h, mask_w),
                                                  mode=interpolation_mode, align_corners=False).squeeze(0)
                downsampled_masks = downsampled_masks.permute(1, 2, 0)

                if cfg.mask_proto_binarize_downsampled_gt:
                    downsampled_masks = (downsampled_masks>0.5).float()

                if cfg.mask_proto_remove_empty_masks:
                    # Get rid of gt masks that are so small they get downsampled away
                    very_small_masks = (downsampled_masks.sum(0).sum(0) <= 0.0001)
                    for i in range(very_small_masks.shape[0]):
                        if very_small_masks[i]:
                            pos[idx, idx_t[idx] == i] = 0

                if cfg.mask_proto_reweight_mask_loss:
                    # Ensure that the gt is binary
                    if not cfg.mask_proto_binarize_downsampled_gt:
                        bin_gt = (downsampled_masks>0.5).float()
                    else:
                        bin_gt = downsampled_masks

                    gt_foreground_norm = bin_gt     / (jt.sum(bin_gt,   dim=(0,1), keepdim=True) + 0.0001)
                    gt_background_norm = (1-bin_gt) / (jt.sum(1-bin_gt, dim=(0,1), keepdim=True) + 0.0001)

                    mask_reweighting   = gt_foreground_norm * cfg.mask_proto_reweight_coeff + gt_background_norm
                    mask_reweighting  *= mask_h * mask_w

            cur_pos = pos[idx]
            cur_pos = jt.where(cur_pos)[0]
            pos_idx_t = idx_t[idx, cur_pos]
            
            if process_gt_bboxes:
                # Note: this is in point-form
                if cfg.mask_proto_crop_with_pred_box:
                    pos_gt_box_t = decode(loc_data[idx, :, :], priors.data, cfg.use_yolo_regressors)[cur_pos]
                else:
                    pos_gt_box_t = gt_box_t[idx, cur_pos]

            if pos_idx_t.shape[0] == 0:
                continue

            proto_masks = proto_data[idx]
            proto_coef  = mask_data[idx, cur_pos, :]
            if cfg.use_mask_scoring:
                mask_scores = score_data[idx, cur_pos, :]

            if cfg.mask_proto_coeff_diversity_loss:
                if inst_data is not None:
                    div_coeffs = inst_data[idx, cur_pos, :]
                else:
                    div_coeffs = proto_coef

                loss_d += self.coeff_diversity_loss(div_coeffs, pos_idx_t)
            
            # If we have over the allowed number of masks, select a random sample
            old_num_pos = proto_coef.shape[0]
            if old_num_pos > cfg.masks_to_train:
                perm = jt.randperm(proto_coef.shape[0])
                select = perm[:cfg.masks_to_train]

                proto_coef = proto_coef[select, :]
                pos_idx_t  = pos_idx_t[select]
                
                if process_gt_bboxes:
                    pos_gt_box_t = pos_gt_box_t[select, :]
                if cfg.use_mask_scoring:
                    mask_scores = mask_scores[select, :]

            num_pos = proto_coef.shape[0]
            mask_t = downsampled_masks[:, :, pos_idx_t]     
            label_t = labels[idx][pos_idx_t]     

            # Size: [mask_h, mask_w, num_pos]
            pred_masks = proto_masks @ proto_coef.transpose(1,0)

            pred_masks = cfg.mask_proto_mask_activation(pred_masks)

            if cfg.mask_proto_double_loss:
                if cfg.mask_proto_mask_activation == activation_func.sigmoid:
                    pre_loss = nn.bce_loss(jt.clamp(pred_masks, 0, 1), mask_t, size_average=False)
                else:
                    pre_loss = nn.smooth_l1_loss(pred_masks, mask_t, reduction='sum')
                
                loss_m += cfg.mask_proto_double_loss_alpha * pre_loss

            if cfg.mask_proto_crop:
                pred_masks = crop(pred_masks, pos_gt_box_t)
            
            if cfg.mask_proto_mask_activation == activation_func.sigmoid:
                pre_loss = binary_cross_entropy(jt.clamp(pred_masks, 0, 1), mask_t)
            else:
                pre_loss = nn.smooth_l1_loss(pred_masks, mask_t, reduction='none')

            if cfg.mask_proto_normalize_mask_loss_by_sqrt_area:
                gt_area  = jt.sum(mask_t, dim=(0, 1), keepdims=True)
                pre_loss = pre_loss / (jt.sqrt(gt_area) + 0.0001)
            
            if cfg.mask_proto_reweight_mask_loss:
                pre_loss = pre_loss * mask_reweighting[:, :, pos_idx_t]
            
                
            if cfg.mask_proto_normalize_emulate_roi_pooling:
                weight = mask_h * mask_w if cfg.mask_proto_crop else 1
                pos_gt_csize = center_size(pos_gt_box_t)
                gt_box_width  = pos_gt_csize[:, 2] * mask_w
                gt_box_height = pos_gt_csize[:, 3] * mask_h
                pre_loss = pre_loss.sum(0).sum(0) / gt_box_width / gt_box_height * weight
            

            # If the number of masks were limited scale the loss accordingly
            if old_num_pos > num_pos:
                pre_loss *= old_num_pos / num_pos

            loss_m += jt.sum(pre_loss)

            if cfg.use_maskiou:
                if cfg.discard_mask_area > 0:
                    gt_mask_area = jt.sum(mask_t, dim=(0, 1))
                    select = gt_mask_area > cfg.discard_mask_area

                    if jt.sum(select).item() < 1:
                        continue

                    pos_gt_box_t = pos_gt_box_t[select, :]
                    pred_masks = pred_masks[:, :, select]
                    mask_t = mask_t[:, :, select]
                    label_t = label_t[select]

                maskiou_net_input = pred_masks.permute(2, 0, 1).unsqueeze(1)
                pred_masks = (pred_masks>0.5).float()                
                maskiou_t = self._mask_iou(pred_masks, mask_t)
                
                maskiou_net_input_list.append(maskiou_net_input)
                maskiou_t_list.append(maskiou_t)
                label_t_list.append(label_t)
        
        losses = {'M': loss_m * cfg.mask_alpha / mask_h / mask_w}

        if cfg.mask_proto_coeff_diversity_loss:
            losses['D'] = loss_d

        if cfg.use_maskiou:
            # discard_mask_area discarded every mask in the batch, so nothing to do here
            if len(maskiou_t_list) == 0:
                return losses, None

            maskiou_t = jt.contrib.concat(maskiou_t_list)
            label_t = jt.contrib.concat(label_t_list)
            maskiou_net_input = jt.contrib.concat(maskiou_net_input_list)

            num_samples = maskiou_t.shape[0]
            if cfg.maskious_to_train > 0 and num_samples > cfg.maskious_to_train:
                perm = jt.randperm(num_samples)
                select = perm[:cfg.maskious_to_train]
                maskiou_t = maskiou_t[select]
                label_t = label_t[select]
                maskiou_net_input = maskiou_net_input[select]

            return losses, [maskiou_net_input, maskiou_t, label_t]

        return losses
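A shape-only sketch, with assumed sizes, of the linear-combination step above: the prototype tensor [mask_h, mask_w, k] times the transposed [num_pos, k] coefficient matrix yields one mask channel per positive detection.

import jittor as jt

mask_h, mask_w, k, num_pos = 34, 34, 32, 5  # assumed sizes
proto_masks = jt.randn(mask_h, mask_w, k)
proto_coef = jt.randn(num_pos, k)

pred_masks = proto_masks @ proto_coef.transpose(1, 0)  # (34, 34, 5)
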
Example #16
def prepare_for_coco_segmentation(predictions, dataset):
    import pycocotools.mask as mask_util
    import numpy as np

    masker = Masker(threshold=0.5, padding=1)
    # assert isinstance(dataset, COCODataset)
    coco_results = []
    for image_id in tqdm(predictions):
        prediction = predictions[image_id]
        original_id = dataset.id_to_img_map[image_id]
        if len(prediction) == 0:
            continue

        img_info = dataset.get_img_info(image_id)
        image_width = img_info["width"]
        image_height = img_info["height"]
        # print(prediction.get_field("mask").shape,image_height,image_width)
        prediction = prediction.resize((image_width, image_height))
        masks = prediction.get_field("mask")
        # t = time.time()
        # Masker is necessary only if masks haven't been already resized.
        # print(masks.shape)

        if prediction.has_field('mask_th'):
            # resize masks
            stride_mask = prediction.get_field('stride')
            input_w, input_h = prediction.size

            h = (masks.shape[1] * stride_mask.float() * image_height / input_h).ceil().int32().item()
            w = (masks.shape[2] * stride_mask.float() * image_width / input_w).ceil().int32().item()
            mask_th = prediction.get_field('mask_th')
            masks = (nn.interpolate(masks.unsqueeze(1).float(), size=(int(h), int(w)), mode="bilinear", align_corners=False)>mask_th)
            masks = masks[:, :, :image_height, :image_width]
        else:
            if list(masks.shape[-2:]) != [image_height, image_width]:
                masks = masker([masks], prediction)
                masks = masks[0]
        # logger.info('Time mask: {}'.format(time.time() - t))
        # prediction = prediction.convert('xywh')

        # boxes = prediction.bbox.tolist()
        scores = prediction.get_field("scores").tolist()
        labels = prediction.get_field("labels").tolist()

        # rles = prediction.get_field('mask')
        masks = masks.numpy()
        rles = [
            mask_util.encode(np.array(mask[0, :, :, np.newaxis], order="F"))[0]
            for mask in masks
        ]
        for rle in rles:
            rle["counts"] = rle["counts"].decode("utf-8")

        mapped_labels = [dataset.contiguous_category_id_to_json_id[i] for i in labels]

        coco_results.extend(
            [
                {
                    "image_id": original_id,
                    "category_id": mapped_labels[k],
                    "segmentation": rle,
                    "score": scores[k],
                }
                for k, rle in enumerate(rles)
            ]
        )
    return coco_results
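A minimal sketch of the RLE encoding step above: pycocotools expects a Fortran-ordered uint8 H x W x 1 array, and the 'counts' bytes are decoded so the result is JSON-serializable (the mask below is random, for illustration only).

import numpy as np
import pycocotools.mask as mask_util

mask = (np.random.rand(32, 32) > 0.5).astype(np.uint8)  # stand-in binary mask
rle = mask_util.encode(np.asfortranarray(mask[:, :, np.newaxis]))[0]
rle["counts"] = rle["counts"].decode("utf-8")
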
Example #17
    def execute(self, x):
        return nn.interpolate(x, *self.args, **self.kwdargs)
Example #18
def postprocess(det_output,
                w,
                h,
                batch_idx=0,
                interpolation_mode='bilinear',
                visualize_lincomb=False,
                crop_masks=True,
                score_threshold=0):
    """
    Postprocesses the output of Yolact in testing mode into a format that makes sense,
    accounting for all the possible configuration settings.

    Args:
        - det_output: The list of dicts that Detect outputs.
        - w: The real width of the image.
        - h: The real height of the image.
        - batch_idx: If you have multiple images for this batch, the image's index in the batch.
        - interpolation_mode: Can be 'nearest' | 'area' | 'bilinear' (see jt.nn.interpolate)

    Returns 4 jt Tensors (in the following order):
        - classes [num_det]: The class idx for each detection.
        - scores  [num_det]: The confidence score for each detection.
        - boxes   [num_det, 4]: The bounding box for each detection in absolute point form.
        - masks   [num_det, h, w]: Full image masks for each detection.
    """

    dets = det_output[batch_idx]
    net = dets['net']
    dets = dets['detection']

    if dets is None:
        return [jt.array([])
                ] * 4  # Warning, this is 4 copies of the same thing

    if score_threshold > 0:
        keep = dets['score'] > score_threshold

        for k in dets:
            if k != 'proto':
                dets[k] = dets[k][keep]

        if dets['score'].shape[0] == 0:
            return [jt.array([])] * 4

    # Actually extract everything from dets now
    classes = dets['class']
    boxes = dets['box']
    scores = dets['score']
    masks = dets['mask']

    if cfg.mask_type == mask_type.lincomb and cfg.eval_mask_branch:
        # At this points masks is only the coefficients
        proto_data = dets['proto']

        # Test flag, do not upvote
        if cfg.mask_proto_debug:
            np.save('scripts/proto.npy', proto_data.numpy())

        if visualize_lincomb:
            display_lincomb(proto_data, masks)

        masks = jt.matmul(proto_data, masks.transpose(1, 0))
        masks = cfg.mask_proto_mask_activation(masks)

        # Crop masks before upsampling because you know why
        if crop_masks:
            masks = crop(masks, boxes)

        # Permute into the correct output shape [num_dets, proto_h, proto_w]
        masks = masks.permute(2, 0, 1)

        if cfg.use_maskiou:
            with timer.env('maskiou_net'):
                with jt.no_grad():
                    maskiou_p = net.maskiou_net(masks.unsqueeze(1))
                    maskiou_p = jt.gather(
                        maskiou_p, dim=1,
                        index=classes.unsqueeze(1)).squeeze(1)
                    if cfg.rescore_mask:
                        if cfg.rescore_bbox:
                            scores = scores * maskiou_p
                        else:
                            scores = [scores, scores * maskiou_p]

        # Scale masks up to the full image
        masks = nn.interpolate(masks.unsqueeze(0), (h, w),
                               mode=interpolation_mode,
                               align_corners=False).squeeze(0)

        # Binarize the masks
        masks = masks > 0.5

    boxes[:, 0], boxes[:, 2] = sanitize_coordinates(boxes[:, 0],
                                                    boxes[:, 2],
                                                    w,
                                                    cast=False)
    boxes[:, 1], boxes[:, 3] = sanitize_coordinates(boxes[:, 1],
                                                    boxes[:, 3],
                                                    h,
                                                    cast=False)
    boxes = boxes.int32()

    if cfg.mask_type == mask_type.direct and cfg.eval_mask_branch:
        # Upscale masks
        full_masks = jt.zeros((masks.shape[0], h, w))

        for jdx in range(masks.shape[0]):
            x1, y1, x2, y2 = boxes[jdx]

            mask_w = x2 - x1
            mask_h = y2 - y1

            # Just in case
            if mask_w * mask_h <= 0 or mask_w < 0:
                continue

            mask = masks[jdx].view(1, 1, cfg.mask_size, cfg.mask_size)
            mask = nn.interpolate(mask, (mask_h, mask_w),
                                  mode=interpolation_mode,
                                  align_corners=False)
            mask = (mask > 0.5).float()
            full_masks[jdx, y1:y2, x1:x2] = mask

        masks = full_masks

    return classes, scores, boxes, masks
Example #19
    def __init__(self,
                 dlatent_size=512,
                 num_channels=3,
                 resolution=1024,
                 fmap_base=8192,
                 fmap_decay=1.0,
                 fmap_max=512,
                 use_styles=True,
                 const_input_layer=True,
                 use_noise=True,
                 nonlinearity='lrelu',
                 use_wscale=True,
                 use_pixel_norm=False,
                 use_instance_norm=True,
                 blur_filter=None,
                 structure='linear',
                 **kwargs):
        """
        Synthesis network used in the StyleGAN paper.

        :param dlatent_size: Disentangled latent (W) dimensionality.
        :param num_channels: Number of output color channels.
        :param resolution: Output resolution.
        :param fmap_base: Overall multiplier for the number of feature maps.
        :param fmap_decay: log2 feature map reduction when doubling the resolution.
        :param fmap_max: Maximum number of feature maps in any layer.
        :param use_styles: Enable style inputs?
        :param const_input_layer: First layer is a learned constant?
        :param use_noise: Enable noise inputs?
        # :param randomize_noise: True = randomize noise inputs every time (non-deterministic),
                                  False = read noise inputs from variables.
        :param nonlinearity: Activation function: 'relu', 'lrelu'
        :param use_wscale: Enable equalized learning rate?
        :param use_pixel_norm: Enable pixel_wise feature vector normalization?
        :param use_instance_norm: Enable instance normalization?
        :param blur_filter: Low-pass filter to apply when resampling activations. None = no filtering.
        :param structure: 'fixed' = no progressive growing, 'linear' = human-readable
        :param kwargs: Ignore unrecognized keyword args.
        """

        super().__init__()

        # if blur_filter is None:
        #     blur_filter = [1, 2, 1]

        def nf(stage):
            return min(int(fmap_base / (2.0**(stage * fmap_decay))), fmap_max)

        self.structure = structure

        resolution_log2 = int(np.log2(resolution))
        assert resolution == 2**resolution_log2 and resolution >= 4
        self.depth = resolution_log2 - 1

        self.num_layers = resolution_log2 * 2 - 2
        self.num_styles = self.num_layers if use_styles else 1

        act, gain = {
            'relu': (nn.ReLU(), np.sqrt(2)),
            'lrelu': (nn.LeakyReLU(scale=0.2), np.sqrt(2))
        }[nonlinearity]

        # Early layers.
        self.init_block = InputBlock(nf(1), dlatent_size, const_input_layer,
                                     gain, use_wscale, use_noise,
                                     use_pixel_norm, use_instance_norm,
                                     use_styles, act)
        # create the ToRGB layers for various outputs
        rgb_converters = [
            EqualizedConv2d(nf(1),
                            num_channels,
                            1,
                            gain=1,
                            use_wscale=use_wscale)
        ]

        # Building blocks for remaining layers.
        blocks = []
        for res in range(3, resolution_log2 + 1):
            last_channels = nf(res - 2)
            channels = nf(res - 1)
            # name = '{s}x{s}'.format(s=2 ** res)
            blocks.append(
                GSynthesisBlock(last_channels, channels, blur_filter,
                                dlatent_size, gain, use_wscale, use_noise,
                                use_pixel_norm, use_instance_norm, use_styles,
                                act))
            rgb_converters.append(
                EqualizedConv2d(channels,
                                num_channels,
                                1,
                                gain=1,
                                use_wscale=use_wscale))

        self.blocks = nn.ModuleList(blocks)
        self.to_rgb = nn.ModuleList(rgb_converters)

        # register the temporary upsampler
        # self.temporaryUpsampler = lambda x: interpolate(x, scale_factor=2)
        self.temporaryUpsampler = lambda x: nn.interpolate(
            x, scale_factor=2, mode='nearest')
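A worked example of the nf(stage) channel schedule above, using the default arguments (fmap_base=8192, fmap_decay=1.0, fmap_max=512):

fmap_base, fmap_decay, fmap_max = 8192, 1.0, 512

def nf(stage):
    return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max)

print([nf(s) for s in range(1, 9)])  # [512, 512, 512, 512, 256, 128, 64, 32]
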
Example #20
def train(hyp, opt, tb_writer=None):
    logger.info(
        colorstr('hyperparameters: ') + ', '.join(f'{k}={v}'
                                                  for k, v in hyp.items()))
    save_dir, epochs, batch_size, weights = Path(
        opt.save_dir), opt.epochs, opt.batch_size, opt.weights

    # Directories
    wdir = save_dir / 'weights'
    wdir.mkdir(parents=True, exist_ok=True)  # make dir
    last = wdir / 'last.pkl'
    best = wdir / 'best.pkl'
    results_file = save_dir / 'results.txt'

    # Save run settings
    with open(save_dir / 'hyp.yaml', 'w') as f:
        yaml.dump(hyp, f, sort_keys=False)
    with open(save_dir / 'opt.yaml', 'w') as f:
        yaml.dump(vars(opt), f, sort_keys=False)

    # Configure
    plots = not opt.evolve  # create plots
    cuda = not opt.no_cuda
    if cuda:
        jt.flags.use_cuda = 1

    init_seeds(1)
    with open(opt.data) as f:
        data_dict = yaml.load(f, Loader=yaml.SafeLoader)  # data dict

    check_dataset(data_dict)  # check
    train_path = data_dict['train']
    test_path = data_dict['val']
    nc = 1 if opt.single_cls else int(data_dict['nc'])  # number of classes
    names = ['item'] if opt.single_cls and len(
        data_dict['names']) != 1 else data_dict['names']  # class names
    assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (
        len(names), nc, opt.data)  # check

    # Model
    model = Model(opt.cfg, ch=3, nc=nc)  # create
    pretrained = weights.endswith('.pkl')
    if pretrained:
        model.load(weights)  # load

    # Optimizer
    nbs = 64  # nominal batch size
    accumulate = max(round(nbs / batch_size),
                     1)  # accumulate loss before optimizing
    hyp['weight_decay'] *= batch_size * accumulate / nbs  # scale weight_decay
    logger.info(f"Scaled weight_decay = {hyp['weight_decay']}")

    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in model.named_modules():
        if hasattr(v, 'bias') and isinstance(v.bias, jt.Var):
            pg2.append(v.bias)  # biases
        if isinstance(v, nn.BatchNorm):
            pg0.append(v.weight)  # no decay
        elif hasattr(v, 'weight') and isinstance(v.weight, jt.Var):
            pg1.append(v.weight)  # apply decay

    if opt.adam:
        optimizer = optim.Adam(pg0,
                               lr=hyp['lr0'],
                               betas=(hyp['momentum'],
                                      0.999))  # adjust beta1 to momentum
    else:
        optimizer = optim.SGD(pg0,
                              lr=hyp['lr0'],
                              momentum=hyp['momentum'],
                              nesterov=True)

    optimizer.add_param_group({
        'params': pg1,
        'weight_decay': hyp['weight_decay']
    })  # add pg1 with weight_decay
    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
    logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' %
                (len(pg2), len(pg1), len(pg0)))
    del pg0, pg1, pg2

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
    lf = one_cycle(1, hyp['lrf'], epochs)  # cosine 1->hyp['lrf']
    scheduler = optim.LambdaLR(optimizer, lr_lambda=lf)
    # plot_lr_scheduler(optimizer, scheduler, epochs)

    loggers = {}  # loggers dict

    start_epoch, best_fitness = 0, 0.0

    # Image sizes
    gs = int(model.stride.max())  # grid size (max stride)
    nl = model.model[
        -1].nl  # number of detection layers (used for scaling hyp['obj'])
    imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size
                         ]  # verify imgsz are gs-multiples

    # EMA
    ema = ModelEMA(model)

    # Trainloader
    dataloader = create_dataloader(train_path,
                                   imgsz,
                                   batch_size,
                                   gs,
                                   opt,
                                   hyp=hyp,
                                   augment=True,
                                   cache=opt.cache_images,
                                   rect=opt.rect,
                                   workers=opt.workers,
                                   image_weights=opt.image_weights,
                                   quad=opt.quad,
                                   prefix=colorstr('train: '))

    mlc = np.concatenate(dataloader.labels, 0)[:, 0].max()  # max label class
    nb = len(dataloader)  # number of batches
    assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (
        mlc, nc, opt.data, nc - 1)

    ema.updates = start_epoch * nb // accumulate  # set EMA updates
    testloader = create_dataloader(
        test_path,
        imgsz_test,
        batch_size,
        gs,
        opt,  # testloader
        hyp=hyp,
        cache=opt.cache_images and not opt.notest,
        rect=True,
        workers=opt.workers,
        pad=0.5,
        prefix=colorstr('val: '))

    labels = np.concatenate(dataloader.labels, 0)
    c = jt.array(labels[:, 0])  # classes

    # cf = torch.bincount(c.int(), minlength=nc) + 1.  # frequency
    # model._initialize_biases(cf)
    if plots:
        plot_labels(labels, save_dir, loggers)
        if tb_writer:
            tb_writer.add_histogram('classes', c.numpy(), 0)

    # Anchors
    if not opt.noautoanchor:
        check_anchors(dataloader,
                      model=model,
                      thr=hyp['anchor_t'],
                      imgsz=imgsz)

    # Model parameters
    hyp['box'] *= 3. / nl  # scale to layers
    hyp['cls'] *= nc / 80. * 3. / nl  # scale to classes and layers
    hyp['obj'] *= (imgsz / 640)**2 * 3. / nl  # scale to image size and layers
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # iou loss ratio (obj_loss = 1.0 or iou)
    model.class_weights = labels_to_class_weights(
        dataloader.labels, nc) * nc  # attach class weights
    model.names = names
    # Start training
    t0 = time.time()
    nw = max(round(hyp['warmup_epochs'] * nb),
             1000)  # number of warmup iterations, max(3 epochs, 1k iterations)
    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
    maps = np.zeros(nc)  # mAP per class
    results = (0, 0, 0, 0, 0, 0, 0
               )  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
    scheduler.last_epoch = start_epoch - 1  # do not move
    logger.info(f'Image sizes {imgsz} train, {imgsz_test} test\n'
                f'Using {dataloader.num_workers} dataloader workers\n'
                f'Logging results to {save_dir}\n'
                f'Starting training for {epochs} epochs...')
    for epoch in range(
            start_epoch, epochs
    ):  # epoch ------------------------------------------------------------------
        model.train()

        # Update image weights (optional)
        if opt.image_weights:
            # Generate indices
            cw = model.class_weights.numpy() * (1 -
                                                maps)**2 / nc  # class weights
            iw = labels_to_image_weights(dataloader.labels,
                                         nc=nc,
                                         class_weights=cw)  # image weights
            dataloader.indices = random.choices(
                range(dataloader.n), weights=iw,
                k=dataloader.n)  # rand weighted idx

        # Update mosaic border
        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders

        mloss = jt.zeros((4, ))  # mean losses
        pbar = enumerate(dataloader)
        logger.info(
            ('\n' + '%10s' * 7) %
            ('Epoch', 'box', 'obj', 'cls', 'total', 'targets', 'img_size'))
        pbar = tqdm(pbar, total=nb)  # progress bar
        for i, (
                imgs, targets, paths, _
        ) in pbar:  # batch -------------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0

            # Warmup
            if ni <= nw:
                xi = [0, nw]  # x interp
                # model.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
                # accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())

                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(ni, xi, [
                        hyp['warmup_bias_lr'] if j == 2 else 0.0,
                        x['initial_lr'] * lf(epoch)
                    ])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(
                            ni, xi, [hyp['warmup_momentum'], hyp['momentum']])

            # Multi-scale
            if opt.multi_scale:
                sz = random.randrange(imgsz * 0.5,
                                      imgsz * 1.5 + gs) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]
                          ]  # new shape (stretched to gs-multiple)
                    imgs = nn.interpolate(imgs,
                                          size=ns,
                                          mode='bilinear',
                                          align_corners=False)
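                # Worked example (hypothetical numbers): with imgsz=640, gs=32 and
                # 640x640 input batches, sz is drawn from [320, 992) and floored to
                # a multiple of 32; e.g. sz=480 gives sf=0.75 and ns=[480, 480].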
            # Forward
            pred = model(imgs)  # forward
            loss, loss_items = compute_loss(pred, targets,
                                            model)  # loss scaled by batch_size
            if opt.quad:
                loss *= 4.

            # Optimize
            optimizer.step(loss)
            if ema:
                ema.update(model)

            # Print
            mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
            s = ('%10s' + '%10.4g' * 6) % ('%g/%g' %
                                           (epoch, epochs - 1), *mloss,
                                           targets.shape[0], imgs.shape[-1])
            pbar.set_description(s)

            # Plot
            if plots and ni < 3:
                f = save_dir / f'train_batch{ni}.jpg'  # filename
                Thread(target=plot_images,
                       args=(imgs, targets, paths, f),
                       daemon=True).start()
                # if tb_writer:
                #     tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
                #     tb_writer.add_graph(model, imgs)  # add model to tensorboard

            # end batch ------------------------------------------------------------------------------------------------
        # end epoch ----------------------------------------------------------------------------------------------------

        # Scheduler
        lr = [x['lr'] for x in optimizer.param_groups]  # for tensorboard
        scheduler.step()

        # mAP
        if ema:
            ema.update_attr(model,
                            include=[
                                'yaml', 'nc', 'hyp', 'gr', 'names', 'stride',
                                'class_weights'
                            ])
        final_epoch = epoch + 1 == epochs
        if not opt.notest or final_epoch:  # Calculate mAP
            results, maps, times = test.test(data=opt.data,
                                             batch_size=batch_size,
                                             imgsz=imgsz_test,
                                             model=ema.ema,
                                             single_cls=opt.single_cls,
                                             dataloader=testloader,
                                             save_dir=save_dir,
                                             plots=plots and final_epoch)

        # Write
        with open(results_file, 'a') as f:
            f.write(s + '%10.4g' * 7 % results +
                    '\n')  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
        if len(opt.name) and opt.bucket:
            os.system('gsutil cp %s gs://%s/results/results%s.txt' %
                      (results_file, opt.bucket, opt.name))

        # Log
        tags = [
            'train/box_loss',
            'train/obj_loss',
            'train/cls_loss',  # train loss
            'metrics/precision',
            'metrics/recall',
            'metrics/mAP_0.5',
            'metrics/mAP_0.5-0.95',
            'val/box_loss',
            'val/obj_loss',
            'val/cls_loss',  # val loss
            'x/lr0',
            'x/lr1',
            'x/lr2'
        ]  # params
        for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
            if tb_writer:
                if hasattr(x, "numpy"):
                    x = x.numpy()
                tb_writer.add_scalar(tag, x, epoch)  # tensorboard

        # Update best mAP
        fi = fitness(np.array(results).reshape(
            1, -1))  # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
        if fi > best_fitness:
            best_fitness = fi

        # Save model
        save = (not opt.nosave) or (final_epoch and not opt.evolve)
        if save:
            # Save last, best and delete
            jt.save(ema.ema.state_dict(), last)
            if best_fitness == fi:
                jt.save(ema.ema.state_dict(), best)
        # end epoch ----------------------------------------------------------------------------------------------------
    # end training
    # Strip optimizers
    final = best if best.exists() else last  # final model
    if opt.bucket:
        os.system(f'gsutil cp {final} gs://{opt.bucket}/weights')  # upload

    # Plots
    if plots:
        plot_results(save_dir=save_dir)  # save as results.png

    # Test best.pkl
    logger.info('%g epochs completed in %.3f hours.\n' %
                (epoch - start_epoch + 1, (time.time() - t0) / 3600))
    best_model = Model(opt.cfg)
    best_model.load(str(final))
    best_model = best_model.fuse()
    if opt.data.endswith('coco.yaml') and nc == 80:  # if COCO
        for conf, iou, save_json in ([0.25, 0.45,
                                      False], [0.001, 0.65,
                                               True]):  # speed, mAP tests
            results, _, _ = test.test(opt.data,
                                      batch_size=batch_size,
                                      imgsz=imgsz_test,
                                      conf_thres=conf,
                                      iou_thres=iou,
                                      model=best_model,
                                      single_cls=opt.single_cls,
                                      dataloader=testloader,
                                      save_dir=save_dir,
                                      save_json=save_json,
                                      plots=False)

    return results
Example #21
    return 1 / (1 + np.exp(-x))


img_fmt = './data/coco/images/%012d.jpg'
with open('info.txt', 'r') as f:
    img_id = int(f.read())

img = plt.imread(img_fmt % img_id).astype(np.float32)
h, w, _ = img.shape

gt_masks = np.load('gt.npy').astype(np.float32).transpose(1, 2, 0)
proto_masks = np.load('proto.npy').astype(np.float32)

proto_masks = jt.array(proto_masks).permute(2, 0, 1).unsqueeze(0)
proto_masks = nn.interpolate(proto_masks, (h, w),
                             mode='bilinear',
                             align_corners=False).squeeze(0)
proto_masks = proto_masks.permute(1, 2, 0).numpy()

# # A x = b
ls_A = proto_masks.reshape(-1, proto_masks.shape[-1])
ls_b = gt_masks.reshape(-1, gt_masks.shape[-1])

# x is size [256, num_gt]
x = np.linalg.lstsq(ls_A, ls_b, rcond=None)[0]

approximated_masks = (np.matmul(proto_masks, x) > 0.5).astype(np.float32)

num_gt = approximated_masks.shape[2]
ious = mask_iou(
    jt.array(approximated_masks.reshape(-1, num_gt).transpose(1, 0)),
Example #22
    def execute(self, x):
        """ The input should be of size [batch_size, 3, img_h, img_w] """
        _, _, img_h, img_w = x.shape
        cfg._tmp_img_h = img_h
        cfg._tmp_img_w = img_w

        with timer.env('backbone'):
            outs = self.backbone(x)

        if cfg.fpn is not None:
            with timer.env('fpn'):
                # Use backbone.selected_layers because we overwrote self.selected_layers
                outs = [outs[i] for i in cfg.backbone.selected_layers]
                outs = self.fpn(outs)
        proto_out = None
        if cfg.mask_type == mask_type.lincomb and cfg.eval_mask_branch:
            with timer.env('proto'):
                proto_x = x if self.proto_src is None else outs[self.proto_src]

                if self.num_grids > 0:
                    grids = self.grid.repeat(proto_x.shape[0], 1, 1, 1)
                    proto_x = jt.contrib.concat([proto_x, grids], dim=1)

                proto_out = self.proto_net(proto_x)
                proto_out = cfg.mask_proto_prototype_activation(proto_out)

                if cfg.mask_proto_prototypes_as_features:
                    # Clone here because we don't want to permute this, though idk if contiguous makes this unnecessary
                    proto_downsampled = proto_out.clone()

                    if cfg.mask_proto_prototypes_as_features_no_grad:
                        proto_downsampled = proto_out.detach()

                # Move the features last so the multiplication is easy
                proto_out = proto_out.permute(0, 2, 3, 1)

                if cfg.mask_proto_bias:
                    bias_shape = [x for x in proto_out.shape]
                    bias_shape[-1] = 1
                    proto_out = jt.contrib.concat(
                        [proto_out, jt.ones(bias_shape)], -1)

        with timer.env('pred_heads'):
            pred_outs = {'loc': [], 'conf': [], 'mask': [], 'priors': []}

            if cfg.use_mask_scoring:
                pred_outs['score'] = []

            if cfg.use_instance_coeff:
                pred_outs['inst'] = []

            for idx, pred_layer in zip(self.selected_layers,
                                       self.prediction_layers):
                pred_x = outs[idx]

                if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_prototypes_as_features:
                    # Scale the prototypes down to the current prediction layer's size and add it as inputs
                    proto_downsampled = nn.interpolate(
                        proto_downsampled,
                        size=outs[idx].shape[2:],
                        mode='bilinear',
                        align_corners=False)
                    # proto_downsampled = interpolate(proto_downsampled, size=outs[idx].shape[2:], mode='bilinear', align_corners=False)

                    pred_x = jt.contrib.concat([pred_x, proto_downsampled],
                                               dim=1)

                # A hack for the way dataparallel works
                if cfg.share_prediction_module and pred_layer is not self.prediction_layers[
                        0]:
                    pred_layer.parent = [self.prediction_layers[0]]

                p = pred_layer(pred_x)

                for k, v in p.items():
                    pred_outs[k].append(v)

        for k, v in pred_outs.items():
            pred_outs[k] = jt.contrib.concat(v, -2)

        if proto_out is not None:
            pred_outs['proto'] = proto_out

        #print('hh',pred_outs)
        #print()
        if self.is_training():
            # For the extra loss functions
            if cfg.use_class_existence_loss:
                pred_outs['classes'] = self.class_existence_fc(
                    outs[-1].mean(dim=(2, 3)))

            if cfg.use_semantic_segmentation_loss:
                pred_outs['segm'] = self.semantic_seg_conv(outs[0])

            return pred_outs
        else:
            if cfg.use_mask_scoring:
                pred_outs['score'] = jt.sigmoid(pred_outs['score'])
            if cfg.use_focal_loss:
                if cfg.use_sigmoid_focal_loss:
                    # Note: even though conf[0] exists, this mode doesn't train it so don't use it
                    pred_outs['conf'] = jt.sigmoid(pred_outs['conf'])
                    if cfg.use_mask_scoring:
                        pred_outs['conf'] *= pred_outs['score']
                elif cfg.use_objectness_score:
                    # See focal_loss_sigmoid in multibox_loss.py for details
                    objectness = jt.sigmoid(pred_outs['conf'][:, :, 0])
                    pred_outs['conf'][:, :, 1:] = objectness.unsqueeze(
                        2) * nn.softmax(pred_outs['conf'][:, :, 1:], -1)
                    pred_outs['conf'][:, :, 0] = 1 - objectness
                else:
                    pred_outs['conf'] = nn.softmax(pred_outs['conf'], -1)
            else:

                if cfg.use_objectness_score:
                    objectness = jt.sigmoid(pred_outs['conf'][:, :, 0])

                    pred_outs['conf'][:, :, 1:] = (objectness > 0.10).unsqueeze(-1) \
                        * nn.softmax(pred_outs['conf'][:, :, 1:], dim=-1)

                else:
                    pred_outs['conf'] = nn.softmax(pred_outs['conf'], -1)
            return self.detect(pred_outs, self)