Example 1
def get_all_anchors_FPN(stride, sizes):
    # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    # are centered on stride / 2, have (approximate) sqrt areas of the specified
    # sizes, and aspect ratios as given.
    cell_anchors = generate_anchors(
        stride,
        scales=np.array([sizes], dtype=np.float) / stride,
        ratios=np.array(config.ANCHOR_RATIOS, dtype=np.float))
    # anchors are intbox here.
    # anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride)
    fpn_max_size = 32 * np.ceil(config.MAX_SIZE / 32)
    field_size = int(np.ceil(fpn_max_size / float(stride)))

    # field_size = config.MAX_SIZE // stride
    shifts = np.arange(0, field_size) * stride
    shift_x, shift_y = np.meshgrid(shifts, shifts)
    shift_x = shift_x.flatten()
    shift_y = shift_y.flatten()
    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()
    # Kx4, K = field_size * field_size
    K = shifts.shape[0]

    A = cell_anchors.shape[0]
    field_of_anchors = (
        cell_anchors.reshape((1, A, 4)) +
        shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    field_of_anchors = field_of_anchors.reshape((field_size, field_size, A, 4))
    # FSxFSxAx4
    assert np.all(field_of_anchors == field_of_anchors.astype('int32'))
    field_of_anchors = field_of_anchors.astype('float32')
    field_of_anchors[:, :, :, [2, 3]] += 1
    return field_of_anchors
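The shifting logic shared by get_all_anchors_FPN above and the get_all_anchors variants below reduces to one NumPy broadcast. The following standalone sketch (toy sizes, not taken from any of these repositories) shows the shapes involved:

import numpy as np

stride, field_size = 16, 4
cell_anchors = np.array([[-8., -8., 8., 8.],        # A = 2 toy base anchors
                         [-16., -4., 16., 4.]])
shifts = np.arange(field_size) * stride
sx, sy = np.meshgrid(shifts, shifts)
shifts = np.vstack((sx.ravel(), sy.ravel(), sx.ravel(), sy.ravel())).T   # (K, 4)

K, A = shifts.shape[0], cell_anchors.shape[0]
# (1, A, 4) + (K, 1, 4) broadcasts to (K, A, 4): each base anchor at each cell
field = cell_anchors.reshape(1, A, 4) + shifts.reshape(1, K, 4).transpose(1, 0, 2)
print(field.shape)                                        # (16, 2, 4)
print(field.reshape(field_size, field_size, A, 4)[0, 0])  # anchors at cell (0, 0)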
Example 2
    def __init__(self, db, video_names, data_dir, z_transforms, x_transforms, training=True):
        self.video_names = video_names
        self.data_dir = data_dir
        self.z_transforms = z_transforms
        self.x_transforms = x_transforms
        meta_data_path = os.path.join(data_dir, 'meta_data.pkl')
        self.meta_data = pickle.load(open(meta_data_path, 'rb'))
        self.meta_data = {x[0]: x[1] for x in self.meta_data}
        # filter out trajectories shorter than 2 frames
        for key in self.meta_data.keys():
            trajs = self.meta_data[key]
            for trkid in list(trajs.keys()):
                if len(trajs[trkid]) < 2:
                    del trajs[trkid]

        self.txn = db.begin(write=False)
        self.num = len(self.video_names) if config.pairs_per_video_per_epoch is None or not training \
            else config.pairs_per_video_per_epoch * len(self.video_names)

        # data augmentation
        self.max_stretch = config.scale_resize
        self.max_translate = config.max_translate
        self.random_crop_size = config.instance_size
        self.center_crop_size = config.exemplar_size

        self.training = training

        #valid_scope = 2 * config.valid_scope + 1
        self.anchors = generate_anchors(config.total_stride, config.anchor_base_size, config.anchor_scales,
                                        config.anchor_ratios,
                                        config.response_map_size)
Example 3
 def __init__(self, cf):
     super(ProposalLayer, self).__init__()
     self.cf = cf
     self._feat_stride = self.cf.feat_stride
     self._anchors = torch.from_numpy(
         generate_anchors(scales=np.array(self.cf.anchor_scales),
                          ratios=np.array(self.cf.anchor_ratios))).float()
     self._num_anchors = self._anchors.size(0)
Example 4
    def __init__(self, cf):
        super(AnchorTargetLayer, self).__init__()
        self.cf = cf
        self._feat_stride = self.cf.feat_stride
        self._scales = self.cf.anchor_scales
        self._anchors = torch.from_numpy(
            generate_anchors(scales=np.array(self.cf.anchor_scales),
                             ratios=np.array(self.cf.anchor_ratios))).float()
        self._num_anchors = self._anchors.size(0)

        # allow boxes to sit over the edge by a small amount
        self._allowed_border = 0  # default is 0
Example 5
def get_all_anchors(
        stride=config.ANCHOR_STRIDE,
        sizes=config.ANCHOR_SIZES):
    """
    Get all anchors in the largest possible image, shifted, floatbox
    Args:
        stride (int): the stride of anchors.
        sizes (tuple[int]): the sizes (sqrt area) of anchors

    Returns:
        anchors: SxSxNUM_ANCHORx4, where S == ceil(MAX_SIZE/STRIDE), floatbox
        The layout in the NUM_ANCHOR dim is NUM_RATIO x NUM_SIZE.

    """
    # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    # are centered on stride / 2, have (approximate) sqrt areas of the specified
    # sizes, and aspect ratios as given.
    cell_anchors = generate_anchors(
        stride,
        scales=np.array(sizes, dtype=np.float) / stride,
        ratios=np.array(config.ANCHOR_RATIOS, dtype=np.float))
    # anchors are intbox here.
    # anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride)

    max_size = config.MAX_SIZE
    if config.MODE_FPN:
        # TODO setting this in config is perhaps better
        size_mult = config.FPN_RESOLUTION_REQUIREMENT * 1.
        max_size = np.ceil(max_size / size_mult) * size_mult
    field_size = int(np.ceil(max_size / stride))
    shifts = np.arange(0, field_size) * stride
    shift_x, shift_y = np.meshgrid(shifts, shifts)
    shift_x = shift_x.flatten()
    shift_y = shift_y.flatten()
    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()
    # Kx4, K = field_size * field_size
    K = shifts.shape[0]

    A = cell_anchors.shape[0]
    field_of_anchors = (
        cell_anchors.reshape((1, A, 4)) +
        shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    field_of_anchors = field_of_anchors.reshape((field_size, field_size, A, 4))
    # FSxFSxAx4
    assert np.all(field_of_anchors == field_of_anchors.astype('int32'))
    field_of_anchors = field_of_anchors.astype('float32')
    field_of_anchors[:, :, :, [2, 3]] += 1
    return field_of_anchors
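The FPN branch above pads MAX_SIZE up to a multiple of FPN_RESOLUTION_REQUIREMENT before computing the field size, so every pyramid level divides the padded size evenly. A one-line illustration with made-up numbers (the real values come from the config):

import numpy as np

max_size, size_mult = 1333, 32.0          # hypothetical values
padded = np.ceil(max_size / size_mult) * size_mult
print(padded)                             # 1344.0, the next multiple of 32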
Example 6
def get_sniper_all_anchors(stride=None, sizes=None):
    """
    Get all anchors in the largest possible image, shifted, floatbox
    Args:
        stride (int): the stride of anchors.
        sizes (tuple[int]): the sizes (sqrt area) of anchors

    Returns:
        anchors: SxSxNUM_ANCHORx4, where S == ceil(MAX_SIZE/STRIDE), floatbox
        The layout in the NUM_ANCHOR dim is NUM_RATIO x NUM_SIZE.

    """
    if stride is None:
        stride = cfg.RPN.ANCHOR_STRIDE
    if sizes is None:
        sizes = cfg.RPN.ANCHOR_SIZES
    # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    # are centered on stride / 2, have (approximate) sqrt areas of the specified
    # sizes, and aspect ratios as given.
    cell_anchors = generate_anchors(stride,
                                    scales=np.array(sizes, dtype=np.float) /
                                    stride,
                                    ratios=np.array(cfg.RPN.ANCHOR_RATIOS,
                                                    dtype=np.float))
    # anchors are intbox here.
    # anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride)

    # max_size = 0
    max_size = cfg.SNIPER.CHIP_SIZE
    field_size = int(np.ceil(max_size / stride))
    shifts = np.arange(0, field_size) * stride
    shift_x, shift_y = np.meshgrid(shifts, shifts)
    shift_x = shift_x.flatten()
    shift_y = shift_y.flatten()
    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()
    # Kx4, K = field_size * field_size
    K = shifts.shape[0]

    A = cell_anchors.shape[0]
    field_of_anchors = (cell_anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2)))
    field_of_anchors = field_of_anchors.reshape((field_size, field_size, A, 4))
    # FSxFSxAx4
    # Plenty of rounding happens inside the anchor code anyway
    # assert np.all(field_of_anchors == field_of_anchors.astype('int32'))
    field_of_anchors = field_of_anchors.astype('float32')
    field_of_anchors[:, :, :, [2, 3]] += 1
    return field_of_anchors
Example 7
def get_all_anchors(stride=None, sizes=None):
    """
    Get all anchors in the largest possible image, shifted, floatbox
    Args:
        stride (int): the stride of anchors.
        sizes (tuple[int]): the sizes (sqrt area) of anchors

    Returns:
        anchors: SxSxNUM_ANCHORx4, where S == ceil(MAX_SIZE/STRIDE), floatbox
        The layout in the NUM_ANCHOR dim is NUM_RATIO x NUM_SIZE.

    """
    if stride is None:
        stride = cfg.RPN.ANCHOR_STRIDE
    if sizes is None:
        sizes = cfg.RPN.ANCHOR_SIZES
    # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    # are centered on stride / 2, have (approximate) sqrt areas of the specified
    # sizes, and aspect ratios as given.
    cell_anchors = generate_anchors(
        stride,
        scales=np.array(sizes, dtype=np.float) / stride,
        ratios=np.array(cfg.RPN.ANCHOR_RATIOS, dtype=np.float))
    # anchors are intbox here.
    # anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride)

    max_size = cfg.PREPROC.MAX_SIZE
    field_size = int(np.ceil(max_size / stride))
    shifts = np.arange(0, field_size) * stride
    shift_x, shift_y = np.meshgrid(shifts, shifts)
    shift_x = shift_x.flatten()
    shift_y = shift_y.flatten()
    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()
    # Kx4, K = field_size * field_size
    K = shifts.shape[0]

    A = cell_anchors.shape[0]
    field_of_anchors = (
        cell_anchors.reshape((1, A, 4)) +
        shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    field_of_anchors = field_of_anchors.reshape((field_size, field_size, A, 4))
    # FSxFSxAx4
    # Plenty of rounding happens inside the anchor code anyway
    # assert np.all(field_of_anchors == field_of_anchors.astype('int32'))
    field_of_anchors = field_of_anchors.astype('float32')
    field_of_anchors[:, :, :, [2, 3]] += 1
    return field_of_anchors
Example 8
def get_all_anchors(stride=config.ANCHOR_STRIDE, sizes=config.ANCHOR_SIZES):
    """
    Get all anchors in the largest possible image, shifted, floatbox
    Args:
        stride (int): the stride of anchors.
        sizes (tuple[int]): the sizes (sqrt area) of anchors

    Returns:
        anchors: SxSxNUM_ANCHORx4, where S == ceil(MAX_SIZE/STRIDE), floatbox
        The layout in the NUM_ANCHOR dim is NUM_RATIO x NUM_SIZE.

    """
    # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    # are centered on stride / 2, have (approximate) sqrt areas of the specified
    # sizes, and aspect ratios as given.
    cell_anchors = generate_anchors(stride,
                                    scales=np.array(sizes, dtype=np.float) /
                                    stride,
                                    ratios=np.array(config.ANCHOR_RATIOS,
                                                    dtype=np.float))
    # anchors are intbox here.
    # anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride)

    max_size = config.MAX_SIZE
    if config.MODE_FPN:
        # TODO setting this in config is perhaps better
        size_mult = config.FPN_RESOLUTION_REQUIREMENT * 1.
        max_size = np.ceil(max_size / size_mult) * size_mult
    field_size = int(np.ceil(max_size / stride))
    shifts = np.arange(0, field_size) * stride
    shift_x, shift_y = np.meshgrid(shifts, shifts)
    shift_x = shift_x.flatten()
    shift_y = shift_y.flatten()
    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()
    # Kx4, K = field_size * field_size
    K = shifts.shape[0]

    A = cell_anchors.shape[0]
    field_of_anchors = (cell_anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2)))
    field_of_anchors = field_of_anchors.reshape((field_size, field_size, A, 4))
    # FSxFSxAx4
    assert np.all(field_of_anchors == field_of_anchors.astype('int32'))
    field_of_anchors = field_of_anchors.astype('float32')
    field_of_anchors[:, :, :, [2, 3]] += 1
    return field_of_anchors
Example 9
    def anchor_compose(self, height, width):
        anchors = generate_anchors(ratios=np.array(self.anchor_ratios),
                                   scales=np.array(self.anchor_scales))
        num_anchor = anchors.shape[0]
        shift_x = np.arange(0, width) * self.feat_stride
        shift_y = np.arange(0, height) * self.feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()
        k = shifts.shape[0]
        # width changes faster, so here it is H, W, C
        anchors = anchors.reshape((1, num_anchor, 4)) + shifts.reshape(
            (1, k, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((k * num_anchor, 4)).astype(np.float32,
                                                              copy=False)

        return torch.from_numpy(anchors).cuda()
Example 10
 def __init__(self, spatial_scale=0.0625,
              train=False,
              rpn_pre_nms_top_n = None,
              rpn_post_nms_top_n = None,
              rpn_nms_thresh = None,
              rpn_min_size = 0,
              anchor_sizes=(32, 64, 128, 256, 512), 
              anchor_aspect_ratios=(0.5, 1, 2)):
     super(GenerateProposals, self).__init__()
     self._anchors = generate_anchors(sizes=anchor_sizes, aspect_ratios=anchor_aspect_ratios, stride=1. / spatial_scale)
     self._num_anchors = self._anchors.shape[0]
     self._spatial_scale = spatial_scale
     self._train = train        
     self.rpn_pre_nms_top_n = rpn_pre_nms_top_n if rpn_pre_nms_top_n is not None else (12000 if train else 6000)
     self.rpn_post_nms_top_n = rpn_post_nms_top_n if rpn_post_nms_top_n is not None else (2000 if train else 1000)
     self.rpn_nms_thresh = rpn_nms_thresh if rpn_nms_thresh is not None else 0.7
     self.rpn_min_size = rpn_min_size if rpn_min_size is not None else 0
Example 11
def get_all_anchors(
        stride=config.ANCHOR_STRIDE,
        sizes=config.ANCHOR_SIZES,
        ratios=config.ANCHOR_RATIOS):
    """
    Get all anchors in the largest possible image, shifted, floatbox

    Returns:
        anchors: SxSxNUM_ANCHORx4, where S == MAX_SIZE//STRIDE, floatbox
        The layout in the NUM_ANCHOR dim is NUM_RATIO x NUM_SCALE.

    """
    # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    # are centered on stride / 2, have (approximate) sqrt areas of the specified
    # sizes, and aspect ratios as given.
    cell_anchors = generate_anchors(
        stride,
        scales=np.array(sizes, dtype=np.float) / stride,
        ratios=np.array(ratios, dtype=np.float))
    # anchors are intbox here.
    # anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride)

    field_size = config.MAX_SIZE // stride
    shifts = np.arange(0, field_size) * stride
    shift_x, shift_y = np.meshgrid(shifts, shifts)
    shift_x = shift_x.flatten()
    shift_y = shift_y.flatten()
    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()
    # Kx4, K = field_size * field_size
    K = shifts.shape[0]

    A = cell_anchors.shape[0]
    field_of_anchors = (
        cell_anchors.reshape((1, A, 4)) +
        shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    field_of_anchors = field_of_anchors.reshape((field_size, field_size, A, 4))
    # FSxFSxAx4
    #assert np.all(field_of_anchors == field_of_anchors.astype('int32'))
    field_of_anchors = field_of_anchors.astype('float32')
    field_of_anchors[:, :, :, [2, 3]] += 1
    return field_of_anchors
Example 12
def get_all_anchors(
        stride=config.ANCHOR_STRIDE,
        sizes=config.ANCHOR_SIZES):
    """
    Get all anchors in the largest possible image, shifted, floatbox

    Returns:
        anchors: SxSxNUM_ANCHORx4, where S == MAX_SIZE//STRIDE, floatbox
        The layout in the NUM_ANCHOR dim is NUM_RATIO x NUM_SCALE.

    """
    # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    # are centered on stride / 2, have (approximate) sqrt areas of the specified
    # sizes, and aspect ratios as given.
    cell_anchors = generate_anchors(
        stride,
        scales=np.array(sizes, dtype=np.float) / stride,
        ratios=np.array(config.ANCHOR_RATIOS, dtype=np.float))
    # anchors are intbox here.
    # anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride)

    field_size = config.MAX_SIZE // stride
    shifts = np.arange(0, field_size) * stride
    shift_x, shift_y = np.meshgrid(shifts, shifts)
    shift_x = shift_x.flatten()
    shift_y = shift_y.flatten()
    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()
    # Kx4, K = field_size * field_size
    K = shifts.shape[0]

    A = cell_anchors.shape[0]
    field_of_anchors = (
        cell_anchors.reshape((1, A, 4)) +
        shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    field_of_anchors = field_of_anchors.reshape((field_size, field_size, A, 4))
    # FSxFSxAx4
    assert np.all(field_of_anchors == field_of_anchors.astype('int32'))
    field_of_anchors = field_of_anchors.astype('float32')
    field_of_anchors[:, :, :, [2, 3]] += 1
    return field_of_anchors
Example 13
 def __init__(self,
              spatial_scale=0.0625,
              train=False,
              rpn_pre_nms_top_n=None,
              rpn_post_nms_top_n=None,
              rpn_nms_thresh=None,
              rpn_min_size=0,
              anchor_sizes=(32, 64, 128, 256, 512),
              anchor_aspect_ratios=(0.5, 1, 2)):
     super(GenerateProposals, self).__init__()
     self._anchors = generate_anchors(sizes=anchor_sizes,
                                      aspect_ratios=anchor_aspect_ratios,
                                      stride=1. / spatial_scale)
     self._num_anchors = self._anchors.shape[0]
     self._spatial_scale = spatial_scale
     self._train = train
     self.rpn_pre_nms_top_n = rpn_pre_nms_top_n if rpn_pre_nms_top_n is not None else (
         12000 if train else 6000)
     self.rpn_post_nms_top_n = rpn_post_nms_top_n if rpn_post_nms_top_n is not None else (
         2000 if train else 1000)
     self.rpn_nms_thresh = rpn_nms_thresh if rpn_nms_thresh is not None else 0.7
     self.rpn_min_size = rpn_min_size if rpn_min_size is not None else 0
Example 14
    def forward(self, cls_scores, bbox_deltas, gt_boxes, device):
        """
        process proposals from the RPN
        :param bbox_deltas: [N x 4K x H x W ]
        :param cls_scores: [N x 2K x H x W  ] of scores not probabilities
        :param gt_boxes: [M x 4] [x1, y1, x2, y2]
        :return:
        """
        """
        Algorithm
        1) get all center points
        2) make all anchors using center points
        3) apply bbox_deltas
        4) calculate IoUs
        5) find positive labels
        6) find negative labels
        7) sample down the negative labels
        8) calculate losses
        """
        # ensure center and original anchors have been precomputed
        if self.feat_stride is None:
            self.feat_stride = round(self.image_size /
                                     float(cls_scores.size(3)))
        if self.anchors is None:
            self.anchors = generate_anchors(self.feat_stride,
                                            cls_scores.size(3), self.ratios,
                                            self.scales).to(device)

        N, _, H, W = cls_scores.shape
        cls_scores = cls_scores.permute(0, 2, 3, 1)
        # apply bbox deltas but first reshape to (batch,H,W,4K)
        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1)
        # reshape again to match anchors (N,H,W,Kx4)
        bbox_deltas = bbox_deltas.reshape(N, H, W, -1, 4)
        _anchors = self.anchors.float()
        regions = _anchors + bbox_deltas
        # now we clip the boxes to the image
        regions = regions.view(N, -1, 4, H, W).permute(0, 3, 4, 1, 2)
        # reshape to [batch x L x 4]
        regions = regions.reshape(N, -1, 4)
        # filter cross-boundary boxes during training
        for i in range(N):
            regions[i, :, :] = cross_boundary(regions[i, :, :],
                                              self.image_size,
                                              device,
                                              remove=False)
        # we need anchors to be [L x 4]
        _anchors = _anchors.reshape(-1, 4)
        # get matches / losses per batch
        cls_scores = cls_scores.reshape(N, -1, 1)
        tot_cls_loss = 0.0
        tot_bbox_loss = 0.0
        tot_fg = 0.0
        tot_bg = 0.0
        for i in range(N):
            matches = match(regions[i, :, :], gt_boxes[i][:, :4].squeeze(0),
                            self.upper, self.lower, device)
            # keep positives and negatives; anchors labelled neither are ignored
            pos_mask = matches >= 0
            pos_inds = pos_mask.nonzero()
            neg_mask = matches == NEGATIVE
            neg_inds = neg_mask.nonzero()
            # sample 256 anchors
            pos_inds = pos_inds.reshape(-1)
            npos = min(pos_inds.size(0), 128)
            pos_inds_perm = torch.randperm(pos_inds.size(0))[:npos]
            pos_inds = pos_inds[pos_inds_perm]
            bg_num = self.sample_num - npos
            bg_num = min(60, bg_num)
            perm = torch.randperm(neg_inds.size(0))
            # index back into neg_inds so the sampled values are anchor indices
            sample_neg_inds = neg_inds[perm[:bg_num]].reshape(-1)
            gt_cls = torch.cat(
                (torch.ones(pos_inds.size(0)),
                 torch.zeros(sample_neg_inds.size(0)))).to(device)
            # grab cls_scores from each point
            pred_cls = torch.cat(
                (cls_scores[i, pos_inds],
                 cls_scores[i, sample_neg_inds])).to(device).squeeze()
            # TODO avoid this reshape edge case
            gt_cls = gt_cls.reshape(-1)
            pred_cls = pred_cls.reshape(-1)
            cls_loss = self.cls_loss(pred_cls, gt_cls.reshape(-1))
            if cls_loss != cls_loss:
                print(f"pred_cls: {pred_cls}")
                print(f"gt_cls: {gt_cls}")
            # we only do bbox regression on positive targets
            # get and reshape matches
            gt_indxs = matches[pos_inds].long()
            sample_gt_bbox = gt_boxes[i][:, gt_indxs, :]
            sample_gt_bbox = sample_gt_bbox.reshape(-1, 4)
            sample_pred_bbox = regions[i, pos_inds, :]
            sample_roi_bbox = _anchors[pos_inds, :]
            norm = torch.tensor(N).float()
            bbox_loss = self.bbox_loss(sample_pred_bbox, sample_gt_bbox,
                                       sample_roi_bbox, norm)
            tot_cls_loss = tot_cls_loss + cls_loss
            tot_bbox_loss = tot_bbox_loss + bbox_loss
            tot_fg += npos
            tot_bg += bg_num
        return tot_cls_loss, tot_bbox_loss, tot_bg, tot_fg, pred_cls.mean()
Example 15
import sys
from utils.generate_anchors import generate_anchors
from PIL import Image
from PIL import ImageDraw

r = 1
image_size = 500
map_size = 16
feat_stride = float(image_size) / map_size
ratios = [1.0, 2.0, 0.5]
scales = [32, 64, 128]

base_image = Image.new("RGB", (image_size, image_size), color="#FFF")
centers, anchors = generate_anchors(feat_stride,
                                    map_size,
                                    ratios,
                                    scales,
                                    output_centers=True)
draw = ImageDraw.Draw(base_image)
centers = centers.reshape(-1, 2)
print(anchors)

for x, y in centers.astype(int):
    draw.ellipse((x - r, y - r, x + r, y + r), fill="#f00")
i, j = 6, 6
for idx in range(9):
    x, y, h, w = anchors[i, j, idx]
    x1 = x - w / 2
    y1 = y - h / 2
    x2 = x + w / 2
    y2 = y + h / 2
    # draw each anchor box (anchors are unpacked as center x, center y, h, w)
    draw.rectangle((x1, y1, x2, y2), outline="#00f")

base_image.show()
Example 16
def get_all_anchors(stride=None, sizes=None, tile=True):
    """
    Get all anchors in the largest possible image, shifted, floatbox
    Args:
        stride (int): the stride of anchors.
        sizes (tuple[int]): the sizes (sqrt area) of anchors

    Returns:
        anchors: SxSxNUM_ANCHORx4, where S == ceil(MAX_SIZE/STRIDE), floatbox
        The layout in the NUM_ANCHOR dim is NUM_RATIO x NUM_SIZE.

    """
    if stride is None:
        stride = cfg.RPN.ANCHOR_STRIDE
    if sizes is None:
        sizes = cfg.RPN.ANCHOR_SIZES
    # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    # are centered on stride / 2, have (approximate) sqrt areas of the specified
    # sizes, and aspect ratios as given.
    if not cfg.RPN.UNQUANTIZED_ANCHOR:
        cell_anchors = generate_anchors(
            stride,
            scales=np.array(sizes, dtype=np.float) / stride,
            ratios=np.array(cfg.RPN.ANCHOR_RATIOS, dtype=np.float))
    else:
        anchors = []
        ratios=np.array(cfg.RPN.ANCHOR_RATIOS, dtype=np.float)
        for sz in sizes:
            for ratio in ratios:
                w = np.sqrt(sz * sz / ratio)
                h = ratio * w
                anchors.append([-w, -h, w, h])
        cell_anchors = np.asarray(anchors) * 0.5
    # anchors are intbox here.
    # anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride)

    if tile:
        max_size = cfg.PREPROC.MAX_SIZE
        field_size = int(np.ceil(max_size / stride))
        if not cfg.RPN.UNQUANTIZED_ANCHOR:
            shifts = np.arange(0, field_size) * stride
        else:
            shifts = (np.arange(0, field_size) * stride).astype("float32")
        shift_x, shift_y = np.meshgrid(shifts, shifts)
        shift_x = shift_x.flatten()
        shift_y = shift_y.flatten()
        shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()
        # Kx4, K = field_size * field_size
        K = shifts.shape[0]

        A = cell_anchors.shape[0]
        # both anchor modes tile the cell anchors over the shifts the same way
        field_of_anchors = (
            cell_anchors.reshape((1, A, 4)) +
            shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
        field_of_anchors = field_of_anchors.reshape((field_size, field_size, A, 4))
        # FSxFSxAx4
        # Plenty of rounding happens inside the anchor code anyway
        # assert np.all(field_of_anchors == field_of_anchors.astype('int32'))
        field_of_anchors = field_of_anchors.astype('float32')
        if not cfg.RPN.UNQUANTIZED_ANCHOR:
            field_of_anchors[:, :, :, [2, 3]] += 1
        return field_of_anchors
    else:
        cell_anchors = cell_anchors.astype('float32')
        cell_anchors[:, [2, 3]] += 1
        return cell_anchors
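The unquantized branch in the example above builds each base anchor directly from a size sz and a ratio r via w = sqrt(sz*sz / r) and h = r * w. A quick check with arbitrary numbers confirms the resulting box keeps area sz*sz and aspect ratio h/w = r:

import numpy as np

for sz in (32, 64):
    for r in (0.5, 1.0, 2.0):
        w = np.sqrt(sz * sz / r)
        h = r * w
        # the box [-w/2, -h/2, w/2, h/2] has width w and height h
        print(sz, r, round(w * h), round(h / w, 3))   # area == sz*sz, h/w == r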
Example 17
    def forward(self, cls_scores, bbox_deltas, device):
        """
        process proposals from the RPN
        :param bbox_deltas: [N x 4K x H x W ]
        :param cls_scores: [N x 2K x H x W  ] of scores not probabilities
        :return:
        """
        """
        Algorithm
        1) get all center points
        2) make all anchors using center points
        3) apply bbox_deltas
        4) clip boxes to image
        5) filter small boxes
        6) pre-NMS filtering by score
        7) NMS filtering
        8) post NMS filtering by score
        """
        # ensure center and original anchors have been precomputed
        if self.feat_stride is None:
            self.feat_stride = round(self.image_size /
                                     float(cls_scores.size(3)))
        if self.anchors is None:
            self.anchors = generate_anchors(self.feat_stride,
                                            cls_scores.size(3), self.ratios,
                                            self.scales).to(device)
        N, _, H, W = cls_scores.shape
        cls_scores = cls_scores.permute(0, 2, 3, 1)

        # apply bbox deltas, but first permute from (N, 4K, H, W) to (N, H, W, 4K)
        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1)
        # reshape again to match anchors
        bbox_deltas = bbox_deltas.reshape(N, H, W, -1, 4)
        _anchors = self.anchors.float()
        regions = _anchors + bbox_deltas
        # now we clip the boxes to the image

        # now we can grab the pre NMS regions
        # first we reshape the tensors to be N x K, N x K x 4
        cls_scores = cls_scores.permute(0, 3, 1, 2).reshape(N, -1)
        regions = regions.view(N, -1, 4, H, W).permute(0, 3, 4, 1, 2)
        regions = regions.reshape(N, -1, 4)
        for i in range(N):
            regions[i, :, :] = cross_boundary(regions[i, :, :],
                                              self.image_size,
                                              device,
                                              remove=False)
        pre_nms = min(self.NMS_PRE, cls_scores.size(1))
        _, sort_order = cls_scores.topk(pre_nms, dim=1)
        slices_scores = []
        slices_regions = []
        for i in range(N):
            slice_idxs = sort_order[i, :]
            slice_score = cls_scores[i, slice_idxs]
            slice_region = regions[i, slice_idxs, :]
            slices_regions.append(slice_region)
            slices_scores.append(slice_score)
        cls_scores = torch.stack(slices_scores, dim=0)
        regions = torch.stack(slices_regions, dim=0)
        output = cls_scores.new(N, self.NMS_POST, 5)
        # TODO implement padding here
        for i in range(N):
            keep_idx = nms(regions[i, :, :], cls_scores[i, :], self.threshold)
            keep_idx = keep_idx[:self.NMS_POST]
            output[i, :, 1:] = pad_tensor(regions[i, keep_idx, :],
                                          (self.NMS_POST, 4))
            output[i, :, 0] = pad_tensor(cls_scores[i, keep_idx].unsqueeze(1),
                                         (self.NMS_POST, 1)).squeeze()
        return output
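Steps 6-8 of the algorithm in the example above (pre-NMS top-k by score, NMS, post-NMS truncation) can be reproduced compactly with torchvision's nms; the layer itself uses its own nms import, so the sketch below is only an illustration with random boxes and hypothetical limits:

import torch
from torchvision.ops import nms

NMS_PRE, NMS_POST, IOU_THRESH = 6, 3, 0.7      # hypothetical limits
boxes = torch.rand(20, 4) * 100
boxes[:, 2:] += boxes[:, :2]                   # ensure x2 > x1 and y2 > y1
scores = torch.rand(20)

scores, order = scores.topk(min(NMS_PRE, scores.numel()))    # pre-NMS filter
boxes = boxes[order]
keep = nms(boxes, scores, IOU_THRESH)[:NMS_POST]             # NMS + post-NMS cap
proposals = torch.cat((scores[keep, None], boxes[keep]), dim=1)  # (n, 5): score, x1, y1, x2, y2
print(proposals.shape)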
Example 18
def train_NuSeT(self):
    """Train the NuSeT model.

    Training parameters (learning rate, optimizer, epochs, score/NMS
    thresholds, normalization method) are read from self.params; validation
    metrics are reported after every epoch.
    """
    # Get the training parameters
    learning_rate = self.params['lr']
    optimizer = self.params['optimizer']
    num_epoch = self.params['epochs']
    bbox_min_score = self.params['min_score']
    nms_thresh = self.params['nms_threshold']
    normalization_method = self.params['normalization_method']
    # Load the data
    # x_train, y_train: training images and corresponding labels
    # x_val, y_val: validation images and corresponding labels
    # w_train, w_val: training and validation weight matrices for U-Net
    # bbox_train, bbox_val: bounding box coordinates for train and validation dataset
    x_train, x_val, y_train, y_val, w_train, w_val, bbox_train, bbox_val = load_data_train(
        self, normalization_method)

    # pred_dict and pred_dict_final save all the temp variables
    pred_dict_final = {}

    # tensor placeholder for training images with labels
    train_initial = tf.placeholder(dtype=tf.float32, shape=[1, None, None, 1])
    labels = tf.placeholder(dtype=tf.float32, shape=[1, None, None, 1])

    # tensor placeholders for weight matrices and ground truth bounding boxes
    edge_weights = tf.placeholder(dtype=tf.float32, shape=[1, None, None, 1])
    gt_boxes = tf.placeholder(dtype=tf.float32, shape=[None, 5])

    input_shape = tf.shape(train_initial)

    input_height = input_shape[1]
    input_width = input_shape[2]
    im_shape = tf.cast([input_height, input_width], tf.float32)

    # number of classes to be classified; in our case this equals 2
    # (foreground and background)
    nb_classes = 2

    # feed the initial image to U-Net, we expect 2 outputs:
    # 1. feat_map of shape (1,input_height/16,input_width/16,1024), which will be passed to the
    # region proposal network
    # 2. final_logits of shape(1,input_height,input_width,2), which is the prediction from U-net
    with tf.variable_scope('model_U-Net') as scope:
        final_logits, feat_map = UNET(nb_classes, train_initial)

    # The final_logits has 2 channels for foreground/background softmax scores,
    # then we get prediction with larger score for each pixel
    pred_masks = tf.argmax(final_logits, axis=3)
    pred_masks = tf.reshape(pred_masks, [input_height, input_width])
    pred_masks = tf.to_float(pred_masks)

    # Dynamic anchor base size calculated from median cell lengths
    base_size = anchor_size(tf.reshape(labels, [input_height, input_width]))

    # scales and ratios are used to generate different anchors
    scales = np.array([0.5, 1, 2])
    ratios = np.array([0.125, 0.25, 0.5, 1, 2, 4, 8])

    # stride controls how sparsely anchors are placed across the image:
    # stride = 16 places an anchor every 16 pixels of the original image
    stride = 16

    # Generate the anchor reference with respect to the original image
    ref_anchors = generate_anchors_reference(base_size, ratios, scales)
    num_ref_anchors = scales.shape[0] * ratios.shape[0]

    feat_height = input_height / stride
    feat_width = input_width / stride

    # Generate all the anchors based on ref_anchors
    all_anchors = generate_anchors(ref_anchors, stride,
                                   [feat_height, feat_width])

    num_anchors = all_anchors.shape[0]
    with tf.variable_scope('model_RPN') as scope:
        prediction_dict = RPN(feat_map, num_ref_anchors)

    # Get the tensors from the dict
    rpn_cls_prob = prediction_dict['rpn_cls_prob']
    rpn_bbox_pred = prediction_dict['rpn_bbox_pred']

    proposal_prediction = RPNProposal(rpn_cls_prob, rpn_bbox_pred, all_anchors,
                                      im_shape, nms_thresh)

    pred_dict_final['all_anchors'] = tf.cast(all_anchors, tf.float32)
    pred_dict_final['gt_bboxes'] = gt_boxes
    prediction_dict['proposals'] = proposal_prediction['proposals']
    prediction_dict['scores'] = proposal_prediction['scores']

    # When training we use a separate module to calculate the target
    # values we want to output.
    (rpn_cls_target, rpn_bbox_target,
     rpn_max_overlap) = RPNTarget(all_anchors, num_anchors, gt_boxes, im_shape)

    prediction_dict['rpn_cls_target'] = rpn_cls_target
    prediction_dict['rpn_bbox_target'] = rpn_bbox_target

    pred_dict_final['rpn_prediction'] = prediction_dict
    scores = pred_dict_final['rpn_prediction']['scores']
    proposals = pred_dict_final['rpn_prediction']['proposals']

    pred_masks_watershed = tf.to_float(
        marker_watershed(scores,
                         proposals,
                         pred_masks,
                         min_score=bbox_min_score))

    # Loss is defined as rpn loss(class loss + bounding box loss) +
    # segmentation loss(default is the sum of soft dice and cross-entropy)
    rpn_loss = RPNLoss(prediction_dict)

    RPN_loss = rpn_loss['rpn_cls_loss'] + rpn_loss['rpn_reg_loss']
    SEG_loss = segmentation_loss(final_logits,
                                 pred_masks_watershed,
                                 labels,
                                 edge_weights,
                                 mode='COMBO')

    final_loss = RPN_loss + SEG_loss

    # If training with just U-Net, then only include segmentation loss
    #final_loss = SEG_loss

    # Metrics are pixel accuracy, mean IU, mean accuracy, root mean squared error
    metrics, metrics_op = compute_metrics(pred_masks, labels)

    pred_dict_final['unet_mask'] = pred_masks

    # get the optimizer
    gen_train_op = optimizer_fun(optimizer,
                                 final_loss,
                                 learning_rate=learning_rate)

    # start point for training, and end point for graph
    sess = tf.Session()

    sess.run(tf.global_variables_initializer())

    num_batches = len(x_train)
    num_batches_val = len(x_val)

    saver = tf.train.Saver()

    if normalization_method == 'wn':
        self.training_results.set('Start whole image Norm. training ...')
        self.window.update()

    if normalization_method == 'fg':
        self.training_results.set('Start Foreground Norm. training ...')
        self.window.update()

    # training image indices are shuffled at every epoch during training
    idx = np.arange(num_batches)

    best_IU = 0
    for iteration in range(0, num_epoch):
        # The batch pointer to validation data
        j = 0
        sess.run(tf.local_variables_initializer())
        if iteration == num_epoch - 1 and normalization_method == 'wn':
            self.whole_norm_y_pred = []

        # shuffle the sequence of the training data for the current epoch
        np.random.shuffle(idx)

        for i in tqdm(range(0, num_batches)):
            self.train_progress_var.set(i / num_batches * 100)
            self.window.update()
            # Generate the batch data from training data and training label
            batch_data = x_train[idx[i]]
            batch_data_shape = batch_data.shape
            batch_data = np.reshape(
                batch_data, [1, batch_data_shape[0], batch_data_shape[1], 1])
            batch_label = np.reshape(
                y_train[idx[i]],
                [1, batch_data_shape[0], batch_data_shape[1], 1])
            batch_edge = np.reshape(
                w_train[idx[i]],
                [1, batch_data_shape[0], batch_data_shape[1], 1])
            batch_bbox = bbox_train[idx[i]]

            # Skip if this batch does not contain any object (bounding box is null)
            if batch_bbox.size > 0:
                # Here include the optimizer to actually perform learning
                sess.run(
                    [gen_train_op],
                    feed_dict={
                        train_initial: batch_data,
                        gt_boxes: batch_bbox,
                        labels: batch_label,
                        edge_weights: batch_edge
                    })

                # Only calculate the accuracy and loss after the training epoch
                if i == num_batches - 1:
                    while j < num_batches_val:
                        # Generate the batch data from val data and val label

                        batch_data = x_val[j]
                        batch_data_shape = batch_data.shape
                        batch_data = np.reshape(
                            batch_data,
                            [1, batch_data_shape[0], batch_data_shape[1], 1])
                        batch_label = np.reshape(
                            y_val[j],
                            [1, batch_data_shape[0], batch_data_shape[1], 1])
                        batch_edge = np.reshape(
                            w_val[j],
                            [1, batch_data_shape[0], batch_data_shape[1], 1])
                        batch_bbox = bbox_val[j]

                        # At the end of whole image normalization training,
                        # cache the predictions
                        if iteration == num_epoch - 1 and normalization_method == 'wn':
                            self.whole_norm_y_pred.append(
                                sess.run(pred_masks,
                                         feed_dict={
                                             train_initial: batch_data,
                                             gt_boxes: batch_bbox,
                                             labels: batch_label,
                                             edge_weights: batch_edge
                                         }))

                        if batch_bbox.size > 0:
                            # Here get the accuracy and loss for each batch in validation cycle
                            loss_temp, rpnloss_temp, segloss_temp = sess.run(
                                [final_loss, rpn_loss, SEG_loss],
                                feed_dict={
                                    train_initial: batch_data,
                                    gt_boxes: batch_bbox,
                                    labels: batch_label,
                                    edge_weights: batch_edge
                                })

                            sess.run(
                                [metrics_op],
                                feed_dict={
                                    train_initial: batch_data,
                                    gt_boxes: batch_bbox,
                                    labels: batch_label,
                                    edge_weights: batch_edge
                                })

                            if j == num_batches_val - 1:
                                metrics_all = sess.run(metrics,
                                                       feed_dict={
                                                           train_initial:
                                                           batch_data,
                                                           gt_boxes:
                                                           batch_bbox,
                                                           labels: batch_label,
                                                           edge_weights:
                                                           batch_edge
                                                       })

                                _mean_IU = metrics_all['global']['mean_IU']
                                _pixel_accuracy = metrics_all['global'][
                                    'pixel_accuracy']
                                _f1 = 2 * _mean_IU / (1 + _mean_IU)
                                _rmse = metrics_all['global']['rmse']

                            # Get moving average of metrics and losses
                            if j == 0:
                                loss_total = loss_temp
                                cls_loss = rpnloss_temp['rpn_cls_loss']
                                reg_loss = rpnloss_temp['rpn_reg_loss']
                                seg_loss = segloss_temp

                            else:
                                loss_total = (1 - 1 /
                                              (j + 1)) * loss_total + 1 / (
                                                  j + 1) * loss_temp
                                cls_loss = (1 - 1 / (j + 1)) * cls_loss + 1 / (
                                    j + 1) * rpnloss_temp['rpn_cls_loss']
                                reg_loss = (1 - 1 / (j + 1)) * reg_loss + 1 / (
                                    j + 1) * rpnloss_temp['rpn_reg_loss']
                                seg_loss = (1 - 1 / (j + 1)) * seg_loss + 1 / (
                                    j + 1) * segloss_temp

                            j = j + 1

        print(
            'Epoch: %d - loss: %.2f - cls_loss: %.2f - reg_loss: %.2f - seg_loss: %.2f - mean_IU: %.4f - f1: %.4f - pixel_accuracy: %.4f'
            % (iteration, loss_total, cls_loss, reg_loss, seg_loss, _mean_IU,
               _f1, _pixel_accuracy))

        self.training_results.set('Epoch ' + str(iteration) + ', loss ' +
                                  '{0:.2f}'.format(loss_total) + ', mean IU ' +
                                  '{0:.2f}'.format(_mean_IU))
        self.window.update()

        # Keep track of the best model over the last 10 epochs and save it

        if iteration >= num_epoch - 10 and normalization_method == 'wn' and _mean_IU > best_IU:
            best_IU = _mean_IU
            saver.save(sess, './Network/whole_norm.ckpt')

        if iteration >= num_epoch - 10 and normalization_method == 'fg' and _mean_IU > best_IU:
            best_IU = _mean_IU
            saver.save(sess, './Network/foreground.ckpt')

    sess.close()
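The validation loop above accumulates loss_total with loss_total = (1 - 1/(j+1)) * loss_total + 1/(j+1) * loss_temp, which is just the incremental form of the arithmetic mean. A tiny check with made-up values:

vals = [3.0, 5.0, 10.0]
running = 0.0
for j, v in enumerate(vals):
    running = (1 - 1 / (j + 1)) * running + 1 / (j + 1) * v
print(running, sum(vals) / len(vals))   # both print 6.0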
Example 19
def test(params, self):
    """Predict masks for all images in a given directory, and save them  

    Args:
        params (dict): the parameters of the network
    """

    # Get the testing parameters
    perform_watershed = params['watershed']
    bbox_min_score = params['min_score']
    nms_thresh = params['nms_threshold']
    postProcess = params['postProcess']
    resize_scale = params['scale_ratio']

    # Load the data
    # x_test, y_test: test images and corresponding labels
    x_id, x_test = load_data_test(self.batch_seg_path)
    # pred_dict and pred_dict_final save all the temp variables
    pred_dict_final = {}

    train_initial = tf.placeholder(dtype=tf.float32, shape=[1, None, None, 1])

    input_shape = tf.shape(train_initial)

    input_height = input_shape[1]
    input_width = input_shape[2]
    im_shape = tf.cast([input_height, input_width], tf.float32)

    # number of classes to be classified; in our case this equals 2
    # (foreground and background)
    nb_classes = 2

    # feed the initial image to U-Net, we expect 2 outputs:
    # 1. feat_map of shape (?,hf,wf,1024), which will be passed to the
    # region proposal network
    # 2. final_logits of shape(?,h,w,2), which is the prediction from U-net
    with tf.variable_scope('model_U-Net') as scope:
        final_logits, feat_map = UNET(nb_classes, train_initial)

    # The final_logits has 2 channels for foreground/background softmax scores,
    # then we get prediction with larger score for each pixel
    pred_masks = tf.argmax(final_logits, axis=3)
    pred_masks = tf.reshape(pred_masks, [input_height, input_width])
    pred_masks = tf.to_float(pred_masks)

    # Dynamic anchor base size calculated from median cell lengths
    base_size = anchor_size(tf.reshape(pred_masks,
                                       [input_height, input_width]))

    # scales and ratios are used to generate different anchors
    scales = np.array([0.5, 1, 2])
    ratios = np.array([0.125, 0.25, 0.5, 1, 2, 4, 8])

    # stride controls how sparsely anchors are placed across the image:
    # stride = 16 places an anchor every 16 pixels of the original image
    stride = 16

    # Generate the anchor reference with respect to the original image
    ref_anchors = generate_anchors_reference(base_size, ratios, scales)
    num_ref_anchors = scales.shape[0] * ratios.shape[0]

    feat_height = input_height / stride
    feat_width = input_width / stride

    # Generate all the anchors based on ref_anchors
    all_anchors = generate_anchors(ref_anchors, stride,
                                   [feat_height, feat_width])

    num_anchors = all_anchors.shape[0]
    with tf.variable_scope('model_RPN') as scope:
        prediction_dict = RPN(feat_map, num_ref_anchors)

    # Get the tensors from the dict
    rpn_cls_prob = prediction_dict['rpn_cls_prob']
    rpn_bbox_pred = prediction_dict['rpn_bbox_pred']

    proposal_prediction = RPNProposal(rpn_cls_prob, rpn_bbox_pred, all_anchors,
                                      im_shape, nms_thresh)

    pred_dict_final['all_anchors'] = tf.cast(all_anchors, tf.float32)
    prediction_dict['proposals'] = proposal_prediction['proposals']
    prediction_dict['scores'] = proposal_prediction['scores']

    pred_dict_final['rpn_prediction'] = prediction_dict
    scores = pred_dict_final['rpn_prediction']['scores']
    proposals = pred_dict_final['rpn_prediction']['proposals']

    pred_masks_watershed = tf.to_float(
        marker_watershed(scores,
                         proposals,
                         pred_masks,
                         min_score=bbox_min_score))

    # start point for testing, and end point for graph
    sess = tf.Session()

    sess.run(tf.global_variables_initializer())

    num_batches_test = len(x_test)

    saver = tf.train.Saver()

    masks1 = []
    # Restore the per-image normalization model from the trained network
    saver.restore(sess, './Network/whole_norm.ckpt')
    sess.run(tf.local_variables_initializer())
    for j in tqdm(range(0, num_batches_test)):
        # whole image normalization
        batch_data = x_test[j]
        batch_data_shape = batch_data.shape
        image = np.reshape(batch_data,
                           [batch_data_shape[0], batch_data_shape[1]])

        if resize_scale != 1:
            image = rescale(image,
                            self.params['scale_ratio'],
                            anti_aliasing=True)

        # Crop the height and width to a multiple of 16
        imheight, imwidth = image.shape
        imheight = imheight // 16 * 16
        imwidth = imwidth // 16 * 16
        image = image[:imheight, :imwidth]

        image_normalized_wn = whole_image_norm(image)
        image_normalized_wn = np.reshape(image_normalized_wn,
                                         [1, imheight, imwidth, 1])

        masks = sess.run(pred_masks,
                         feed_dict={train_initial: image_normalized_wn})
        self.progress_var.set(j / 2 / num_batches_test * 100)
        self.window.update()

        # First pass, get the coarse masks, and normalize the image on masks
        masks1.append(masks)

    # Restore the foreground normalization model from the trained network
    saver.restore(sess, './Network/foreground.ckpt')

    sess.run(tf.local_variables_initializer())
    for j in tqdm(range(0, num_batches_test)):
        batch_data = x_test[j]
        batch_data_shape = batch_data.shape
        image = np.reshape(batch_data,
                           [batch_data_shape[0], batch_data_shape[1]])

        if resize_scale != 1:
            image = rescale(image, self.params['scale_ratio'])

        # Crop the height and width to a multiple of 16
        imheight, imwidth = image.shape
        imheight = imheight // 16 * 16
        imwidth = imwidth // 16 * 16
        image = image[:imheight, :imwidth]

        # Final pass, foreground normalization to get final masks
        image_normalized_fg = foreground_norm(image, masks1[j])
        image_normalized_fg = np.reshape(image_normalized_fg,
                                         [1, imheight, imwidth, 1])

        # If adding watershed, we save the watershed masks separately
        if perform_watershed == 'yes':

            masks_watershed = sess.run(
                pred_masks_watershed,
                feed_dict={train_initial: image_normalized_fg})

            if postProcess == 'yes':
                masks_watershed = clean_image(masks_watershed)

            # Revert the scale to original display
            if resize_scale != 1:
                masks_watershed = rescale(masks_watershed,
                                          1 / self.params['scale_ratio'])

            I8 = (((masks_watershed - masks_watershed.min()) /
                   (masks_watershed.max() - masks_watershed.min())) *
                  255).astype(np.uint8)
            img = Image.fromarray(I8)
            img.save(self.batch_seg_path + x_id[j] + '_masks_watershed.png')

        else:

            masks = sess.run(pred_masks,
                             feed_dict={train_initial: image_normalized_fg})

            if postProcess == 'yes':
                masks = clean_image(masks)

            # enable these 2 lines if you want to see the detection result
            #image_pil = draw_top_nms_proposals(pred_dict, batch_data, min_score=bbox_min_score, draw_gt=False)
            #image_pil.save(str(j)+'_pred.png')

            # Revert the scale to original display
            if resize_scale != 1:
                masks = rescale(masks, 1 / self.params['scale_ratio'])

            I8 = (((masks - masks.min()) / (masks.max() - masks.min())) *
                  255).astype(np.uint8)
            img = Image.fromarray(I8)
            img.save(self.batch_seg_path + x_id[j] + '_masks.png')

        self.progress_var.set(50 + j / 2 / num_batches_test * 100)
        self.window.update()
    sess.close()
Example 20
def test_single_img(params, x_test):
    """input the image, return the segmented mask

    Args:
        params (dict): the parameters of the network
        x_test: the input image in numpy array
    """

    # Get the testing parameters
    perform_watershed = params['watershed']
    bbox_min_score = params['min_score']
    nms_thresh = params['nms_threshold']
    postProcess = params['postProcess']

    # pred_dict and pred_dict_final save all the temp variables
    pred_dict_final = {}

    train_initial = tf.placeholder(dtype=tf.float32, shape=[1, None, None, 1])

    input_shape = tf.shape(train_initial)

    input_height = input_shape[1]
    input_width = input_shape[2]
    im_shape = tf.cast([input_height, input_width], tf.float32)

    # number of classes to be classified; in our case this equals 2
    # (foreground and background)
    nb_classes = 2

    # feed the initial image to U-Net, we expect 2 outputs:
    # 1. feat_map of shape (?,32,32,1024), which will be passed to the
    # region proposal network
    # 2. final_logits of shape(?,512,512,2), which is the prediction from U-net
    with tf.variable_scope('model_U-Net') as scope:
        final_logits, feat_map = UNET(nb_classes, train_initial)

    # The final_logits has 2 channels for foreground/background softmax scores,
    # then we get prediction with larger score for each pixel
    pred_masks = tf.argmax(final_logits, axis=3)
    pred_masks = tf.reshape(pred_masks, [input_height, input_width])
    pred_masks = tf.to_float(pred_masks)

    # Dynamic anchor base size calculated from median cell lengths
    base_size = anchor_size(tf.reshape(pred_masks,
                                       [input_height, input_width]))

    # scales and ratios are used to generate different anchors
    scales = np.array([0.5, 1, 2])
    ratios = np.array([0.125, 0.25, 0.5, 1, 2, 4, 8])

    # stride controls how sparsely anchors are placed across the image:
    # stride = 16 places an anchor every 16 pixels of the original image
    stride = 16

    # Generate the anchor reference with respect to the original image
    ref_anchors = generate_anchors_reference(base_size, ratios, scales)
    num_ref_anchors = scales.shape[0] * ratios.shape[0]

    feat_height = input_height / stride
    feat_width = input_width / stride

    # Generate all the anchors based on ref_anchors
    all_anchors = generate_anchors(ref_anchors, stride,
                                   [feat_height, feat_width])

    num_anchors = all_anchors.shape[0]
    with tf.variable_scope('model_RPN') as scope:
        prediction_dict = RPN(feat_map, num_ref_anchors)

    # Get the tensors from the dict
    rpn_cls_prob = prediction_dict['rpn_cls_prob']
    rpn_bbox_pred = prediction_dict['rpn_bbox_pred']

    proposal_prediction = RPNProposal(rpn_cls_prob, rpn_bbox_pred, all_anchors,
                                      im_shape, nms_thresh)

    pred_dict_final['all_anchors'] = tf.cast(all_anchors, tf.float32)
    prediction_dict['proposals'] = proposal_prediction['proposals']
    prediction_dict['scores'] = proposal_prediction['scores']

    pred_dict_final['rpn_prediction'] = prediction_dict
    scores = pred_dict_final['rpn_prediction']['scores']
    proposals = pred_dict_final['rpn_prediction']['proposals']

    pred_masks_watershed = tf.to_float(
        marker_watershed(scores,
                         proposals,
                         pred_masks,
                         min_score=bbox_min_score))

    # start point for testing, and end point for graph

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    num_batches_test = len(x_test)

    saver = tf.train.Saver()

    masks1 = []

    # Restore the per-image normalization model from the trained network
    saver.restore(sess, './Network/whole_norm.ckpt')
    #saver.restore(sess,'./Network/whole_norm_weights_fluorescent/'+str(3)+'.ckpt')
    sess.run(tf.local_variables_initializer())
    for j in tqdm(range(0, num_batches_test)):
        # whole image normalization
        batch_data = x_test[j]
        batch_data_shape = batch_data.shape
        image_normalized_wn = whole_image_norm(batch_data)
        image_normalized_wn = np.reshape(
            image_normalized_wn,
            [1, batch_data_shape[0], batch_data_shape[1], 1])

        masks = sess.run(pred_masks,
                         feed_dict={train_initial: image_normalized_wn})

        # First pass, get the coarse masks, and normalize the image on masks
        masks1.append(masks)

    # Restore the foreground normalization model from the trained network
    saver.restore(sess, './Network/foreground.ckpt')
    #saver.restore(sess,'./Network/fg_norm_weights_fluorescent/'+str(30)+'.ckpt')
    sess.run(tf.local_variables_initializer())
    for j in tqdm(range(0, num_batches_test)):
        batch_data = x_test[j]
        batch_data_shape = batch_data.shape
        image = np.reshape(batch_data,
                           [batch_data_shape[0], batch_data_shape[1]])

        # Final pass, foreground normalization to get final masks
        image_normalized_fg = foreground_norm(image, masks1[j])
        image_normalized_fg = np.reshape(
            image_normalized_fg,
            [1, batch_data_shape[0], batch_data_shape[1], 1])

        # If adding watershed, we save the watershed masks separately
        if perform_watershed == 'yes':
            masks = sess.run(pred_masks_watershed,
                             feed_dict={train_initial: image_normalized_fg})

            if postProcess == 'yes':
                masks = clean_image(masks)

        else:
            masks = sess.run(pred_masks,
                             feed_dict={train_initial: image_normalized_fg})

            if postProcess == 'yes':
                masks = clean_image(masks)

    sess.close()

    return masks
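Examples 19 and 20 both run a two-pass scheme: segment once under whole-image normalization, then re-normalize using the coarse foreground mask and segment again. The repo's whole_image_norm and foreground_norm are not shown here; the sketch below is only one plausible reading of those helpers (an assumption, not the actual implementation):

import numpy as np

def whole_image_norm_sketch(img):
    # z-score over every pixel (assumed behaviour)
    return (img - img.mean()) / (img.std() + 1e-8)

def foreground_norm_sketch(img, coarse_mask):
    # z-score using statistics of the predicted foreground only (assumed behaviour)
    fg = img[coarse_mask > 0]
    if fg.size == 0:
        return whole_image_norm_sketch(img)
    return (img - fg.mean()) / (fg.std() + 1e-8)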