Example #1
def sample_pdf(bins, weights, N_samples, det=False):
    # Get pdf
    weights = weights + 1e-5  # prevent nans
    pdf = weights / jt.sum(weights, -1, keepdims=True)
    cdf = jt.cumsum(pdf, -1)
    cdf = jt.concat([jt.zeros_like(cdf[..., :1]), cdf],
                    -1)  # (batch, len(bins))

    # Take uniform samples
    if det:
        u = jt.linspace(0., 1., steps=N_samples)
        u = u.expand(list(cdf.shape[:-1]) + [N_samples])
    else:
        u = jt.random(list(cdf.shape[:-1]) + [N_samples])

    # Invert CDF
    inds = jt.searchsorted(cdf, u, right=True)
    below = jt.maximum(jt.zeros_like(inds - 1), inds - 1)
    above = jt.minimum((cdf.shape[-1] - 1) * jt.ones_like(inds), inds)
    inds_g = jt.stack([below, above], -1)  # (batch, N_samples, 2)

    matched_shape = [inds_g.shape[0], inds_g.shape[1], cdf.shape[-1]]
    cdf_g = jt.gather(cdf.unsqueeze(1).expand(matched_shape), 2, inds_g)
    bins_g = jt.gather(bins.unsqueeze(1).expand(matched_shape), 2, inds_g)

    denom = (cdf_g[..., 1] - cdf_g[..., 0])
    denom[denom < 1e-5] = 1.0
    t = (u - cdf_g[..., 0]) / denom
    samples = bins_g[..., 0] + t * (bins_g[..., 1] - bins_g[..., 0])

    return samples
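A minimal usage sketch (shapes and values are made up, not from the original repo; it assumes the snippet's own `import jittor as jt` is in scope): sample_pdf performs inverse-CDF sampling, so given per-ray bin edges and bin weights it returns N_samples new sample positions per ray.

# Hedged usage sketch: 4 rays, 11 bin edges, 10 weights per ray.
import jittor as jt

bins = jt.linspace(0., 1., 11).expand(4, 11)   # (4, 11) bin edges per ray
weights = jt.random([4, 10])                   # (4, 10) bin weights per ray
samples = sample_pdf(bins, weights, N_samples=16, det=True)
print(samples.shape)                           # expected: [4, 16]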
Example #2
    def execute(self, inputs, targets, mask=None, act=False):
        losses = []
        for id in range(len(inputs)):
            if mask is not None:
                input_flatten, target_flatten = self.flatten(
                    inputs[id], targets[id], mask[id])
            else:
                input_flatten, target_flatten = self.flatten(
                    inputs[id], targets[id])
            if act:
                MIN = 1e-9
                input_flatten = jt.clamp(input_flatten,
                                         min_v=MIN,
                                         max_v=1 - MIN)
                input_flatten = jt.log(input_flatten) - jt.log(1 -
                                                               input_flatten)
            losses.append(self.lovasz_hinge_flat(input_flatten,
                                                 target_flatten))
        losses = jt.stack(losses)
        if self.reduction == "mean":
            losses = losses.mean()
        elif self.reduction == "sum":
            losses = losses.sum()

        return losses
Example #3
def make_grid(x,
              nrow=8,
              padding=2,
              normalize=False,
              range=None,
              scale_each=False,
              pad_value=0):
    assert isinstance(range, tuple) or range is None
    assert scale_each == False
    if isinstance(x, list): x = jt.stack(x)
    assert isinstance(x, jt.Var)
    if x.ndim < 4: return x
    if x.ndim == 4 and x.shape[0] <= 1: return x
    nrow = min(nrow, x.shape[0])
    if normalize:
        if range is None: x = (x - x.min()) / (x.max() - x.min())
        else: x = (x - range[0]) / (range[1] - range[0])
    b, c, h, w = x.shape
    ncol = math.ceil(b / nrow)
    return x.reindex(
        [c, h * ncol + (ncol + 1) * padding, w * nrow + (nrow + 1) * padding],
        [
            f"i1/{padding+h}*{nrow}+i2/{padding+w}", "i0",
            f"i1-i1/{padding+h}*{padding+h}-{padding}",
            f"i2-i2/{padding+w}*{padding+w}-{padding}"
        ],
        overflow_value=pad_value)
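A hedged usage sketch (input sizes assumed; the snippet's own imports, math and jittor, are taken to be in scope): make_grid tiles a batch of images into one image with `padding` pixels of `pad_value` between tiles.

import jittor as jt

imgs = jt.random([16, 3, 32, 32])                      # 16 random RGB images
grid = make_grid(imgs, nrow=8, padding=2, normalize=True)
print(grid.shape)   # [3, 70, 274] for 8 columns x 2 rows of 32x32 tiles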
Example #4
def collate_batch(batch):
    r"""Puts each data field into a tensor with outer dimension batch size"""
    real_size = len(batch)
    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, jt.Var):
        temp_data = jt.stack([data for data in batch], 0)
        return temp_data
    if elem_type is np.ndarray:
        temp_data = np.stack([data for data in batch], 0)
        return temp_data
    elif np.issubdtype(elem_type, np.integer):
        return np.int32(batch)
    elif isinstance(elem, int):
        return np.int32(batch)
    elif isinstance(elem, float):
        return np.float32(batch)
    elif isinstance(elem, str):
        return batch
    elif isinstance(elem, Mapping):
        return {key: collate_batch([d[key] for d in batch]) for key in elem}
    elif isinstance(elem, tuple):
        transposed = zip(*batch)
        return tuple(collate_batch(samples) for samples in transposed)
    elif isinstance(elem, Sequence):
        transposed = zip(*batch)
        return [collate_batch(samples) for samples in transposed]
    elif isinstance(elem, Image.Image):
        temp_data = np.stack([np.array(data) for data in batch], 0)
        return temp_data
    else:
        raise TypeError(f"Not support type <{elem_type.__name__}>")
Example #5
def project_masks_on_boxes(segmentation_masks, proposals, discretization_size):
    """
    Given segmentation masks and the bounding boxes corresponding
    to the location of the masks in the image, this function
    crops and resizes the masks in the position defined by the
    boxes. This prepares the masks for them to be fed to the
    loss computation as the targets.

    Arguments:
        segmentation_masks: an instance of SegmentationMask
        proposals: an instance of BoxList
    """
    masks = []
    M = discretization_size
    device = proposals.bbox.device
    proposals = proposals.convert("xyxy")
    assert segmentation_masks.size == proposals.size, "{}, {}".format(
        segmentation_masks, proposals)

    # FIXME: CPU computation bottleneck, this should be parallelized
    proposals = proposals.bbox
    for segmentation_mask, proposal in zip(segmentation_masks, proposals):
        # crop the masks, resize them to the desired resolution and
        # then convert them to the tensor representation.
        cropped_mask = segmentation_mask.crop(proposal)
        scaled_mask = cropped_mask.resize((M, M))
        mask = scaled_mask.get_mask_tensor()
        masks.append(mask)
    if len(masks) == 0:
        return jt.zeros(0).float32()
    return jt.stack(masks, dim=0).float32()
Example #6
    def test_lstm_cell(self):
        np_h0 = torch.randn(3, 20).numpy()
        np_c0 = torch.randn(3, 20).numpy()

        t_rnn = tnn.LSTMCell(10, 20) 
        input = torch.randn(2, 3, 10)
        h0 = torch.from_numpy(np_h0)
        c0 = torch.from_numpy(np_c0)
        t_output = []
        for i in range(input.size()[0]):
            h0, c0 = t_rnn(input[i], (h0, c0))
            t_output.append(h0)
        t_output = torch.stack(t_output, dim=0)

        j_rnn = nn.LSTMCell(10, 20)
        j_rnn.load_state_dict(t_rnn.state_dict())

        input = jt.float32(input.numpy())
        h0 = jt.float32(np_h0)
        c0 = jt.float32(np_c0)
        j_output = []
        for i in range(input.size()[0]):
            h0, c0 = j_rnn(input[i], (h0, c0))
            j_output.append(h0)
        j_output = jt.stack(j_output, dim=0)

        t_output = t_output.detach().numpy()
        j_output = j_output.data
        assert np.allclose(t_output, j_output, rtol=1e-03, atol=1e-06)
Example #7
 def test_stack(self):
     arr1 = np.random.randn(16, 3, 224, 224)
     arr2 = np.random.randn(16, 3, 224, 224)
     check_equal(torch.stack(
         [torch.Tensor(arr1), torch.Tensor(arr2)], 0),
                 jt.stack([jt.array(arr1), jt.array(arr2)], 0))
     print('pass stack test ...')
Example #8
def knn_indices_func_gpu(
        rep_pts,  # (N, pts, dim)
        pts,  # (N, x, dim)
        k: int,
        d: int):  # (N, pts, K)
    """
    GPU-based Indexing function based on K-Nearest Neighbors search.
    Very memory intensive, and thus unoptimal for large numbers of points.
    :param rep_pts: Representative points.
    :param pts: Point cloud to get indices from.
    :param K: Number of nearest neighbors to collect.
    :param D: "Spread" of neighboring points.
    :return: Array of indices, P_idx, into pts such that pts[n][P_idx[n],:]
    is the set k-nearest neighbors for the representative points in pts[n].
    """
    region_idx = []
    batch_size = rep_pts.shape[0]
    for idx in range(batch_size):
        qry = rep_pts[idx]
        ref = pts[idx]
        # use a separate name for the point dimension so the dilation
        # parameter d is not shadowed below
        n, num_dims = ref.shape
        m, _ = qry.shape
        mref = ref.view(1, n, num_dims).repeat(m, 1, 1)
        mqry = qry.view(m, 1, num_dims).repeat(1, n, 1)

        dist2 = jt.sum((mqry - mref)**2, 2)  # (m, n) squared pairwise distances
        _, inds = topk(dist2, k * d + 1, dim=1, largest=False)

        region_idx.append(inds[:, 1::d])

    region_idx = jt.stack(region_idx, dim=0)

    return region_idx
Example #9
def prepare_data(datum):
    with jt.no_grad():
        images, (targets, masks, num_crowds) = datum
        if not isinstance(images[0], jt.Var):
            images = [jt.array(image, dtype='float32') for image in images]

        if not isinstance(targets[0], jt.Var):
            targets = [jt.array(t, dtype='float32') for t in targets]

        if not isinstance(masks[0], jt.Var):
            masks = [jt.array(m, dtype='float32') for m in masks]

        for cur_idx in range(args.batch_size):
            images[cur_idx] = gradinator(images[cur_idx])
            targets[cur_idx] = gradinator(targets[cur_idx])
            masks[cur_idx] = gradinator(masks[cur_idx])

        if cfg.preserve_aspect_ratio:
            # Choose a random size from the batch
            _, h, w = images[random.randint(0, len(images) - 1)].shape

            for idx, (image, target, mask, num_crowd) in enumerate(
                    zip(images, targets, masks, num_crowds)):
                images[idx], targets[idx], masks[idx], num_crowds[idx] \
                    = enforce_size(image, target, mask, num_crowd, w, h)

        return jt.stack(images, dim=0), targets, masks, num_crowds
Example #10
def quat_conjugate(quat):
    q0 = quat[:, :, 0]
    q1 = ((- 1) * quat[:, :, 1])
    q2 = ((- 1) * quat[:, :, 2])
    q3 = ((- 1) * quat[:, :, 3])
    q_conj = jt.stack([q0, q1, q2, q3], dim=2)
    return q_conj
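A quick sanity sketch (the [w, x, y, z] layout along the last axis of a (batch, n, 4) tensor is assumed): conjugation keeps the scalar part and negates the vector part.

import jittor as jt

quat = jt.array([[[1.0, 2.0, 3.0, 4.0]]])    # shape (1, 1, 4)
print(quat_conjugate(quat).numpy())          # [[[ 1. -2. -3. -4.]]]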
Example #11
 def get_sample_region(self, gt, strides, num_points_per, gt_xs, gt_ys, radius=1):
     num_gts = gt.shape[0]
     K = len(gt_xs)
     gt = gt[None].expand(K, num_gts, 4)
     center_x = (gt[..., 0] + gt[..., 2]) / 2
     center_y = (gt[..., 1] + gt[..., 3]) / 2
     center_gt = gt.new_zeros(gt.shape)
     # no gt
     if center_x[..., 0].sum() == 0:
         return gt_xs.new_zeros(gt_xs.shape, dtype='uint8')
     beg = 0
     for level, n_p in enumerate(num_points_per):
         end = beg + n_p
         stride = strides[level] * radius
         xmin = center_x[beg:end] - stride
         ymin = center_y[beg:end] - stride
         xmax = center_x[beg:end] + stride
         ymax = center_y[beg:end] + stride
         # limit sample region in gt
         center_gt[beg:end, :, 0] = jt.ternary(xmin > gt[beg:end, :, 0], xmin, gt[beg:end, :, 0])
         center_gt[beg:end, :, 1] = jt.ternary(ymin > gt[beg:end, :, 1], ymin, gt[beg:end, :, 1])
         center_gt[beg:end, :, 2] = jt.ternary(xmax > gt[beg:end, :, 2], gt[beg:end, :, 2], xmax)
         center_gt[beg:end, :, 3] = jt.ternary(ymax > gt[beg:end, :, 3], gt[beg:end, :, 3], ymax)
         beg = end
     left = gt_xs[:, None] - center_gt[..., 0]
     right = center_gt[..., 2] - gt_xs[:, None]
     top = gt_ys[:, None] - center_gt[..., 1]
     bottom = center_gt[..., 3] - gt_ys[:, None]
     center_bbox = jt.stack((left, top, right, bottom), -1)
     inside_gt_bbox_mask = center_bbox.min(-1)[0] > 0
     return inside_gt_bbox_mask
Example #12
    def encode(self, reference_boxes, proposals):
        """
        Encode a set of proposals with respect to some
        reference boxes

        Arguments:
            reference_boxes (Tensor): reference boxes
            proposals (Tensor): boxes to be encoded
        """

        TO_REMOVE = 1  # TODO remove
        ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE
        ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE
        ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths
        ex_ctr_y = proposals[:, 1] + 0.5 * ex_heights

        gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0] + TO_REMOVE
        gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1] + TO_REMOVE
        gt_ctr_x = reference_boxes[:, 0] + 0.5 * gt_widths
        gt_ctr_y = reference_boxes[:, 1] + 0.5 * gt_heights

        wx, wy, ww, wh = self.weights
        targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths
        targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights
        targets_dw = ww * jt.log(gt_widths / ex_widths)
        targets_dh = wh * jt.log(gt_heights / ex_heights)

        targets = jt.stack((targets_dx, targets_dy, targets_dw, targets_dh), dim=1)
        return targets
Example #13
 def compute_locations_per_level(self, h, w, stride):
     shifts_x = jt.arange(0, w * stride, step=stride, dtype='float32')
     shifts_y = jt.arange(0, h * stride, step=stride, dtype='float32')
     shift_y, shift_x = jt.meshgrid(shifts_y, shifts_x)
     shift_x = shift_x.reshape(-1)
     shift_y = shift_y.reshape(-1)
     locations = jt.stack((shift_x, shift_y), dim=1) + stride // 2
     return locations
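A hedged standalone sketch of the same idea (not tied to the class above; sizes made up): centers of a stride-8, 2x3 feature map, offset by stride // 2 so they land on pixel centers.

import jittor as jt

stride, h, w = 8, 2, 3
shifts_x = jt.arange(0, w * stride, step=stride, dtype='float32')
shifts_y = jt.arange(0, h * stride, step=stride, dtype='float32')
shift_y, shift_x = jt.meshgrid(shifts_y, shifts_x)
locations = jt.stack((shift_x.reshape(-1), shift_y.reshape(-1)), dim=1) + stride // 2
print(locations.numpy())   # x in {4, 12, 20} paired with y in {4, 12}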
Example #14
def projection(vertices, K, R, t, dist_coeffs, orig_size, eps=1e-9):
    '''
    Calculate the projective transformation of vertices given a projection matrix.
    Input parameters:
    K: batch_size * 3 * 3 intrinsic camera matrix
    R, t: batch_size * 3 * 3, batch_size * 1 * 3 extrinsic calibration parameters
    dist_coeffs: vector of distortion coefficients
    orig_size: original size of the image captured by the camera
    Returns: for each point [X, Y, Z] in world coordinates, [u, v, z], where u, v
    are the pixel coordinates of the projection and z is the depth.
    '''

    # instead of P*x we compute x'*P'
    vertices = jt.matmul(vertices, R.transpose((0, 2, 1))[0]) + t
    x, y, z = vertices[:, :, 0], vertices[:, :, 1], vertices[:, :, 2]
    x_ = x / (z + eps)
    y_ = y / (z + eps)

    # Get distortion coefficients from vector
    k1 = dist_coeffs[:, 0].unsqueeze(1)
    k2 = dist_coeffs[:, 1].unsqueeze(1)
    p1 = dist_coeffs[:, 2].unsqueeze(1)
    p2 = dist_coeffs[:, 3].unsqueeze(1)
    k3 = dist_coeffs[:, 4].unsqueeze(1)

    # we use x_ for x' and x__ for x'' etc.
    x_2 = x_.sqr()
    y_2 = y_.sqr()
    r = jt.sqrt(x_2 + y_2)
    r2 = r.sqr()
    r4 = r2.sqr()
    r6 = r4 * r2

    tmp = k1 * (r2) + k2 * (r4) + k3 * (r6) + 1
    x__ = x_ * tmp + 2 * p1 * x_ * y_ + p2 * (r2 + 2 * x_2)
    y__ = y_ * tmp + p1 * (r2 + 2 * y_2) + 2 * p2 * x_ * y_

    vertices = jt.stack([x__, y__, jt.ones(z.shape)], dim=-1)
    vertices = jt.matmul(vertices, K.transpose((0, 2, 1))[0])
    u, v = vertices[:, :, 0], vertices[:, :, 1]
    v = orig_size - v
    # map u,v from [0, img_size] to [-1, 1] to use by the renderer
    u = 2 * (u - orig_size / 2.) / orig_size
    v = 2 * (v - orig_size / 2.) / orig_size
    vertices = jt.stack([u, v, z], dim=-1)
    return vertices
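A hedged usage sketch with a toy pinhole camera (identity rotation, zero translation, zero distortion; all values made up): a point slightly off-axis and one unit in front of the camera projects near the image center.

import jittor as jt

K = jt.array([[[100., 0., 50.], [0., 100., 50.], [0., 0., 1.]]])   # (1, 3, 3)
R = jt.array([[[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]]])          # (1, 3, 3)
t = jt.array([[[0., 0., 0.]]])                                      # (1, 1, 3)
dist_coeffs = jt.zeros((1, 5))                                      # no distortion
verts = jt.array([[[0.1, 0.1, 1.0]]])                               # (1, 1, 3)
print(projection(verts, K, R, t, dist_coeffs, orig_size=100).numpy())
# roughly [[[ 0.2 -0.2  1. ]]]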
Example #15
    def select_region(self, pts,  # (N, x, dims)
                      pts_idx): # (P, K, dims)

        regions = jt.stack([
            pts[n][idx,:] for n, idx in enumerate(jt.misc.unbind(pts_idx, dim = 0))
        ], dim = 0)

        return regions
Example #16
    def convert_to_binarymask(self):
        if len(self) > 0:
            masks = jt.stack(
                [p.convert_to_binarymask() for p in self.polygons])
        else:
            size = self.size
            masks = jt.empty([0, size[1], size[0]]).bool()

        return BinaryMaskList(masks, size=self.size)
Example #17
def get_rays(H, W, focal, c2w, intrinsic=None):
    i, j = jt.meshgrid(jt.linspace(0, W - 1, W), jt.linspace(0, H - 1, H))
    i = i.t()
    j = j.t()
    if intrinsic is None:
        dirs = jt.stack([(i - W * .5) / focal, (j - H * .5) / focal,
                         jt.ones_like(i)], -1).unsqueeze(-2)
    else:
        i += 0.5
        j += 0.5
        dirs = jt.stack([i, j, jt.ones_like(i)], -1).unsqueeze(-2)
        dirs = jt.sum(dirs * intrinsic[:3, :3], -1).unsqueeze(-2)
    # Rotate ray directions from camera frame to the world frame
    rays_d = jt.sum(
        dirs * c2w[:3, :3],
        -1)  # dot product, equals to: [c2w.dot(dir) for dir in dirs]
    # Translate camera frame's origin to the world frame. It is the origin of all rays.
    rays_o = c2w[:3, -1].expand(rays_d.shape)
    return rays_o, rays_d
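A hedged usage sketch (camera parameters made up): with an identity camera-to-world pose every ray originates at the world origin, and one ray direction is returned per pixel of a 4x4 image.

import numpy as np
import jittor as jt

H, W, focal = 4, 4, 2.0
c2w = jt.array(np.eye(4, dtype=np.float32))[:3, :4]   # identity (3, 4) pose
rays_o, rays_d = get_rays(H, W, focal, c2w)
print(rays_o.shape, rays_d.shape)                     # both [4, 4, 3]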
Example #18
def make_grid(x, nrow=8, padding=2, normalize=False, range=None, scale_each=False, pad_value=0):
    assert range is None
    assert scale_each == False
    if isinstance(x, list): x = jt.stack(x)
    if normalize: x = (x - x.min()) / (x.max() - x.min())
    b,c,h,w = x.shape
    ncol = math.ceil(b / nrow)
    return x.reindex([c, h*ncol+(ncol+1)*padding, w*nrow+(nrow+1)*padding], 
                     [f"i1/{padding+h}*{nrow}+i2/{padding+w}", "i0", 
                      f"i1-i1/{padding+h}*{padding+h}-{padding}", f"i2-i2/{padding+w}*{padding+w}-{padding}"], overflow_value=pad_value)
Example #19
def ndc_rays(H, W, focal, near, rays_o, rays_d):
    # Shift ray origins to near plane
    t = -(near + rays_o[..., 2]) / rays_d[..., 2]
    rays_o = rays_o + t.unsqueeze(-1) * rays_d

    # Projection
    o0 = -1. / (W / (2. * focal)) * rays_o[..., 0] / rays_o[..., 2]
    o1 = -1. / (H / (2. * focal)) * rays_o[..., 1] / rays_o[..., 2]
    o2 = 1. + 2. * near / rays_o[..., 2]

    d0 = -1. / (W / (2. * focal)) * (rays_d[..., 0] / rays_d[..., 2] -
                                     rays_o[..., 0] / rays_o[..., 2])
    d1 = -1. / (H / (2. * focal)) * (rays_d[..., 1] / rays_d[..., 2] -
                                     rays_o[..., 1] / rays_o[..., 2])
    d2 = -2. * near / rays_o[..., 2]

    rays_o = jt.stack([o0, o1, o2], -1)
    rays_d = jt.stack([d0, d1, d2], -1)

    return rays_o, rays_d
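A hedged standalone sketch (made-up camera-space rays looking down -z, as in forward-facing NeRF scenes): ndc_rays shifts ray origins to the near plane and maps origins and directions into normalized device coordinates.

import jittor as jt

rays_o = jt.random([8, 3]) * 0.1                                        # origins near the camera
rays_d = jt.stack([jt.random([8]), jt.random([8]), -jt.ones([8])], -1)  # directions with z < 0
o_ndc, d_ndc = ndc_rays(H=100, W=100, focal=50.0, near=1.0, rays_o=rays_o, rays_d=rays_d)
print(o_ndc.shape, d_ndc.shape)   # both [8, 3]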
Example #20
def edge_index_from_dict(graph_dict, num_nodes=None, coalesce=False):
    row, col = [], []
    for key, value in graph_dict.items():
        row += repeat(key, len(value))
        col += value
    edge_index = jt.stack([jt.array(row), jt.array(col)], dim=0)
    if coalesce:
        # NOTE: There are some duplicated edges and self loops in the datasets.
        #       Other implementations do not remove them!
        edge_index, _ = remove_self_loops(edge_index)
        edge_index, _ = coalesce_fn(edge_index, None, num_nodes, num_nodes)
    return edge_index
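A hedged usage sketch with a toy adjacency dict (node -> list of neighbors); `repeat` is assumed to be itertools.repeat from the surrounding module, and coalescing is left off so no extra helpers are needed.

import jittor as jt

graph = {0: [1, 2], 1: [0], 2: [0]}
edge_index = edge_index_from_dict(graph)
print(edge_index.numpy())
# [[0 0 1 2]
#  [1 2 0 0]]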
Example #21
def get_textures_from_im(im, tx_size=1):
    b, c, h, w = im.shape
    if tx_size == 1:
        textures = jt.contrib.concat([
            im[:, :, :h - 1, :w - 1].reshape(b, c, -1),
            im[:, :, 1:, 1:].reshape(b, c, -1)
        ], 2)
        textures = textures.transpose(2, 1).reshape(b, -1, 1, 1, 1, c)
    elif tx_size == 2:
        textures1 = jt.stack([
            im[:, :, :h - 1, :w - 1], im[:, :, :h - 1, 1:], im[:, :,
                                                               1:, :w - 1]
        ], -1).reshape(b, c, -1, 3)
        textures2 = jt.stack(
            [im[:, :, 1:, :w - 1], im[:, :, :h - 1, 1:], im[:, :, 1:, 1:]],
            -1).reshape(b, c, -1, 3)
        textures = vcolor_to_texture_cube(
            jt.contrib.concat([textures1, textures2], 2))  # bxnx2x2x2xc
    else:
        raise NotImplementedError(
            "Currently support texture size of 1 or 2 only.")
    return textures
Example #22
def test():
    model.eval()
    logits, accs = model(), []
    for _, mask in data('train_mask', 'val_mask', 'test_mask'):
        y_ = data.y[mask]
        tmp = []
        for i in range(mask.shape[0]):
            if mask[i]:
                tmp.append(logits[i])
        logits_ = jt.stack(tmp)
        pred, _ = jt.argmax(logits_, dim=1)
        acc = pred.equal(y_).sum().item() / mask.sum().item()
        accs.append(acc)
    return accs
Example #23
    def compute_targets_for_locations(self, locations, targets, object_sizes_of_interest):
        labels = []
        reg_targets = []
        xs, ys = locations[:, 0], locations[:, 1]

        for im_i in range(len(targets)):
            targets_per_im = targets[im_i]
            assert targets_per_im.mode == "xyxy"
            bboxes = targets_per_im.bbox
            labels_per_im = targets_per_im.get_field("labels")
            area = targets_per_im.area()

            l = xs[:, None] - bboxes[:, 0][None]
            t = ys[:, None] - bboxes[:, 1][None]
            r = bboxes[:, 2][None] - xs[:, None]
            b = bboxes[:, 3][None] - ys[:, None]
            reg_targets_per_im = jt.stack([l, t, r, b], dim=2)
            if self.center_sample:
                is_in_boxes = self.get_sample_region(
                    bboxes,
                    self.strides,
                    self.num_points_per_level,
                    xs,
                    ys,
                    radius=self.radius)
            else:
                is_in_boxes = reg_targets_per_im.min(dim=2)[0] > 0

            max_reg_targets_per_im = reg_targets_per_im.max(dim=2)[0]
            # limit the regression range for each location
            is_cared_in_the_level = \
                (max_reg_targets_per_im >= object_sizes_of_interest[:, [0]]) & \
                (max_reg_targets_per_im <= object_sizes_of_interest[:, [1]])

            locations_to_gt_area = area[None].repeat(len(locations), 1)
            locations_to_gt_area[is_in_boxes == 0] = INF
            locations_to_gt_area[is_cared_in_the_level == 0] = INF

            # if there are still more than one objects for a location,
            # we choose the one with minimal area
            locations_to_min_area, locations_to_gt_inds = locations_to_gt_area.min(dim=1)

            reg_targets_per_im = reg_targets_per_im[range(len(locations)), locations_to_gt_inds]
            labels_per_im = labels_per_im[locations_to_gt_inds]
            labels_per_im[locations_to_min_area == INF] = 0

            labels.append(labels_per_im)
            reg_targets.append(reg_targets_per_im)

        return labels, reg_targets
Example #24
    def prepare_targets(self, points, targets, im_w, im_h):
        object_sizes_of_interest = self.object_sizes_of_interest
        expanded_object_sizes_of_interest = []
        for l, points_per_level in enumerate(points):
            object_sizes_of_interest_per_level = \
                points_per_level.new_tensor(object_sizes_of_interest[l])
            expanded_object_sizes_of_interest.append(
                object_sizes_of_interest_per_level[None].expand(
                    len(points_per_level), -1))

        expanded_object_sizes_of_interest = jt.contrib.concat(
            expanded_object_sizes_of_interest, dim=0)
        num_points_per_level = [
            len(points_per_level) for points_per_level in points
        ]
        self.num_points_per_level = num_points_per_level
        points_all_level = jt.contrib.concat(points, dim=0)
        labels, reg_targets, matched_idxes = self.compute_targets_for_locations(
            points_all_level, targets, expanded_object_sizes_of_interest, im_w,
            im_h)

        labels_split = []
        reg_targets_split = []
        for i in range(len(labels)):
            labels_split.append(
                jt.split(labels[i], num_points_per_level, dim=0))
            reg_targets_split.append(
                jt.split(reg_targets[i], num_points_per_level, dim=0))

        labels_level_first = []
        reg_targets_level_first = []
        for level in range(len(points)):
            labels_level_first.append(
                jt.contrib.concat(
                    [labels_per_im[level] for labels_per_im in labels_split],
                    dim=0))
            reg_targets_per_level = \
                jt.contrib.concat([reg_targets_per_im[level] for reg_targets_per_im in reg_targets_split], dim=0)

            if self.norm_reg_targets:
                reg_targets_per_level = reg_targets_per_level / self.fpn_strides[
                    level]
            reg_targets_level_first.append(reg_targets_per_level)

        matched_idxes = jt.stack(matched_idxes)

        return labels_level_first, reg_targets_level_first, labels, reg_targets, matched_idxes
Example #25
    def forward_single_image(self, masks, boxes):
        boxes = boxes.convert("xyxy")
        im_w, im_h = boxes.size
        res = []
        for i in range(boxes.bbox.shape[0]):
            mask = masks[i]
            if mask.ndim == 3:
                mask = mask[0]
            res.append(
                paste_mask_in_image(mask, boxes.bbox[i], im_h, im_w,
                                    self.threshold, self.padding))

        if len(res) > 0:
            res = jt.stack(res, dim=0)[:].unsqueeze(1)
        else:
            res = masks.new_empty((0, 1, masks.shape[-2], masks.shape[-1]))
        return res
Example #26
    def grid_anchors(self, grid_sizes):
        anchors = []
        for size, stride, base_anchors in zip(grid_sizes, self.strides,
                                              self.cell_anchors):
            grid_height, grid_width = size
            shifts_x = jt.arange(0, grid_width * stride, step=stride).float32()
            shifts_y = jt.arange(0, grid_height * stride,
                                 step=stride).float32()
            shift_y, shift_x = jt.meshgrid(shifts_y, shifts_x)
            shift_x = shift_x.reshape(-1)
            shift_y = shift_y.reshape(-1)
            shifts = jt.stack((shift_x, shift_y, shift_x, shift_y), dim=1)

            anchors.append((shifts.reshape(-1, 1, 4) +
                            base_anchors.reshape(1, -1, 4)).reshape(-1, 4))

        return anchors
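A hedged standalone sketch of the shift-and-add step above (anchor sizes and grid dimensions made up): every cell anchor is replicated at every (x, y) position of one feature level.

import jittor as jt

base_anchors = jt.array([[-8., -8., 8., 8.], [-16., -16., 16., 16.]])  # (A=2, 4)
stride, grid_h, grid_w = 16, 2, 2
shifts_x = jt.arange(0, grid_w * stride, step=stride).float32()
shifts_y = jt.arange(0, grid_h * stride, step=stride).float32()
shift_y, shift_x = jt.meshgrid(shifts_y, shifts_x)
shifts = jt.stack((shift_x.reshape(-1), shift_y.reshape(-1),
                   shift_x.reshape(-1), shift_y.reshape(-1)), dim=1)   # (grid_h*grid_w, 4)
anchors = (shifts.reshape(-1, 1, 4) + base_anchors.reshape(1, -1, 4)).reshape(-1, 4)
print(anchors.shape)   # [8, 4] == grid_h * grid_w * A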
Example #27
    def get_pos_proposal_indexes(self, locations, box_regression,
                                 matched_idxes, targets):
        locations = jt.contrib.concat(locations, dim=0)
        pos_indexes_for_targets = []
        for im in range(len(targets)):
            pos_indexes_for_targets_per_im = locations.new_ones(
                len(targets[im])).long() * -1
            box_regression_im = [
                box_regression[l][im].detach().view(4, -1).transpose(0, 1) *
                self.fpn_strides[l] for l in range(len(box_regression))
            ]
            box_regression_im = jt.contrib.concat(box_regression_im, dim=0)
            for t_id in range(len(targets[im])):
                valid = matched_idxes[im] == t_id
                if valid.sum() == 0:
                    continue
                valid_location = locations[valid]
                valid_regression = box_regression_im[valid]
                detections = jt.stack([
                    valid_location[:, 0] - valid_regression[:, 0],
                    valid_location[:, 1] - valid_regression[:, 1],
                    valid_location[:, 0] + valid_regression[:, 2],
                    valid_location[:, 1] + valid_regression[:, 3],
                ],
                                      dim=1)
                detect_boxlist = BoxList(detections,
                                         targets[im].size,
                                         mode="xyxy")
                target_boxlist = BoxList(targets[im].bbox[t_id:t_id + 1],
                                         targets[im].size,
                                         mode="xyxy")
                match_quality_matrix = boxlist_iou(detect_boxlist,
                                                   target_boxlist)

                pos_labels_per_target = jt.zeros_like(valid)
                iou_in_target = match_quality_matrix[:, 0]
                pos_in_target = (iou_in_target == iou_in_target.max())
                pos_labels_per_target[valid] = pos_in_target
                pos_indexes_for_targets_per_im[
                    t_id] = pos_labels_per_target.nonzero()[0][0]

            pos_indexes_for_targets.append(pos_indexes_for_targets_per_im)

        return pos_indexes_for_targets
Example #28
def prepare_data(datum, allocation: list = None):
    with jt.no_grad():
        if allocation is None:
            allocation = []
            allocation.append(args.batch_size -
                              sum(allocation))  # The rest might need more/less

        images, (targets, masks, num_crowds) = datum

        cur_idx = 0
        for alloc in allocation:
            for _ in range(alloc):
                images[cur_idx] = gradinator(images[cur_idx])
                targets[cur_idx] = gradinator(targets[cur_idx])
                masks[cur_idx] = gradinator(masks[cur_idx])
                cur_idx += 1

        if cfg.preserve_aspect_ratio:
            # Choose a random size from the batch
            _, h, w = images[random.randint(0, len(images) - 1)].shape

            for idx, (image, target, mask, num_crowd) in enumerate(
                    zip(images, targets, masks, num_crowds)):
                images[idx], targets[idx], masks[idx], num_crowds[idx] \
                    = enforce_size(image, target, mask, num_crowd, w, h)

        cur_idx = 0
        split_images, split_targets, split_masks, split_numcrowds \
            = [[None for alloc in allocation] for _ in range(4)]

        for device_idx, alloc in enumerate(allocation):
            split_images[device_idx] = jt.stack(images[cur_idx:cur_idx +
                                                       alloc],
                                                dim=0)
            split_targets[device_idx] = targets[cur_idx:cur_idx + alloc]
            split_masks[device_idx] = masks[cur_idx:cur_idx + alloc]
            split_numcrowds[device_idx] = num_crowds[cur_idx:cur_idx + alloc]

            cur_idx += alloc

        return split_images[0], split_targets[0], split_masks[
            0], split_numcrowds[0]
Example #29
    def collate(data_list):
        r"""Collates a python list of data objects to the internal storage
        format of :class:`torch_geometric.data.InMemoryDataset`."""
        keys = data_list[0].keys
        data = data_list[0].__class__()

        for key in keys:
            data[key] = []
        slices = {key: [0] for key in keys}

        for item, key in product(data_list, keys):
            data[key].append(item[key])
            if isinstance(item[key], Var) and item[key].ndim > 0:
                cat_dim = item.__cat_dim__(key, item[key])
                cat_dim = 0 if cat_dim is None else cat_dim
                s = slices[key][-1] + item[key].size(cat_dim)
            else:
                s = slices[key][-1] + 1
            slices[key].append(s)

        if hasattr(data_list[0], '__num_nodes__'):
            data.__num_nodes__ = []
            for item in data_list:
                data.__num_nodes__.append(item.num_nodes)

        for key in keys:
            item = data_list[0][key]
            if isinstance(item, Var) and len(data_list) > 1:
                if item.ndim > 0:
                    cat_dim = data.__cat_dim__(key, item)
                    cat_dim = 0 if cat_dim is None else cat_dim
                    data[key] = jt.concat(data[key], dim=cat_dim)
                else:
                    data[key] = jt.stack(data[key])
            elif isinstance(item, Var):  # Don't duplicate attributes...
                data[key] = data[key][0]
            elif isinstance(item, int) or isinstance(item, float):
                data[key] = jt.array(data[key])

            slices[key] = jt.array(slices[key], dtype='int32')

        return data, slices
Example #30
    def process_batch(self, detections, labels):
        """
        Return intersection-over-union (Jaccard index) of boxes.
        Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
        Arguments:
            detections (Array[N, 6]), x1, y1, x2, y2, conf, class
            labels (Array[M, 5]), class, x1, y1, x2, y2
        Returns:
            None, updates confusion matrix accordingly
        """
        detections = detections[detections[:, 4] > self.conf]
        gt_classes = labels[:, 0].int()
        detection_classes = detections[:, 5].int()
        iou = general.box_iou(labels[:, 1:], detections[:, :4])

        x = jt.where(iou > self.iou_thres)
        if x[0].shape[0]:
            matches = jt.contrib.concat(
                (jt.stack(x, 1), iou[x[0], x[1]][:, None]), 1).numpy()
            if x[0].shape[0] > 1:
                matches = matches[matches[:, 2].argsort()[::-1]]
                matches = matches[np.unique(matches[:, 1],
                                            return_index=True)[1]]
                matches = matches[matches[:, 2].argsort()[::-1]]
                matches = matches[np.unique(matches[:, 0],
                                            return_index=True)[1]]
        else:
            matches = np.zeros((0, 3))

        n = matches.shape[0] > 0
        m0, m1, _ = matches.transpose().astype(np.int16)
        for i, gc in enumerate(gt_classes):
            j = m0 == i
            if n and sum(j) == 1:
                self.matrix[gc, detection_classes[m1[j]]] += 1  # correct
            else:
                self.matrix[self.nc, gc] += 1  # background FP

        if n:
            for i, dc in enumerate(detection_classes):
                if not any(m1 == i):
                    self.matrix[dc, self.nc] += 1  # background FN