def sample_pdf(bins, weights, N_samples, det=False):
    # Get pdf
    weights = weights + 1e-5  # prevent nans
    pdf = weights / jt.sum(weights, -1, keepdims=True)
    cdf = jt.cumsum(pdf, -1)
    cdf = jt.concat([jt.zeros_like(cdf[..., :1]), cdf], -1)  # (batch, len(bins))

    # Take uniform samples
    if det:
        u = jt.linspace(0., 1., steps=N_samples)
        u = u.expand(list(cdf.shape[:-1]) + [N_samples])
    else:
        u = jt.random(list(cdf.shape[:-1]) + [N_samples])

    # Invert CDF
    inds = jt.searchsorted(cdf, u, right=True)
    below = jt.maximum(jt.zeros_like(inds - 1), inds - 1)
    above = jt.minimum((cdf.shape[-1] - 1) * jt.ones_like(inds), inds)
    inds_g = jt.stack([below, above], -1)  # (batch, N_samples, 2)

    matched_shape = [inds_g.shape[0], inds_g.shape[1], cdf.shape[-1]]
    cdf_g = jt.gather(cdf.unsqueeze(1).expand(matched_shape), 2, inds_g)
    bins_g = jt.gather(bins.unsqueeze(1).expand(matched_shape), 2, inds_g)

    denom = (cdf_g[..., 1] - cdf_g[..., 0])
    denom[denom < 1e-5] = 1.0
    t = (u - cdf_g[..., 0]) / denom
    samples = bins_g[..., 0] + t * (bins_g[..., 1] - bins_g[..., 0])
    return samples
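# A minimal usage sketch for sample_pdf (illustrative shapes, not from the
# original repo; assumes `import jittor as jt` at module level). Hierarchical
# sampling: 2 rays with 5 bin edges and 4 coarse weights yield 8 stratified
# fine samples per ray when det=True.
def _demo_sample_pdf():
    bins = jt.linspace(0., 1., steps=5).unsqueeze(0).repeat(2, 1)  # (2, 5) bin edges
    weights = jt.random((2, 4))                                    # (2, 4) coarse weights
    return sample_pdf(bins, weights, N_samples=8, det=True)        # (2, 8) fine samples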
def execute(self, inputs, targets, mask=None, act=False):
    losses = []
    for id in range(len(inputs)):
        if mask is not None:
            input_flatten, target_flatten = self.flatten(
                inputs[id], targets[id], mask[id])
        else:
            input_flatten, target_flatten = self.flatten(
                inputs[id], targets[id])
        if act:
            MIN = 1e-9
            input_flatten = jt.clamp(input_flatten, min_v=MIN, max_v=1 - MIN)
            input_flatten = jt.log(input_flatten) - jt.log(1 - input_flatten)
        losses.append(self.lovasz_hinge_flat(input_flatten, target_flatten))
    losses = jt.stack(losses)
    if self.reduction == "mean":
        losses = losses.mean()
    elif self.reduction == "sum":
        losses = losses.sum()
    return losses
def make_grid(x, nrow=8, padding=2, normalize=False, range=None,
              scale_each=False, pad_value=0):
    assert isinstance(range, tuple) or range is None
    assert scale_each == False
    if isinstance(x, list):
        x = jt.stack(x)
    assert isinstance(x, jt.Var)
    if x.ndim < 4:
        return x
    if x.ndim == 4 and x.shape[0] <= 1:
        return x
    nrow = min(nrow, x.shape[0])
    if normalize:
        if range is None:
            x = (x - x.min()) / (x.max() - x.min())
        else:
            x = (x - range[0]) / (range[1] - range[0])
    b, c, h, w = x.shape
    ncol = math.ceil(b / nrow)
    return x.reindex(
        [c, h * ncol + (ncol + 1) * padding, w * nrow + (nrow + 1) * padding],
        [
            f"i1/{padding+h}*{nrow}+i2/{padding+w}", "i0",
            f"i1-i1/{padding+h}*{padding+h}-{padding}",
            f"i2-i2/{padding+w}*{padding+w}-{padding}"
        ],
        overflow_value=pad_value)
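# Minimal sketch (hypothetical sizes; assumes `import jittor as jt`): tile a
# batch of 4 RGB 8x8 images into 2 images per row with 1-pixel padding.
def _demo_make_grid():
    batch = jt.random((4, 3, 8, 8))
    return make_grid(batch, nrow=2, padding=1, normalize=True)  # (3, 19, 19)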
def collate_batch(batch):
    r"""Puts each data field into a tensor with outer dimension batch size"""
    real_size = len(batch)
    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, jt.Var):
        temp_data = jt.stack([data for data in batch], 0)
        return temp_data
    if elem_type is np.ndarray:
        temp_data = np.stack([data for data in batch], 0)
        return temp_data
    elif np.issubdtype(elem_type, np.integer):
        return np.int32(batch)
    elif isinstance(elem, int):
        return np.int32(batch)
    elif isinstance(elem, float):
        return np.float32(batch)
    elif isinstance(elem, str):
        return batch
    elif isinstance(elem, Mapping):
        return {key: collate_batch([d[key] for d in batch]) for key in elem}
    elif isinstance(elem, tuple):
        transposed = zip(*batch)
        return tuple(collate_batch(samples) for samples in transposed)
    elif isinstance(elem, Sequence):
        transposed = zip(*batch)
        return [collate_batch(samples) for samples in transposed]
    elif isinstance(elem, Image.Image):
        temp_data = np.stack([np.array(data) for data in batch], 0)
        return temp_data
    else:
        raise TypeError(f"Unsupported type <{elem_type.__name__}>")
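# Hedged example of the recursive collation above (hypothetical sample layout;
# assumes `import numpy as np`): a list of (image, label) tuples comes back as
# a stacked (2, 3, 4, 4) ndarray plus an int32 label array.
def _demo_collate_batch():
    samples = [(np.zeros((3, 4, 4), dtype=np.float32), 1),
               (np.ones((3, 4, 4), dtype=np.float32), 0)]
    images, labels = collate_batch(samples)
    return images.shape, labels  # (2, 3, 4, 4), np.int32([1, 0])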
def project_masks_on_boxes(segmentation_masks, proposals, discretization_size):
    """
    Given segmentation masks and the bounding boxes corresponding
    to the location of the masks in the image, this function
    crops and resizes the masks in the position defined by the
    boxes. This prepares the masks for them to be fed to the
    loss computation as the targets.

    Arguments:
        segmentation_masks: an instance of SegmentationMask
        proposals: an instance of BoxList
    """
    masks = []
    M = discretization_size
    proposals = proposals.convert("xyxy")
    assert segmentation_masks.size == proposals.size, "{}, {}".format(
        segmentation_masks, proposals)

    # FIXME: CPU computation bottleneck, this should be parallelized
    proposals = proposals.bbox
    for segmentation_mask, proposal in zip(segmentation_masks, proposals):
        # crop the masks, resize them to the desired resolution and
        # then convert them to the tensor representation.
        cropped_mask = segmentation_mask.crop(proposal)
        scaled_mask = cropped_mask.resize((M, M))
        mask = scaled_mask.get_mask_tensor()
        masks.append(mask)
    if len(masks) == 0:
        return jt.zeros(0).float32()
    return jt.stack(masks, dim=0).float32()
def test_lstm_cell(self):
    np_h0 = torch.randn(3, 20).numpy()
    np_c0 = torch.randn(3, 20).numpy()

    t_rnn = tnn.LSTMCell(10, 20)
    input = torch.randn(2, 3, 10)
    h0 = torch.from_numpy(np_h0)
    c0 = torch.from_numpy(np_c0)
    t_output = []
    for i in range(input.size()[0]):
        h0, c0 = t_rnn(input[i], (h0, c0))
        t_output.append(h0)
    t_output = torch.stack(t_output, dim=0)

    j_rnn = nn.LSTMCell(10, 20)
    j_rnn.load_state_dict(t_rnn.state_dict())
    input = jt.float32(input.numpy())
    h0 = jt.float32(np_h0)
    c0 = jt.float32(np_c0)
    j_output = []
    for i in range(input.size()[0]):
        h0, c0 = j_rnn(input[i], (h0, c0))
        j_output.append(h0)
    j_output = jt.stack(j_output, dim=0)

    t_output = t_output.detach().numpy()
    j_output = j_output.data
    assert np.allclose(t_output, j_output, rtol=1e-03, atol=1e-06)
def test_stack(self):
    arr1 = np.random.randn(16, 3, 224, 224)
    arr2 = np.random.randn(16, 3, 224, 224)
    check_equal(torch.stack([torch.Tensor(arr1), torch.Tensor(arr2)], 0),
                jt.stack([jt.array(arr1), jt.array(arr2)], 0))
    print('pass stack test ...')
def knn_indices_func_gpu(rep_pts,  # (N, pts, dim)
                         pts,      # (N, x, dim)
                         k: int,
                         d: int):  # returns (N, pts, k)
    """
    GPU-based indexing function based on K-Nearest Neighbors search.
    Very memory intensive, and thus suboptimal for large numbers of points.
    :param rep_pts: Representative points.
    :param pts: Point cloud to get indices from.
    :param k: Number of nearest neighbors to collect.
    :param d: "Spread" of neighboring points.
    :return: Array of indices, P_idx, into pts such that pts[n][P_idx[n],:]
    is the set of k-nearest neighbors for the representative points in pts[n].
    """
    region_idx = []
    batch_size = rep_pts.shape[0]
    for idx in range(batch_size):
        qry = rep_pts[idx]
        ref = pts[idx]
        n, dim = ref.shape  # renamed from d to avoid shadowing the spread parameter
        m, _ = qry.shape
        mref = ref.view(1, n, dim).repeat(m, 1, 1)
        mqry = qry.view(m, 1, dim).repeat(1, n, 1)
        dist2 = jt.sum((mqry - mref) ** 2, 2)  # pytorch has squeeze
        _, inds = topk(dist2, k * d + 1, dim=1, largest=False)
        region_idx.append(inds[:, 1::d])

    region_idx = jt.stack(region_idx, dim=0)
    return region_idx
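# Usage sketch (hypothetical shapes; assumes `import jittor as jt` and the
# external `topk` helper used above): 2 clouds of 16 points in 3-D, 4
# representative points each, k=3 neighbors at spread d=1.
def _demo_knn_gpu():
    pts = jt.random((2, 16, 3))
    rep_pts = pts[:, :4, :]
    return knn_indices_func_gpu(rep_pts, pts, k=3, d=1)  # (2, 4, 3) indices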
def prepare_data(datum):
    with jt.no_grad():
        images, (targets, masks, num_crowds) = datum

        if not isinstance(images[0], jt.Var):
            images = [jt.array(image, dtype='float32') for image in images]
        if not isinstance(targets[0], jt.Var):
            targets = [jt.array(t, dtype='float32') for t in targets]
        if not isinstance(masks[0], jt.Var):
            masks = [jt.array(m, dtype='float32') for m in masks]

        for cur_idx in range(args.batch_size):
            images[cur_idx] = gradinator(images[cur_idx])
            targets[cur_idx] = gradinator(targets[cur_idx])
            masks[cur_idx] = gradinator(masks[cur_idx])

        if cfg.preserve_aspect_ratio:
            # Choose a random size from the batch
            _, h, w = images[random.randint(0, len(images) - 1)].shape

            for idx, (image, target, mask, num_crowd) in enumerate(
                    zip(images, targets, masks, num_crowds)):
                images[idx], targets[idx], masks[idx], num_crowds[idx] \
                    = enforce_size(image, target, mask, num_crowd, w, h)

        return jt.stack(images, dim=0), targets, masks, num_crowds
def quat_conjugate(quat):
    q0 = quat[:, :, 0]
    q1 = -quat[:, :, 1]
    q2 = -quat[:, :, 2]
    q3 = -quat[:, :, 3]
    q_conj = jt.stack([q0, q1, q2, q3], dim=2)
    return q_conj
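# Sanity-check sketch (assumes `import jittor as jt`; quaternion layout
# (w, x, y, z)): conjugating twice recovers the input batch.
def _demo_quat_conjugate():
    q = jt.random((2, 5, 4))
    assert jt.abs(quat_conjugate(quat_conjugate(q)) - q).max().item() < 1e-6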
def get_sample_region(self, gt, strides, num_points_per, gt_xs, gt_ys, radius=1):
    num_gts = gt.shape[0]
    K = len(gt_xs)
    gt = gt[None].expand(K, num_gts, 4)
    center_x = (gt[..., 0] + gt[..., 2]) / 2
    center_y = (gt[..., 1] + gt[..., 3]) / 2
    center_gt = gt.new_zeros(gt.shape)
    # no gt
    if center_x[..., 0].sum() == 0:
        return gt_xs.new_zeros(gt_xs.shape, dtype='uint8')
    beg = 0
    for level, n_p in enumerate(num_points_per):
        end = beg + n_p
        stride = strides[level] * radius
        xmin = center_x[beg:end] - stride
        ymin = center_y[beg:end] - stride
        xmax = center_x[beg:end] + stride
        ymax = center_y[beg:end] + stride
        # limit sample region in gt
        center_gt[beg:end, :, 0] = jt.ternary(xmin > gt[beg:end, :, 0],
                                              xmin, gt[beg:end, :, 0])
        center_gt[beg:end, :, 1] = jt.ternary(ymin > gt[beg:end, :, 1],
                                              ymin, gt[beg:end, :, 1])
        center_gt[beg:end, :, 2] = jt.ternary(xmax > gt[beg:end, :, 2],
                                              gt[beg:end, :, 2], xmax)
        center_gt[beg:end, :, 3] = jt.ternary(ymax > gt[beg:end, :, 3],
                                              gt[beg:end, :, 3], ymax)
        beg = end
    left = gt_xs[:, None] - center_gt[..., 0]
    right = center_gt[..., 2] - gt_xs[:, None]
    top = gt_ys[:, None] - center_gt[..., 1]
    bottom = center_gt[..., 3] - gt_ys[:, None]
    center_bbox = jt.stack((left, top, right, bottom), -1)
    inside_gt_bbox_mask = center_bbox.min(-1)[0] > 0
    return inside_gt_bbox_mask
def encode(self, reference_boxes, proposals):
    """
    Encode a set of proposals with respect to some reference boxes

    Arguments:
        reference_boxes (Tensor): reference boxes
        proposals (Tensor): boxes to be encoded
    """
    TO_REMOVE = 1  # TODO remove
    ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE
    ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE
    ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths
    ex_ctr_y = proposals[:, 1] + 0.5 * ex_heights

    gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0] + TO_REMOVE
    gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1] + TO_REMOVE
    gt_ctr_x = reference_boxes[:, 0] + 0.5 * gt_widths
    gt_ctr_y = reference_boxes[:, 1] + 0.5 * gt_heights

    wx, wy, ww, wh = self.weights
    targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = ww * jt.log(gt_widths / ex_widths)
    targets_dh = wh * jt.log(gt_heights / ex_heights)

    targets = jt.stack((targets_dx, targets_dy, targets_dw, targets_dh), dim=1)
    return targets
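# Hedged identity check for the box encoding above (hypothetical `coder`
# instance with weights (1., 1., 1., 1.); assumes `import jittor as jt`):
# a proposal equal to its reference box encodes to all-zero deltas, since
# the center offsets vanish and log(w_gt / w_ex) = log(1) = 0.
def _demo_encode_identity(coder):
    boxes = jt.array([[0., 0., 10., 10.], [5., 5., 20., 30.]])
    return coder.encode(boxes, boxes)  # (2, 4), expected all zeros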
def compute_locations_per_level(self, h, w, stride):
    shifts_x = jt.arange(0, w * stride, step=stride, dtype='float32')
    shifts_y = jt.arange(0, h * stride, step=stride, dtype='float32')
    shift_y, shift_x = jt.meshgrid(shifts_y, shifts_x)
    shift_x = shift_x.reshape(-1)
    shift_y = shift_y.reshape(-1)
    locations = jt.stack((shift_x, shift_y), dim=1) + stride // 2
    return locations
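# Sketch (assumes `import jittor as jt`; `self` is unused by the method, so
# None is passed for illustration): FCOS-style locations for a 2x3 feature
# map at stride 8. Each row is the (x, y) center of a stride-sized cell, so
# the first location is (4, 4).
def _demo_locations():
    return compute_locations_per_level(None, h=2, w=3, stride=8)  # (6, 2)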
def projection(vertices, K, R, t, dist_coeffs, orig_size, eps=1e-9):
    '''
    Calculate projective transformation of vertices given a projection matrix
    Input parameters:
    K: batch_size * 3 * 3 intrinsic camera matrix
    R, t: batch_size * 3 * 3, batch_size * 1 * 3 extrinsic calibration parameters
    dist_coeffs: vector of distortion coefficients
    orig_size: original size of image captured by the camera
    Returns: For each point [X,Y,Z] in world coordinates [u,v,z] where u,v are
    the coordinates of the projection in pixels and z is the depth
    '''
    # instead of P*x we compute x'*P'
    vertices = jt.matmul(vertices, R.transpose((0, 2, 1))[0]) + t
    x, y, z = vertices[:, :, 0], vertices[:, :, 1], vertices[:, :, 2]
    x_ = x / (z + eps)
    y_ = y / (z + eps)

    # Get distortion coefficients from vector
    k1 = dist_coeffs[:, 0].unsqueeze(1)
    k2 = dist_coeffs[:, 1].unsqueeze(1)
    p1 = dist_coeffs[:, 2].unsqueeze(1)
    p2 = dist_coeffs[:, 3].unsqueeze(1)
    k3 = dist_coeffs[:, 4].unsqueeze(1)

    # we use x_ for x' and x__ for x'' etc.
    x_2 = x_.sqr()
    y_2 = y_.sqr()
    r = jt.sqrt(x_2 + y_2)
    r2 = r.sqr()
    r4 = r2.sqr()
    r6 = r4 * r2

    tmp = k1 * r2 + k2 * r4 + k3 * r6 + 1
    x__ = x_ * tmp + 2 * p1 * x_ * y_ + p2 * (r2 + 2 * x_2)
    y__ = y_ * tmp + p1 * (r2 + 2 * y_2) + 2 * p2 * x_ * y_
    vertices = jt.stack([x__, y__, jt.ones(z.shape)], dim=-1)
    vertices = jt.matmul(vertices, K.transpose((0, 2, 1))[0])
    u, v = vertices[:, :, 0], vertices[:, :, 1]
    v = orig_size - v
    # map u,v from [0, img_size] to [-1, 1] to use by the renderer
    u = 2 * (u - orig_size / 2.) / orig_size
    v = 2 * (v - orig_size / 2.) / orig_size
    vertices = jt.stack([u, v, z], dim=-1)
    return vertices
def select_region(self,
                  pts,       # (N, x, dims)
                  pts_idx):  # (P, K, dims)
    regions = jt.stack([
        pts[n][idx, :]
        for n, idx in enumerate(jt.misc.unbind(pts_idx, dim=0))
    ], dim=0)
    return regions
def convert_to_binarymask(self):
    if len(self) > 0:
        masks = jt.stack(
            [p.convert_to_binarymask() for p in self.polygons])
    else:
        size = self.size
        masks = jt.empty([0, size[1], size[0]]).bool()
    return BinaryMaskList(masks, size=self.size)
def get_rays(H, W, focal, c2w, intrinsic=None):
    i, j = jt.meshgrid(jt.linspace(0, W - 1, W), jt.linspace(0, H - 1, H))
    i = i.t()
    j = j.t()
    if intrinsic is None:
        dirs = jt.stack([(i - W * .5) / focal, (j - H * .5) / focal,
                         jt.ones_like(i)], -1).unsqueeze(-2)
    else:
        i += 0.5
        j += 0.5
        dirs = jt.stack([i, j, jt.ones_like(i)], -1).unsqueeze(-2)
        dirs = jt.sum(dirs * intrinsic[:3, :3], -1).unsqueeze(-2)
    # Rotate ray directions from camera frame to the world frame
    rays_d = jt.sum(dirs * c2w[:3, :3],
                    -1)  # dot product, equals to: [c2w.dot(dir) for dir in dirs]
    # Translate camera frame's origin to the world frame. It is the origin of all rays.
    rays_o = c2w[:3, -1].expand(rays_d.shape)
    return rays_o, rays_d
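# Minimal sketch (assumes `import jittor as jt` and `import numpy as np`):
# one ray per pixel of a 4x4 image under an identity camera-to-world pose,
# so all origins sit at zero and directions point through the pixel grid.
def _demo_get_rays():
    c2w = jt.array(np.eye(4, dtype=np.float32))[:3]  # (3, 4) identity pose
    rays_o, rays_d = get_rays(H=4, W=4, focal=2.0, c2w=c2w)
    return rays_o, rays_d  # both (4, 4, 3)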
def make_grid(x, nrow=8, padding=2, normalize=False, range=None,
              scale_each=False, pad_value=0):
    assert range is None
    assert scale_each == False
    if isinstance(x, list):
        x = jt.stack(x)
    if normalize:
        x = (x - x.min()) / (x.max() - x.min())
    b, c, h, w = x.shape
    ncol = math.ceil(b / nrow)
    return x.reindex(
        [c, h * ncol + (ncol + 1) * padding, w * nrow + (nrow + 1) * padding],
        [
            f"i1/{padding+h}*{nrow}+i2/{padding+w}", "i0",
            f"i1-i1/{padding+h}*{padding+h}-{padding}",
            f"i2-i2/{padding+w}*{padding+w}-{padding}"
        ],
        overflow_value=pad_value)
def ndc_rays(H, W, focal, near, rays_o, rays_d):
    # Shift ray origins to near plane
    t = -(near + rays_o[..., 2]) / rays_d[..., 2]
    rays_o = rays_o + t.unsqueeze(-1) * rays_d

    # Projection
    o0 = -1. / (W / (2. * focal)) * rays_o[..., 0] / rays_o[..., 2]
    o1 = -1. / (H / (2. * focal)) * rays_o[..., 1] / rays_o[..., 2]
    o2 = 1. + 2. * near / rays_o[..., 2]

    d0 = -1. / (W / (2. * focal)) * (rays_d[..., 0] / rays_d[..., 2] -
                                     rays_o[..., 0] / rays_o[..., 2])
    d1 = -1. / (H / (2. * focal)) * (rays_d[..., 1] / rays_d[..., 2] -
                                     rays_o[..., 1] / rays_o[..., 2])
    d2 = -2. * near / rays_o[..., 2]

    rays_o = jt.stack([o0, o1, o2], -1)
    rays_d = jt.stack([d0, d1, d2], -1)
    return rays_o, rays_d
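# Follow-on sketch to _demo_get_rays above (same assumptions): warping the
# same rays into NDC space for a forward-facing scene keeps the (H, W, 3)
# layout while moving ray origins onto the near plane.
def _demo_ndc_rays():
    rays_o, rays_d = _demo_get_rays()
    return ndc_rays(H=4, W=4, focal=2.0, near=1.0, rays_o=rays_o, rays_d=rays_d)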
def edge_index_from_dict(graph_dict, num_nodes=None, coalesce=False):
    row, col = [], []
    for key, value in graph_dict.items():
        row += repeat(key, len(value))
        col += value
    edge_index = jt.stack([jt.array(row), jt.array(col)], dim=0)

    if coalesce:
        # NOTE: There are some duplicated edges and self loops in the datasets.
        # Other implementations do not remove them!
        edge_index, _ = remove_self_loops(edge_index)
        edge_index, _ = coalesce_fn(edge_index, None, num_nodes, num_nodes)

    return edge_index
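# Usage sketch (assumes `import jittor as jt` and that `repeat` above is
# itertools.repeat, as in the torch_geometric original): a 3-node adjacency
# dict becomes a (2, 4) edge_index.
def _demo_edge_index_from_dict():
    graph = {0: [1, 2], 1: [0], 2: [0]}
    return edge_index_from_dict(graph, num_nodes=3)  # rows: [0, 0, 1, 2], [1, 2, 0, 0]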
def get_textures_from_im(im, tx_size=1):
    b, c, h, w = im.shape
    if tx_size == 1:
        textures = jt.contrib.concat([
            im[:, :, :h - 1, :w - 1].reshape(b, c, -1),
            im[:, :, 1:, 1:].reshape(b, c, -1)
        ], 2)
        textures = textures.transpose(2, 1).reshape(b, -1, 1, 1, 1, c)
    elif tx_size == 2:
        textures1 = jt.stack([
            im[:, :, :h - 1, :w - 1], im[:, :, :h - 1, 1:],
            im[:, :, 1:, :w - 1]
        ], -1).reshape(b, c, -1, 3)
        textures2 = jt.stack(
            [im[:, :, 1:, :w - 1], im[:, :, :h - 1, 1:], im[:, :, 1:, 1:]],
            -1).reshape(b, c, -1, 3)
        textures = vcolor_to_texture_cube(
            jt.contrib.concat([textures1, textures2], 2))  # bxnx2x2x2xc
    else:
        raise NotImplementedError(
            "Only texture sizes of 1 or 2 are currently supported.")
    return textures
def test():
    model.eval()
    logits, accs = model(), []
    for _, mask in data('train_mask', 'val_mask', 'test_mask'):
        y_ = data.y[mask]
        tmp = []
        for i in range(mask.shape[0]):
            if mask[i]:
                tmp.append(logits[i])
        logits_ = jt.stack(tmp)
        pred, _ = jt.argmax(logits_, dim=1)
        acc = pred.equal(y_).sum().item() / mask.sum().item()
        accs.append(acc)
    return accs
def compute_targets_for_locations(self, locations, targets,
                                  object_sizes_of_interest):
    labels = []
    reg_targets = []
    xs, ys = locations[:, 0], locations[:, 1]

    for im_i in range(len(targets)):
        targets_per_im = targets[im_i]
        assert targets_per_im.mode == "xyxy"
        bboxes = targets_per_im.bbox
        labels_per_im = targets_per_im.get_field("labels")
        area = targets_per_im.area()

        l = xs[:, None] - bboxes[:, 0][None]
        t = ys[:, None] - bboxes[:, 1][None]
        r = bboxes[:, 2][None] - xs[:, None]
        b = bboxes[:, 3][None] - ys[:, None]
        reg_targets_per_im = jt.stack([l, t, r, b], dim=2)

        if self.center_sample:
            is_in_boxes = self.get_sample_region(bboxes,
                                                 self.strides,
                                                 self.num_points_per_level,
                                                 xs, ys,
                                                 radius=self.radius)
        else:
            is_in_boxes = reg_targets_per_im.min(dim=2)[0] > 0

        max_reg_targets_per_im = reg_targets_per_im.max(dim=2)[0]
        # limit the regression range for each location
        is_cared_in_the_level = \
            (max_reg_targets_per_im >= object_sizes_of_interest[:, [0]]) & \
            (max_reg_targets_per_im <= object_sizes_of_interest[:, [1]])

        locations_to_gt_area = area[None].repeat(len(locations), 1)
        locations_to_gt_area[is_in_boxes == 0] = INF
        locations_to_gt_area[is_cared_in_the_level == 0] = INF

        # if there is still more than one object for a location,
        # we choose the one with minimal area
        locations_to_min_area, locations_to_gt_inds = locations_to_gt_area.min(dim=1)

        reg_targets_per_im = reg_targets_per_im[range(len(locations)),
                                                locations_to_gt_inds]
        labels_per_im = labels_per_im[locations_to_gt_inds]
        labels_per_im[locations_to_min_area == INF] = 0

        labels.append(labels_per_im)
        reg_targets.append(reg_targets_per_im)

    return labels, reg_targets
def prepare_targets(self, points, targets, im_w, im_h):
    object_sizes_of_interest = self.object_sizes_of_interest
    expanded_object_sizes_of_interest = []
    for l, points_per_level in enumerate(points):
        object_sizes_of_interest_per_level = \
            points_per_level.new_tensor(object_sizes_of_interest[l])
        expanded_object_sizes_of_interest.append(
            object_sizes_of_interest_per_level[None].expand(
                len(points_per_level), -1))

    expanded_object_sizes_of_interest = jt.contrib.concat(
        expanded_object_sizes_of_interest, dim=0)
    num_points_per_level = [
        len(points_per_level) for points_per_level in points
    ]
    self.num_points_per_level = num_points_per_level
    points_all_level = jt.contrib.concat(points, dim=0)
    labels, reg_targets, matched_idxes = self.compute_targets_for_locations(
        points_all_level, targets, expanded_object_sizes_of_interest,
        im_w, im_h)

    labels_split = []
    reg_targets_split = []
    for i in range(len(labels)):
        labels_split.append(
            jt.split(labels[i], num_points_per_level, dim=0))
        reg_targets_split.append(
            jt.split(reg_targets[i], num_points_per_level, dim=0))

    labels_level_first = []
    reg_targets_level_first = []
    for level in range(len(points)):
        labels_level_first.append(
            jt.contrib.concat(
                [labels_per_im[level] for labels_per_im in labels_split],
                dim=0))

        reg_targets_per_level = \
            jt.contrib.concat([reg_targets_per_im[level]
                               for reg_targets_per_im in reg_targets_split],
                              dim=0)

        if self.norm_reg_targets:
            reg_targets_per_level = reg_targets_per_level / self.fpn_strides[level]
        reg_targets_level_first.append(reg_targets_per_level)

    matched_idxes = jt.stack(matched_idxes)

    return labels_level_first, reg_targets_level_first, labels, reg_targets, \
        matched_idxes
def forward_single_image(self, masks, boxes):
    boxes = boxes.convert("xyxy")
    im_w, im_h = boxes.size
    res = []
    for i in range(boxes.bbox.shape[0]):
        mask = masks[i]
        if mask.ndim == 3:
            mask = mask[0]
        res.append(
            paste_mask_in_image(mask, boxes.bbox[i], im_h, im_w,
                                self.threshold, self.padding))
    if len(res) > 0:
        res = jt.stack(res, dim=0).unsqueeze(1)
    else:
        res = masks.new_empty((0, 1, masks.shape[-2], masks.shape[-1]))
    return res
def grid_anchors(self, grid_sizes):
    anchors = []
    for size, stride, base_anchors in zip(grid_sizes, self.strides,
                                          self.cell_anchors):
        grid_height, grid_width = size
        shifts_x = jt.arange(0, grid_width * stride, step=stride).float32()
        shifts_y = jt.arange(0, grid_height * stride, step=stride).float32()
        shift_y, shift_x = jt.meshgrid(shifts_y, shifts_x)
        shift_x = shift_x.reshape(-1)
        shift_y = shift_y.reshape(-1)
        shifts = jt.stack((shift_x, shift_y, shift_x, shift_y), dim=1)

        anchors.append((shifts.reshape(-1, 1, 4) +
                        base_anchors.reshape(1, -1, 4)).reshape(-1, 4))

    return anchors
def get_pos_proposal_indexes(self, locations, box_regression,
                             matched_idxes, targets):
    locations = jt.contrib.concat(locations, dim=0)
    pos_indexes_for_targets = []
    for im in range(len(targets)):
        pos_indexes_for_targets_per_im = locations.new_ones(
            len(targets[im])).long() * -1
        box_regression_im = [
            box_regression[l][im].detach().view(4, -1).transpose(0, 1) *
            self.fpn_strides[l] for l in range(len(box_regression))
        ]
        box_regression_im = jt.contrib.concat(box_regression_im, dim=0)
        for t_id in range(len(targets[im])):
            valid = matched_idxes[im] == t_id
            if valid.sum() == 0:
                continue
            valid_location = locations[valid]
            valid_regression = box_regression_im[valid]
            detections = jt.stack([
                valid_location[:, 0] - valid_regression[:, 0],
                valid_location[:, 1] - valid_regression[:, 1],
                valid_location[:, 0] + valid_regression[:, 2],
                valid_location[:, 1] + valid_regression[:, 3],
            ], dim=1)
            detect_boxlist = BoxList(detections, targets[im].size, mode="xyxy")
            target_boxlist = BoxList(targets[im].bbox[t_id:t_id + 1],
                                     targets[im].size, mode="xyxy")
            match_quality_matrix = boxlist_iou(detect_boxlist, target_boxlist)

            pos_labels_per_target = jt.zeros_like(valid)
            iou_in_target = match_quality_matrix[:, 0]
            pos_in_target = (iou_in_target == iou_in_target.max())
            pos_labels_per_target[valid] = pos_in_target
            pos_indexes_for_targets_per_im[t_id] = \
                pos_labels_per_target.nonzero()[0][0]

        pos_indexes_for_targets.append(pos_indexes_for_targets_per_im)

    return pos_indexes_for_targets
def prepare_data(datum, allocation: list = None):
    with jt.no_grad():
        if allocation is None:
            allocation = []
            allocation.append(args.batch_size - sum(allocation))  # The rest might need more/less
        images, (targets, masks, num_crowds) = datum

        cur_idx = 0
        for alloc in allocation:
            for _ in range(alloc):
                images[cur_idx] = gradinator(images[cur_idx])
                targets[cur_idx] = gradinator(targets[cur_idx])
                masks[cur_idx] = gradinator(masks[cur_idx])
                cur_idx += 1

        if cfg.preserve_aspect_ratio:
            # Choose a random size from the batch
            _, h, w = images[random.randint(0, len(images) - 1)].shape

            for idx, (image, target, mask, num_crowd) in enumerate(
                    zip(images, targets, masks, num_crowds)):
                images[idx], targets[idx], masks[idx], num_crowds[idx] \
                    = enforce_size(image, target, mask, num_crowd, w, h)

        cur_idx = 0
        split_images, split_targets, split_masks, split_numcrowds \
            = [[None for alloc in allocation] for _ in range(4)]

        for device_idx, alloc in enumerate(allocation):
            split_images[device_idx] = jt.stack(
                images[cur_idx:cur_idx + alloc], dim=0)
            split_targets[device_idx] = targets[cur_idx:cur_idx + alloc]
            split_masks[device_idx] = masks[cur_idx:cur_idx + alloc]
            split_numcrowds[device_idx] = num_crowds[cur_idx:cur_idx + alloc]
            cur_idx += alloc

        return split_images[0], split_targets[0], split_masks[0], \
            split_numcrowds[0]
def collate(data_list):
    r"""Collates a python list of data objects to the internal storage
    format of :class:`torch_geometric.data.InMemoryDataset`."""
    keys = data_list[0].keys
    data = data_list[0].__class__()

    for key in keys:
        data[key] = []
    slices = {key: [0] for key in keys}

    for item, key in product(data_list, keys):
        data[key].append(item[key])
        if isinstance(item[key], Var) and item[key].ndim > 0:
            cat_dim = item.__cat_dim__(key, item[key])
            cat_dim = 0 if cat_dim is None else cat_dim
            s = slices[key][-1] + item[key].size(cat_dim)
        else:
            s = slices[key][-1] + 1
        slices[key].append(s)

    if hasattr(data_list[0], '__num_nodes__'):
        data.__num_nodes__ = []
        for item in data_list:
            data.__num_nodes__.append(item.num_nodes)

    for key in keys:
        item = data_list[0][key]
        if isinstance(item, Var) and len(data_list) > 1:
            if item.ndim > 0:
                cat_dim = data.__cat_dim__(key, item)
                cat_dim = 0 if cat_dim is None else cat_dim
                data[key] = jt.concat(data[key], dim=cat_dim)
            else:
                data[key] = jt.stack(data[key])
        elif isinstance(item, Var):  # Don't duplicate attributes...
            data[key] = data[key][0]
        elif isinstance(item, int) or isinstance(item, float):
            data[key] = jt.array(data[key])

        slices[key] = jt.array(slices[key], dtype='int32')

    return data, slices
def process_batch(self, detections, labels):
    """
    Update the confusion matrix with one batch of detections and labels.
    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
    Arguments:
        detections (Array[N, 6]): x1, y1, x2, y2, conf, class
        labels (Array[M, 5]): class, x1, y1, x2, y2
    Returns:
        None, updates the confusion matrix accordingly
    """
    detections = detections[detections[:, 4] > self.conf]
    gt_classes = labels[:, 0].int()
    detection_classes = detections[:, 5].int()
    iou = general.box_iou(labels[:, 1:], detections[:, :4])

    x = jt.where(iou > self.iou_thres)
    if x[0].shape[0]:
        matches = jt.contrib.concat(
            (jt.stack(x, 1), iou[x[0], x[1]][:, None]), 1).numpy()
        if x[0].shape[0] > 1:
            matches = matches[matches[:, 2].argsort()[::-1]]
            matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
            matches = matches[matches[:, 2].argsort()[::-1]]
            matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
    else:
        matches = np.zeros((0, 3))

    n = matches.shape[0] > 0
    m0, m1, _ = matches.transpose().astype(np.int16)
    for i, gc in enumerate(gt_classes):
        j = m0 == i
        if n and sum(j) == 1:
            self.matrix[gc, detection_classes[m1[j]]] += 1  # correct
        else:
            self.matrix[self.nc, gc] += 1  # background FP

    if n:
        for i, dc in enumerate(detection_classes):
            if not any(m1 == i):
                self.matrix[dc, self.nc] += 1  # background FN