def mask_to_tight_box(mask):
    """Return the tight bounding box around the non-zero entries of ``mask``.

    The coordinates come from ``mask.nonzero()``: column 0 of that result
    supplies the y extent and column 1 supplies the x extent.

    Returns:
        list[int]: ``[xmin, ymin, xmax, ymax]``.
    """
    coords = mask.nonzero()
    rows = coords[:, 0]
    cols = coords[:, 1]
    extremes = (jt.min(cols), jt.min(rows), jt.max(cols), jt.max(rows))
    # xmin, ymin, xmax, ymax
    return [int(value) for value in extremes]
def execute(self, x):
    """Compute an attention map from the channel-wise mean and max of ``x``.

    The mean and the max over dim 1 (both kept as size-1 dims) are
    concatenated along dim 1, passed through ``self.conv1`` and finally
    through ``self.sigmoid``.
    """
    # Squeeze the channel dimension two ways: average and maximum.
    channel_mean = jt.mean(x, dim=1, keepdims=1)
    channel_max = jt.max(x, dim=1, keepdims=1)
    # Each pooled map is presumably [b, 1, h, w] -- TODO confirm input rank.
    pooled = jt.contrib.concat([channel_mean, channel_max], dim=1)
    return self.sigmoid(self.conv1(pooled))
def execute(self, x):
    """Forward pass: embed, sample/group twice, fuse, pool and classify.

    ``x`` must be 3-dimensional (its size is unpacked into three values);
    the original comments note the layout after the convolutions as B, D, N.

    Returns:
        The output of ``self.linear3``.
    """
    num_batches, _, _ = x.size()
    coords = x.permute(0, 2, 1)

    # Two conv -> bn -> relu stages on the raw input.
    embedded = self.relu(self.bn1(self.conv1(x)))
    embedded = self.relu(self.bn2(self.conv2(embedded)))
    embedded = embedded.permute(0, 2, 1)

    # First sampling/grouping stage (512 points, 32 samples each).
    coords, grouped = sample_and_group(
        npoint=512, nsample=32, xyz=coords, points=embedded)
    local_0 = self.gather_local_0(grouped).permute(0, 2, 1)

    # Second stage works on the output coordinates of the first one.
    coords, grouped = sample_and_group(
        npoint=256, nsample=32, xyz=coords, points=local_0)
    local_1 = self.gather_local_1(grouped)

    # Concatenate the pt_last output with its own input along dim 1.
    fused = self.conv_fuse(concat([self.pt_last(local_1), local_1], dim=1))

    # Max over dim 2, then flatten to one row per batch element.
    pooled = jt.max(fused, 2).view(num_batches, -1)

    hidden = self.dp1(self.relu(self.bn6(self.linear1(pooled))))
    hidden = self.dp2(self.relu(self.bn7(self.linear2(hidden))))
    return self.linear3(hidden)
def cc_fast_nms(self, boxes, masks, scores, iou_threshold: float = 0.5, top_k: int = 200):
    """Cross-class fast NMS.

    Every detection is first reduced to its best class, then the ``top_k``
    highest scoring detections are kept unless a higher scoring detection
    overlaps them with IoU above ``iou_threshold``.

    Returns:
        tuple: ``(boxes, masks, classes, scores)`` of the surviving detections.
    """
    # Collapse all the classes into one: best class and its score per box.
    classes, scores = scores.argmax(dim=0)

    # Indices of the top_k detections, best score first.
    order, _ = scores.argsort(0, descending=True)
    order = order[:top_k]
    ranked_boxes = boxes[order]

    # Pairwise IoU between the ranked boxes; zero out the diagonal and the
    # lower triangle so only pairs (higher score, lower score) remain.
    overlap = jaccard(ranked_boxes, ranked_boxes)
    overlap = overlap.triu_(diagonal=1)

    # Per column: the largest IoU of this box with any higher scoring box.
    worst_overlap = jt.max(overlap, dim=0)

    # Keep only boxes that no higher scoring box would suppress in
    # traditional NMS.
    survivors = order[worst_overlap <= iou_threshold]
    return (boxes[survivors], masks[survivors], classes[survivors],
            scores[survivors])
def detect(self, batch_idx, conf_preds, decoded_boxes, mask_data, inst_data):
    """Perform NMS for only the max scoring class that isn't background (class 0).

    Returns:
        ``None`` when no detection scores above ``self.conf_thresh``;
        otherwise a dict with the keys 'box', 'mask', 'class' and 'score'.
    """
    with timer.env('Slices'):
        # Skip index 0 of the class axis for this batch element.
        fg_scores = conf_preds[batch_idx, 1:]
        best_scores = jt.max(fg_scores, dim=0)
        keep = (best_scores > self.conf_thresh)
        keep = jt.where(keep)[0]

        scores = fg_scores[:, keep]
        boxes = decoded_boxes[keep]
        masks = mask_data[batch_idx, keep]
        if inst_data is not None:
            # Sliced like the masks; not used further in this method.
            inst = inst_data[batch_idx, keep]

    if scores.shape[1] == 0:
        return None

    if not self.use_fast_nms:
        boxes, masks, classes, scores = self.traditional_nms(
            boxes, masks, scores, self.nms_thresh, self.conf_thresh)
        if self.use_cross_class_nms:
            print(
                'Warning: Cross Class Traditional NMS is not implemented.')
    else:
        fast_variant = (self.cc_fast_nms if self.use_cross_class_nms
                        else self.fast_nms)
        boxes, masks, classes, scores = fast_variant(
            boxes, masks, scores, self.nms_thresh, self.top_k)

    return {'box': boxes, 'mask': masks, 'class': classes, 'score': scores}
def execute(self, x):
    """Predict one 3x3 matrix per batch element, offset by the identity.

    Three conv/bn/relu stages are max-pooled over dim 2 and flattened to
    rows of 1024 values, followed by two fc/bn/relu stages and ``self.fc3``.
    A flattened 3x3 identity is added before reshaping to ``(-1, 3, 3)``.
    """
    n_batch = x.shape[0]

    feat = self.relu(self.bn1(self.conv1(x)))
    feat = self.relu(self.bn2(self.conv2(feat)))
    feat = self.relu(self.bn3(self.conv3(feat)))

    feat = jt.max(feat, 2).reshape(-1, 1024)

    feat = self.relu(self.bn4(self.fc1(feat)))
    feat = self.relu(self.bn5(self.fc2(feat)))
    feat = self.fc3(feat)

    # Flattened identity [1,0,0, 0,1,0, 0,0,1], one copy per batch element.
    flat_eye = np.eye(3).flatten().astype(np.float32)
    identity = jt.array(flat_eye).reshape(1, 9).repeat(n_batch, 1)

    return (feat + identity).reshape(-1, 3, 3)
def execute(self, x, cls_label):
    """Forward pass combining per-point, global and class-label features.

    ``x`` must be 3-dimensional; its first and last sizes are used as the
    batch size and as the repeat count ``N`` for the global features.
    ``cls_label`` is viewed as ``(batch_size, 16, 1)``.

    Returns:
        The output of ``self.convs3``.
    """
    n_batch, _, n_points = x.size()

    stem = self.relu(self.bn1(self.conv1(x)))  # noted as B, D, N
    stem = self.relu(self.bn2(self.conv2(stem)))

    # Four chained blocks; every intermediate output is kept.
    out1 = self.sa1(stem)
    out2 = self.sa2(out1)
    out3 = self.sa3(out2)
    out4 = self.sa4(out3)
    fused = self.conv_fuse(concat((out1, out2, out3, out4), dim=1))

    # Global descriptors: max and mean over dim 2, repeated N times.
    pooled_max = jt.max(fused, 2)
    pooled_avg = jt.mean(fused, 2)
    max_feature = pooled_max.view(n_batch, -1).unsqueeze(-1).repeat(1, 1, n_points)
    avg_feature = pooled_avg.view(n_batch, -1).unsqueeze(-1).repeat(1, 1, n_points)

    # Class label embedding, repeated the same way.
    one_hot = cls_label.view(n_batch, 16, 1)
    label_feature = self.label_conv(one_hot).repeat(1, 1, n_points)

    global_feature = concat((max_feature, avg_feature, label_feature), 1)
    combined = concat((fused, global_feature), 1)

    head = self.dp1(self.relu(self.bns1(self.convs1(combined))))
    head = self.relu(self.bns2(self.convs2(head)))
    return self.convs3(head)
def execute(self, points):
    """Classify a point set.

    Args:
        points: noted by the original author as
            [batch-size, num-points, num-dims].

    Returns:
        The output of ``self.classifier`` on the max-pooled transformer
        features.
    """
    n_batch = points.size(0)

    embedded = self.input_embeds(points)
    embedded, points = self.knn_embeds((embedded, points))

    # The transformer output is presumably [batch-size, num-dims, num-points]
    # (per the original comment) -- TODO confirm.
    encoded = self.transformer(embedded, points)

    # Max over dim 2, then flatten to one row per batch element.
    pooled = jt.max(encoded, 2).view(n_batch, -1)
    return self.classifier(pooled)
def execute(self, x):
    """Embed each group of a 4-D input and max-pool over the group.

    ``x`` is unpacked as ``(b, n, s, d)``. Every ``(d, s)`` slice goes
    through two conv/bn/relu stages and is max-pooled over its last dim.
    The result is reshaped to ``(b, n, -1)`` and permuted to ``(0, 2, 1)``.
    """
    b, n, s, d = x.size()

    # Fold the two leading dims together: one (d, s) slice per row.
    groups = x.permute(0, 1, 3, 2).reshape(-1, d, s)
    n_groups = groups.size()[0]

    groups = self.relu(self.bn1(self.conv1(groups)))
    groups = self.relu(self.bn2(self.conv2(groups)))

    pooled = jt.max(groups, 2).view(n_groups, -1)
    return pooled.reshape(b, n, -1).permute(0, 2, 1)
def execute(self, x):
    """Scale ``x`` by an attention map built from its channel mean and max.

    If any step raises, the exception is printed and ``x`` is returned
    unchanged.
    """
    try:
        channel_mean = jt.mean(x, dim=1, keepdims=True)
        channel_max = jt.max(x, dim=1, keepdims=True)
        attention = jt.contrib.concat([channel_mean, channel_max], dim=1)
        attention = self.conv(attention)
        scaled = x * self.sigmoid(attention)
    except Exception as e:
        # NOTE(review): broad catch kept on purpose to preserve the existing
        # fall-back-to-identity behaviour; it also hides real errors.
        print(e)
        return x
    return scaled
def execute(self, x):
    """Forward pass: five conv/bn/relu stages, max-pool, two linear layers.

    ``x`` must be 3-dimensional (its size is unpacked into three values).

    Returns:
        The output of ``self.linear2``.
    """
    n_batch, _, _ = x.size()

    stages = (
        (self.conv1, self.bn1),
        (self.conv2, self.bn2),
        (self.conv3, self.bn3),
        (self.conv4, self.bn4),
        (self.conv5, self.bn5),
    )
    feat = x
    for conv, norm in stages:
        feat = self.relu(norm(conv(feat)))

    # Max over dim 2, then one flat row per batch element.
    feat = jt.max(feat, 2).reshape(n_batch, -1)

    feat = self.dp1(self.relu(self.bn6(self.linear1(feat))))
    return self.linear2(feat)
def display_lincomb(proto_data, masks):
    """Plot how prototypes combine into the first mask (debug helper).

    Shows two matplotlib figures: a 4x8 grid of the prototypes, ordered by
    decreasing absolute coefficient and scaled by that coefficient, and the
    assembled mask ``matmul(proto_data, masks.t())[:, :, 0]``.

    Args:
        proto_data: 3-D prototype tensor; its shape is unpacked as
            ``(proto_h, proto_w, _)``.
        masks: coefficient tensor; row 0 is visualised. The grid needs at
            least 32 coefficients.
    """
    import matplotlib.pyplot as plt

    out_masks = jt.matmul(proto_data, masks.t())

    for kdx in range(1):
        jdx = kdx + 0

        coeffs = masks[jdx].numpy()
        # Largest absolute coefficient first.
        idx = np.argsort(-np.abs(coeffs))
        coeffs_sort = coeffs[idx]

        arr_h, arr_w = (4, 8)
        proto_h, proto_w, _ = proto_data.shape
        arr_img = np.zeros([proto_h * arr_h, proto_w * arr_w])
        arr_run = np.zeros([proto_h * arr_h, proto_w * arr_w])

        for y in range(arr_h):
            for x in range(arr_w):
                i = arr_w * y + x
                proto = proto_data[:, :, idx[i]]

                if i == 0:
                    running_total = proto.numpy() * coeffs_sort[i]
                else:
                    running_total += proto.numpy() * coeffs_sort[i]

                running_total_nonlin = running_total
                if cfg.mask_proto_mask_activation == activation_func.sigmoid:
                    running_total_nonlin = (
                        1 / (1 + np.exp(-running_total_nonlin)))

                rows = slice(y * proto_h, (y + 1) * proto_h)
                cols = slice(x * proto_w, (x + 1) * proto_w)
                arr_img[rows, cols] = (
                    proto / jt.max(proto)).numpy() * coeffs_sort[i]
                # np.float was removed in NumPy 1.24; float64 is the type it
                # used to alias.
                arr_run[rows, cols] = (running_total_nonlin > 0.5).astype(
                    np.float64)

        plt.imshow(arr_img)
        plt.show()
        # arr_run holds the thresholded running sum; plot it with
        # plt.imshow(arr_run) when debugging the accumulation.
        plt.imshow(out_masks[:, :, jdx].numpy())
        plt.show()
def execute(self, x):
    """Group neighbours, embed each group and max-pool over the neighbours.

    Args:
        x: a ``(features, points)`` pair; the original comments note both as
            [batch-size, num-points, num-dims].

    Returns:
        tuple: ``(features, points)`` after sampling, grouping and pooling.
    """
    features, points = x

    # Grouped features are unpacked below as
    # (batch, points, neighbours, dims).
    points, features = fops.sample_and_group(
        self.num_points, self.num_neighbors, points, features)
    n_batch, n_points, n_neighbors, n_dims = features.size()

    # One (dims, neighbours) slice per sampled point.
    grouped = features.permute(0, 1, 3, 2).reshape(-1, n_dims, n_neighbors)

    # Max over the last dim, then restore the (batch, points, dims) layout.
    embedded = self.embeds(grouped)
    pooled = jt.max(embedded, -1).view(n_batch, n_points, n_dims)
    return pooled, points
def execute(self, x):
    """Predict one ``k x k`` matrix per batch element, offset by the identity.

    Three conv/bn/relu stages are max-pooled over dim 2 and flattened to
    rows of 1024 values, followed by two fc/bn/relu stages and ``self.fc3``.
    A flattened ``self.k`` identity is added (broadcast over the batch)
    before reshaping to ``(-1, k, k)``.
    """
    k = self.k

    feat = self.relu(self.bn1(self.conv1(x)))
    feat = self.relu(self.bn2(self.conv2(feat)))
    feat = self.relu(self.bn3(self.conv3(feat)))

    feat = jt.max(feat, 2).reshape(-1, 1024)

    feat = self.relu(self.bn4(self.fc1(feat)))
    feat = self.relu(self.bn5(self.fc2(feat)))
    feat = self.fc3(feat)

    # Single-row identity; the addition broadcasts it across the batch.
    flat_eye = np.eye(k).flatten().astype(np.float32)
    identity = jt.array(flat_eye).reshape(1, k * k)

    return (feat + identity).reshape(-1, k, k)
def execute(self, x):
    """Forward pass: stem, four chained blocks, fuse, max-pool, classify.

    ``x`` must be 3-dimensional (its size is unpacked into three values).

    Returns:
        The output of ``self.linear3``.
    """
    n_batch, _, _ = x.size()

    stem = self.relu(self.bn1(self.conv1(x)))  # noted as B, D, N
    stem = self.relu(self.bn2(self.conv2(stem)))

    # Each block consumes the previous block's output; all four are kept.
    out1 = self.sa1(stem)
    out2 = self.sa2(out1)
    out3 = self.sa3(out2)
    out4 = self.sa4(out3)
    fused = self.conv_fuse(concat((out1, out2, out3, out4), dim=1))

    # Max over dim 2, then one flat row per batch element.
    pooled = jt.max(fused, 2).view(n_batch, -1)

    hidden = self.dp1(self.relu(self.bn6(self.linear1(pooled))))
    hidden = self.dp2(self.relu(self.bn7(self.linear2(hidden))))
    return self.linear3(hidden)
def directional_lighting(diffuseLight, specularLight, normals, light_intensity=0.5, light_color=(1, 1, 1), light_direction=(0, 1, 0), positions=None, eye=None, with_specular=False, metallic_textures=None, roughness_textures=None, Gbuffer="None", transform=None):
    """Accumulate the contribution of one directional light.

    ``diffuseLight`` and ``specularLight`` are updated in place with ``+=``
    unless a G-buffer mode replaces them.

    Args:
        diffuseLight: running diffuse term, added to in place.
        specularLight: running specular term, added to in place.
        normals: surface normals; the dot product with the light direction
            is taken over dim 2.
        light_intensity: scalar multiplier of the light colour.
        light_color: light colour; a 1-D value gets a leading dim added.
        light_direction: light direction, normalised along dim 0; a 1-D
            value gets a leading dim added.
        positions: surface positions, required for the specular term and
            for ``Gbuffer == "depth"``.
        eye: camera position, required for the specular term.
        with_specular: enable the microfacet (Cook-Torrance) branch.
        metallic_textures, roughness_textures: material textures with 4 or
            6 dims; they are averaged over dim 2 (4-D) or dims 2..4 (6-D).
        Gbuffer: ``"normal"`` or ``"depth"`` replaces the diffuse output
            with a debug visualisation; any other value leaves it alone.
        transform: object providing ``tranpos(positions)``; only used for
            ``Gbuffer == "depth"``.

    Returns:
        list: ``[diffuseLight, specularLight]``.
    """
    # Convert only when a camera position was supplied; converting None first
    # would make the ``eye is not None`` check below always true.
    if eye is not None:
        eye = jt.array(eye, "float32")
    light_color = jt.array(light_color, "float32")
    light_direction = jt.normalize(jt.array(light_direction, "float32"), dim=0)

    if len(light_color.shape) == 1:
        light_color = light_color.unsqueeze(0)
    if len(light_direction.shape) == 1:
        light_direction = light_direction.unsqueeze(0)

    cosine = nn.relu(jt.sum(normals * light_direction, dim=2))

    has_material = (metallic_textures is not None) and (
        roughness_textures is not None)

    # Average the material textures down to one value per face. Without
    # textures there is nothing to average and the diffuse branch is used.
    if with_specular and has_material:
        rank = len(metallic_textures.shape)
        if rank == 4:
            total = metallic_textures.shape[2] * 1.0
            metallic_textures = jt.sum(metallic_textures, dim=2) / total
            roughness_textures = jt.sum(roughness_textures, dim=2) / total
        elif rank == 6:
            total = metallic_textures.shape[2] * metallic_textures.shape[
                3] * metallic_textures.shape[4] * 1.0
            # Each reduction removes dim 2, so the next texture axis moves
            # into its place.
            for _ in range(3):
                metallic_textures = jt.sum(metallic_textures, dim=2)
                roughness_textures = jt.sum(roughness_textures, dim=2)
            metallic_textures = metallic_textures / total
            roughness_textures = roughness_textures / total

    # Microfacet model
    if with_specular and (eye is not None) and (positions is not None) and has_material:
        N = normals
        if len(eye.shape) == 2:
            eye = eye.unsqueeze(1)
        V = jt.normalize(eye - positions, dim=2)
        L = light_direction
        H = jt.normalize(V + L, dim=2)

        # Default setting
        metallic = metallic_textures
        roughness = roughness_textures
        F0 = jt.array((0.04, 0.04, 0.04), "float32")
        albedo = jt.array((1.0, 1.0, 1.0), "float32")
        # Blend the base reflectance towards the albedo as metallic grows.
        F0 = F0.unsqueeze(0).unsqueeze(1) * (
            1 - metallic) + albedo.unsqueeze(0).unsqueeze(1) * metallic

        radiance = light_intensity * (
            light_color.unsqueeze(1) * cosine.unsqueeze(2))

        # Cook-Torrance BRDF
        NDF = GGX(N, H, roughness)
        G = GeometrySmith(N, V, L, roughness)
        F = fresnelSchlick(nn.relu(jt.sum(H * V, dim=2)), F0)
        KS = F
        KD = 1.0 - KS
        KD *= (1.0 - metallic)
        diffuseLight += KD * radiance

        numerator = NDF * G * F
        denominator = (4.0 * nn.relu(jt.sum(N * V, dim=2)) *
                       nn.relu(jt.sum(N * L, dim=2))).unsqueeze(2)
        # Clamp from below to avoid dividing by (near) zero.
        specular = numerator / jt.clamp(denominator, 0.01)
        specularLight += specular * radiance
    else:
        diffuseLight += light_intensity * (
            light_color.unsqueeze(1) * cosine.unsqueeze(2))

    if Gbuffer == "normal":
        specularLight *= 0.0
        diffuseLight = normals * 0.5 + 0.5
    elif Gbuffer == "depth":
        specularLight *= 0.0
        viewpos = transform.tranpos(positions)
        max_depth = jt.max(viewpos[..., 2])
        diffuseLight = viewpos / max_depth
        # Write the normalised depth into the first two channels as well.
        normalised_depth = viewpos[..., 2] / max_depth
        diffuseLight[..., 0] = normalised_depth
        diffuseLight[..., 1] = normalised_depth

    return [diffuseLight, specularLight]
def execute(self, net, predictions, targets, masks, num_crowds):
    """Multibox Loss

    Args:
        net: the network, forwarded to ``self.mask_iou_loss``.
        predictions (dict): Holds loc preds, conf preds, mask preds and
            prior boxes from the SSD net under the keys 'loc', 'conf',
            'mask' and 'priors'. Depending on the config, 'proto',
            'score', 'inst', 'classes' and 'segm' are read as well.
            loc shape: jt.size(batch_size,num_priors,4)
            conf shape: jt.size(batch_size,num_priors,num_classes)
            masks shape: jt.size(batch_size,num_priors,mask_dim)
            priors shape: jt.size(num_priors,4)
            proto* shape: jt.size(batch_size,mask_h,mask_w,mask_dim)

        targets (list<tensor>): Ground truth boxes and labels for a batch,
            shape: [batch_size][num_objs,5] (last idx is the label).

        masks (list<tensor>): Ground truth masks for each object in each
            image, shape: [batch_size][num_objs,im_height,im_width].
            Crowd entries are stripped from this list in place.

        num_crowds (list<int>): Number of crowd annotations per batch. The
            crowd annotations should be the last num_crowds elements of
            targets and masks.

        * Only if mask_type == lincomb

    Returns:
        dict: loss key -> loss value (see the loss key at the end of the
        body).
    """
    loc_data = predictions['loc']
    conf_data = predictions['conf']
    mask_data = predictions['mask']
    priors = predictions['priors']

    if cfg.mask_type == mask_type.lincomb:
        proto_data = predictions['proto']

    score_data = predictions['score'] if cfg.use_mask_scoring else None
    inst_data = predictions['inst'] if cfg.use_instance_coeff else None

    labels = [None] * len(targets)  # Used in sem segm loss

    batch_size = loc_data.shape[0]
    num_priors = priors.shape[0]
    num_classes = self.num_classes

    # Match priors (default boxes) and ground truth boxes
    loc_t = jt.empty((batch_size, num_priors, 4), dtype=loc_data.dtype)
    gt_box_t = jt.empty((batch_size, num_priors, 4), dtype=loc_data.dtype)
    conf_t = jt.empty((batch_size, num_priors)).int32()
    idx_t = jt.empty((batch_size, num_priors)).int32()

    if cfg.use_class_existence_loss:
        class_existence_t = jt.empty((batch_size, num_classes - 1), dtype=loc_data.dtype)

    for idx in range(batch_size):
        truths = targets[idx][:, :-1]
        labels[idx] = targets[idx][:, -1].int32()

        if cfg.use_class_existence_loss:
            # Construct a one-hot vector for each object and collapse it into
            # an existence vector with max. Also it's fine to include the
            # crowd annotations here.
            # jt.max returns the reduced values directly, so there is no
            # (values, indices) pair to index into.
            class_existence_t[idx, :] = jt.eye(num_classes - 1)[labels[idx]].max(dim=0)

        # Split the crowd annotations because they come bundled in
        cur_crowds = num_crowds[idx]
        if cur_crowds > 0:
            split = lambda x: (x[-cur_crowds:], x[:-cur_crowds])
            crowd_boxes, truths = split(truths)

            # We don't use the crowd labels or masks
            _, labels[idx] = split(labels[idx])
            _, masks[idx] = split(masks[idx])
        else:
            crowd_boxes = None

        # match() is handed loc_t, conf_t and idx_t together with the batch
        # index; idx_t[idx] is read back right after to pick the matched box.
        match(self.pos_threshold, self.neg_threshold,
              truths, priors, labels[idx], crowd_boxes,
              loc_t, conf_t, idx_t, idx, loc_data[idx])

        gt_box_t[idx, :, :] = truths[idx_t[idx]]

    # wrap targets
    loc_t.stop_grad()
    conf_t.stop_grad()
    idx_t.stop_grad()

    pos = conf_t > 0
    num_pos = pos.sum(dim=1, keepdims=True)

    # Shape: [batch,num_priors,4]
    pos_idx = pos.unsqueeze(pos.ndim).expand_as(loc_data)

    losses = {}
    # Only lincomb_mask_loss can provide these; default to "none".
    maskiou_targets = None

    # Localization Loss (Smooth L1)
    if cfg.train_boxes:
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        losses['B'] = nn.smooth_l1_loss(loc_p, loc_t, reduction='sum') * cfg.bbox_alpha

    if cfg.train_masks:
        if cfg.mask_type == mask_type.direct:
            if cfg.use_gt_bboxes:
                pos_masks = []
                for idx in range(batch_size):
                    pos_masks.append(masks[idx][idx_t[idx, pos[idx]]])
                masks_t = jt.contrib.concat(pos_masks, 0)
                masks_p = mask_data[pos, :].view(-1, cfg.mask_dim)
                losses['M'] = nn.bce_loss(jt.clamp(masks_p, 0, 1), masks_t, size_average=False) * cfg.mask_alpha
            else:
                losses['M'] = self.direct_mask_loss(pos_idx, idx_t, loc_data, mask_data, priors, masks)
        elif cfg.mask_type == mask_type.lincomb:
            ret = self.lincomb_mask_loss(pos, idx_t, loc_data, mask_data, priors, proto_data, masks, gt_box_t, score_data, inst_data, labels)
            if cfg.use_maskiou:
                loss, maskiou_targets = ret
            else:
                loss = ret
            losses.update(loss)

            if cfg.mask_proto_loss is not None:
                if cfg.mask_proto_loss == 'l1':
                    losses['P'] = jt.mean(jt.abs(proto_data)) / self.l1_expected_area * self.l1_alpha
                elif cfg.mask_proto_loss == 'disj':
                    # Mean over every position of the max log-softmax value;
                    # jt.max already yields the values.
                    losses['P'] = -jt.mean(jt.max(nn.log_softmax(proto_data, dim=-1), dim=-1))

    # Confidence loss
    if cfg.use_focal_loss:
        if cfg.use_sigmoid_focal_loss:
            losses['C'] = self.focal_conf_sigmoid_loss(conf_data, conf_t)
        elif cfg.use_objectness_score:
            losses['C'] = self.focal_conf_objectness_loss(conf_data, conf_t)
        else:
            losses['C'] = self.focal_conf_loss(conf_data, conf_t)
    else:
        if cfg.use_objectness_score:
            # NOTE(review): loc_p only exists when cfg.train_boxes is set.
            losses['C'] = self.conf_objectness_loss(conf_data, conf_t, batch_size, loc_p, loc_t, priors)
        else:
            losses['C'] = self.ohem_conf_loss(conf_data, conf_t, pos, batch_size)

    # Mask IoU Loss
    if cfg.use_maskiou and maskiou_targets is not None:
        losses['I'] = self.mask_iou_loss(net, maskiou_targets)

    # These losses also don't depend on anchors
    if cfg.use_class_existence_loss:
        losses['E'] = self.class_existence_loss(predictions['classes'], class_existence_t)
    if cfg.use_semantic_segmentation_loss:
        losses['S'] = self.semantic_segmentation_loss(predictions['segm'], masks, labels)

    # Divide all losses by the number of positives.
    # Don't do it for loss[P] because that doesn't depend on the anchors.
    total_num_pos = num_pos.sum().float()
    for k in losses:
        if k not in ('P', 'E', 'S'):
            losses[k] /= total_num_pos
        else:
            losses[k] /= batch_size

    # Loss Key:
    #  - B: Box Localization Loss
    #  - C: Class Confidence Loss
    #  - M: Mask Loss
    #  - P: Prototype Loss
    #  - D: Coefficient Diversity Loss
    #  - E: Class Existence Loss
    #  - S: Semantic Segmentation Loss
    return losses
# Build every prior box and measure how many ground-truth boxes are matched by
# at least one prior with IoU > 0.5.
anchors = [
    make_priors(cs, s, ar)
    for cs, s, ar in zip(conv_sizes, scales, aspect_ratios)
]
anchors = np.concatenate(anchors, axis=0)
anchors = jt.array(anchors)

bboxes_rel = jt.array(bboxes_rel)
perGTAnchorMax = jt.zeros(bboxes_rel.shape[0])

# Process the ground-truth boxes in chunks to bound the size of the pairwise
# IoU matrix. Stepping by chunk_size never produces an empty trailing chunk.
chunk_size = 1000
num_boxes = bboxes_rel.size(0)
for start in range(0, num_boxes, chunk_size):
    end = min(start + chunk_size, num_boxes)

    ious = jaccard(bboxes_rel[start:end], anchors)
    # jt.max returns the reduced values only (no index tensor to unpack).
    perGTAnchorMax[start:end] = jt.max(ious, dim=1)

hits = (perGTAnchorMax > 0.5).float()

print('Total recall: %.2f' % (jt.sum(hits) / hits.size(0) * 100))
print()

for i, metric in enumerate(('small', 'medium', 'large')):
    _hits = hits[sizes == i]
    # Avoid dividing by zero when a size bucket is empty.
    _size = (1 if _hits.size(0) == 0 else _hits.size(0))
    print(metric + ' recall: %.2f' % ((jt.sum(_hits) / _size) * 100))
def compute_hits(bboxes, anchors, iou_threshold=0.5):
    """Flag every box whose best anchor overlap exceeds ``iou_threshold``.

    Args:
        bboxes: boxes passed as the first argument of ``jaccard``.
        anchors: anchors passed as the second argument of ``jaccard``.
        iou_threshold: strict lower bound on the best IoU for a hit.

    Returns:
        Boolean mask from comparing the max over dim 1 of the pairwise IoU
        matrix against ``iou_threshold``.
    """
    ious = jaccard(bboxes, anchors)
    # jt.max returns the reduced values only, so there is no
    # (values, indices) pair to unpack.
    perGTAnchorMax = jt.max(ious, dim=1)
    return (perGTAnchorMax > iou_threshold)