def compute_accuracy(self, prob_cls, gt_cls):
    # we only need detections whose ground-truth label is >= 0
    prob_cls = torch.squeeze(prob_cls)
    mask = torch.ge(gt_cls, 0)
    # get the valid elements
    valid_gt_cls = torch.masked_select(gt_cls, mask)
    valid_prob_cls = torch.masked_select(prob_cls, mask)
    size = min(valid_gt_cls.size()[0], valid_prob_cls.size()[0])
    prob_ones = torch.ge(valid_prob_cls, 0.6).float()
    right_ones = torch.eq(prob_ones, valid_gt_cls.float()).float()
    return torch.div(torch.mul(torch.sum(right_ones), float(1.0)), float(size))
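The mask-then-select idiom above (a boolean mask from torch.ge feeding torch.masked_select) works on bare tensors; a minimal standalone sketch with invented values:

import torch

# hypothetical labels: -1 marks "ignore", 0/1 are real classes
gt_cls = torch.tensor([1., 0., -1., 1.])
prob_cls = torch.tensor([0.9, 0.2, 0.7, 0.4])

mask = torch.ge(gt_cls, 0)                          # tensor([ True,  True, False,  True])
valid_gt = torch.masked_select(gt_cls, mask)        # tensor([1., 0., 1.])
valid_prob = torch.masked_select(prob_cls, mask)    # tensor([0.9000, 0.2000, 0.4000])
pred = torch.ge(valid_prob, 0.6).float()            # tensor([1., 0., 0.])
accuracy = torch.eq(pred, valid_gt).float().mean()
print(accuracy)  # tensor(0.6667): 2 of 3 valid entries classified correctly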
def test_remote_var_binary_methods(self):
    '''Unit tests for methods mentioned on issue 1385
    https://github.com/OpenMined/PySyft/issues/1385'''
    hook = TorchHook(verbose=False)
    local = hook.local_worker
    remote = VirtualWorker(hook, 1)
    local.add_worker(remote)

    x = Var(torch.FloatTensor([1, 2, 3, 4])).send(remote)
    y = Var(torch.FloatTensor([[1, 2, 3, 4]])).send(remote)
    z = torch.matmul(x, y.t())
    assert torch.equal(z.get(), Var(torch.FloatTensor([30])))

    z = torch.add(x, y)
    assert torch.equal(z.get(), Var(torch.FloatTensor([[2, 4, 6, 8]])))

    x = Var(torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])).send(remote)
    y = Var(torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])).send(remote)
    z = torch.cross(x, y, dim=1)
    assert torch.equal(z.get(), Var(torch.FloatTensor([[0, 0, 0], [0, 0, 0], [0, 0, 0]])))

    x = Var(torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])).send(remote)
    y = Var(torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])).send(remote)
    z = torch.dist(x, y)
    assert torch.equal(z.get(), Var(torch.FloatTensor([0.])))

    x = Var(torch.FloatTensor([1, 2, 3])).send(remote)
    y = Var(torch.FloatTensor([1, 2, 3])).send(remote)
    z = torch.dot(x, y)
    assert torch.equal(z.get(), Var(torch.FloatTensor([14])))

    z = torch.eq(x, y)
    assert torch.equal(z.get(), Var(torch.ByteTensor([1, 1, 1])))

    z = torch.ge(x, y)
    assert torch.equal(z.get(), Var(torch.ByteTensor([1, 1, 1])))
def cls_loss(self, gt_label, pred_label):
    # keep the elements whose label is >= 0; only labels 0 and 1 affect the detection loss
    pred_label = torch.squeeze(pred_label)
    mask = torch.ge(gt_label, 0)
    valid_gt_label = torch.masked_select(gt_label, mask).float()
    valid_pred_label = torch.masked_select(pred_label, mask)
    return self.loss_cls(valid_pred_label, valid_gt_label)
def test_local_var_binary_methods(self):
    '''Unit tests for methods mentioned on issue 1385
    https://github.com/OpenMined/PySyft/issues/1385'''
    x = torch.FloatTensor([1, 2, 3, 4])
    y = torch.FloatTensor([[1, 2, 3, 4]])
    z = torch.matmul(x, y.t())
    assert torch.equal(z, torch.FloatTensor([30]))

    z = torch.add(x, y)
    assert torch.equal(z, torch.FloatTensor([[2, 4, 6, 8]]))

    x = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])
    y = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])
    z = torch.cross(x, y, dim=1)
    assert torch.equal(z, torch.FloatTensor([[0, 0, 0], [0, 0, 0], [0, 0, 0]]))

    x = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])
    y = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])
    z = torch.dist(x, y)
    t = torch.FloatTensor([z])
    assert torch.equal(t, torch.FloatTensor([0.]))

    x = torch.FloatTensor([1, 2, 3])
    y = torch.FloatTensor([1, 2, 3])
    z = torch.dot(x, y)
    t = torch.FloatTensor([z])
    assert torch.equal(t, torch.FloatTensor([14]))

    z = torch.eq(x, y)
    assert torch.equal(z, torch.ByteTensor([1, 1, 1]))

    z = torch.ge(x, y)
    assert torch.equal(z, torch.ByteTensor([1, 1, 1]))
def knn(Mxx, Mxy, Myy, k, sqrt):
    n0 = Mxx.size(0)
    n1 = Myy.size(0)
    label = torch.cat((torch.ones(n0), torch.zeros(n1)))
    M = torch.cat((torch.cat((Mxx, Mxy), 1),
                   torch.cat((Mxy.transpose(0, 1), Myy), 1)), 0)
    if sqrt:
        M = M.abs().sqrt()

    INFINITY = float('inf')
    val, idx = (M + torch.diag(INFINITY * torch.ones(n0 + n1))).topk(k, 0, False)

    count = torch.zeros(n0 + n1)
    for i in range(0, k):
        count = count + label.index_select(0, idx[i])
    pred = torch.ge(count, (float(k) / 2) * torch.ones(n0 + n1)).float()

    s = Score_knn()
    s.tp = (pred * label).sum()
    s.fp = (pred * (1 - label)).sum()
    s.fn = ((1 - pred) * label).sum()
    s.tn = ((1 - pred) * (1 - label)).sum()
    s.precision = s.tp / (s.tp + s.fp)
    s.recall = s.tp / (s.tp + s.fn)
    s.acc_t = s.tp / (s.tp + s.fn)
    s.acc_f = s.tn / (s.tn + s.fp)
    s.acc = torch.eq(label, pred).float().mean()
    s.k = k
    return s
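A toy invocation of the leave-one-out 1-NN two-sample test above, with a minimal stand-in for the Score_knn container (the real class lives elsewhere in that codebase). When the second sample is an exact copy of the first, each point's nearest neighbour is its duplicate from the other sample, so the 1-NN accuracy collapses to 0:

import torch

class Score_knn:  # minimal stand-in container for the attributes knn() assigns
    pass

x = torch.randn(8, 2)
y = x.clone()                 # identical samples
Mxx = torch.cdist(x, x)
Mxy = torch.cdist(x, y)
Myy = torch.cdist(y, y)
s = knn(Mxx, Mxy, Myy, k=1, sqrt=False)
print(s.acc)  # tensor(0.): every nearest neighbour carries the opposite label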
def _mask_attentions(attention, image_locs):
    batch_size, num_loc, n_att = attention.data.shape
    tmp1 = torch.unsqueeze(
        torch.arange(0, num_loc).type(torch.LongTensor),
        dim=0).expand(batch_size, num_loc)
    tmp1 = tmp1.cuda() if use_cuda else tmp1
    tmp2 = torch.unsqueeze(image_locs.data, 1).expand(batch_size, num_loc)
    mask = torch.ge(tmp1, tmp2)
    mask = torch.unsqueeze(mask, 2).expand_as(attention)
    attention.data.masked_fill_(mask, 0)
    return attention
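The core of the function above is a standard padding-mask construction: compare a broadcast position index against per-example lengths with torch.ge. A minimal sketch with invented lengths:

import torch

batch_size, num_loc = 2, 5
image_locs = torch.tensor([3, 5])  # number of valid locations per example
positions = torch.arange(0, num_loc).unsqueeze(0).expand(batch_size, num_loc)
lengths = image_locs.unsqueeze(1).expand(batch_size, num_loc)
mask = torch.ge(positions, lengths)  # True where the position is past the valid length
print(mask)
# tensor([[False, False, False,  True,  True],
#         [False, False, False, False, False]])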
def updateGradInput(self, input, y):
    if input[0].size(0) == 1:
        dist = -y * (input[0][0] - input[1][0]) + self.margin
        if dist < 0:
            self.gradInput[0][0] = 0
            self.gradInput[1][0] = 0
        else:
            self.gradInput[0][0] = -y
            self.gradInput[1][0] = y
    else:
        if self.dist is None:
            self.dist = input[0].new()
        self.dist = self.dist.resize_as_(input[0]).copy_(input[0])
        dist = self.dist
        dist.add_(-1, input[1])
        dist.mul_(-1).mul_(y)
        dist.add_(self.margin)

        if self.mask is None:
            self.mask = input[0].new()
        self.mask = self.mask.resize_as_(input[0]).copy_(dist)
        mask = self.mask
        torch.ge(dist, 0, out=mask)

        self.gradInput[0].resize_(dist.size())
        self.gradInput[1].resize_(dist.size())
        self.gradInput[0].copy_(mask)
        self.gradInput[0].mul_(-1).mul_(y)
        self.gradInput[1].copy_(mask)
        self.gradInput[1].mul_(y)

        if self.sizeAverage:
            self.gradInput[0].div_(y.size(0))
            self.gradInput[1].div_(y.size(0))
    return self.gradInput
def pruneWeights(self, minWeight):
    """
    Prune all the weights whose absolute magnitude is less than minWeight

    :param minWeight: min weight to prune. If zero then no pruning
    :type minWeight: float
    """
    if minWeight == 0.0:
        return

    # Collect all weights
    weights = [v for k, v in self.named_parameters() if 'weight' in k]
    for w in weights:
        # Filter weights above threshold
        mask = torch.ge(torch.abs(w.data), minWeight)
        # Zero other weights
        w.data.mul_(mask.type(torch.float32))
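The same torch.ge-based magnitude pruning can be demonstrated on a bare tensor; a minimal sketch (the weight values are invented):

import torch

w = torch.tensor([0.05, -0.3, 0.001, 0.8, -0.02])
minWeight = 0.1
mask = torch.ge(torch.abs(w), minWeight)  # keep entries with |w| >= 0.1
w.mul_(mask.type(torch.float32))          # zero the rest in place
print(w)  # tensor([ 0.0000, -0.3000,  0.0000,  0.8000,  0.0000])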
def test_remote_tensor_binary_methods(self):
    hook = TorchHook(verbose=False)
    local = hook.local_worker
    remote = VirtualWorker(hook, 0)
    local.add_worker(remote)

    x = torch.FloatTensor([1, 2, 3, 4, 5]).send(remote)
    y = torch.FloatTensor([1, 2, 3, 4, 5]).send(remote)
    assert (x.add_(y).get() == torch.FloatTensor([2, 4, 6, 8, 10])).all()

    x = torch.FloatTensor([1, 2, 3, 4]).send(remote)
    y = torch.FloatTensor([[1, 2, 3, 4]]).send(remote)
    z = torch.matmul(x, y.t())
    assert torch.equal(z.get(), torch.FloatTensor([30]))

    z = torch.add(x, y)
    assert torch.equal(z.get(), torch.FloatTensor([[2, 4, 6, 8]]))

    x = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]]).send(remote)
    y = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]]).send(remote)
    z = torch.cross(x, y, dim=1)
    assert torch.equal(z.get(), torch.FloatTensor([[0, 0, 0], [0, 0, 0], [0, 0, 0]]))

    x = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]]).send(remote)
    y = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]]).send(remote)
    z = torch.dist(x, y)
    t = torch.FloatTensor([z])
    assert torch.equal(t, torch.FloatTensor([0.]))

    x = torch.FloatTensor([1, 2, 3]).send(remote)
    y = torch.FloatTensor([1, 2, 3]).send(remote)
    z = torch.dot(x, y)
    t = torch.FloatTensor([z])
    assert torch.equal(t, torch.FloatTensor([14]))

    z = torch.eq(x, y)
    assert torch.equal(z.get(), torch.ByteTensor([1, 1, 1]))

    z = torch.ge(x, y)
    assert torch.equal(z.get(), torch.ByteTensor([1, 1, 1]))
def pruneDutycycles(self, threshold=0.0):
    """
    Prune all the units whose duty cycle's absolute magnitude is less than
    `threshold * k / n`

    :param threshold: min threshold to prune. If less than zero then no pruning
    :type threshold: float
    """
    if threshold < 0.0:
        return

    if not hasattr(self, 'dutyCycle'):
        return

    # See KWinners
    targetDensity = float(self.k) / float(self.n)

    # Units to keep
    mask = torch.ge(torch.abs(self.dutyCycle), targetDensity * threshold)
    mask = mask.type(torch.float32).view(self.n, 1)

    # Zero weights with low duty cycles
    self.l1.weight.data.mul_(mask)
def test_local_tensor_binary_methods(self):
    '''Unit tests for methods mentioned on issue 1385
    https://github.com/OpenMined/PySyft/issues/1385'''
    x = torch.FloatTensor([1, 2, 3, 4])
    y = torch.FloatTensor([[1, 2, 3, 4]])
    z = torch.matmul(x, y.t())
    assert torch.equal(z, torch.FloatTensor([30]))

    z = torch.add(x, y)
    assert torch.equal(z, torch.FloatTensor([[2, 4, 6, 8]]))

    x = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])
    y = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])
    z = torch.cross(x, y, dim=1)
    assert torch.equal(z, torch.FloatTensor([[0, 0, 0], [0, 0, 0], [0, 0, 0]]))

    x = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])
    y = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])
    z = torch.dist(x, y)
    assert torch.equal(torch.FloatTensor([z]), torch.FloatTensor([0]))

    x = torch.FloatTensor([1, 2, 3])
    y = torch.FloatTensor([1, 2, 3])
    z = torch.dot(x, y)
    # There is an issue with some Macs getting 0.0 instead
    # Solved here: https://github.com/pytorch/pytorch/issues/5609
    assert torch.equal(torch.FloatTensor([z]), torch.FloatTensor([14]))

    z = torch.eq(x, y)
    assert torch.equal(z, torch.ByteTensor([1, 1, 1]))

    z = torch.ge(x, y)
    assert torch.equal(z, torch.ByteTensor([1, 1, 1]))

    x = torch.FloatTensor([1, 2, 3, 4, 5])
    y = torch.FloatTensor([1, 2, 3, 4, 5])
    assert (x.add_(y) == torch.FloatTensor([2, 4, 6, 8, 10])).all()
def predict(self, feed_dict):
    check_list, embedding_l2 = [], []
    train = feed_dict[TRAIN]
    batch_size = feed_dict[TOTAL_BATCH_SIZE]  # = B
    or_length = feed_dict[K_OR_LENGTH]  # O
    x = feed_dict[X]  # B * O * A
    x_pos_neg = torch.ge(x, 0).float().unsqueeze(-1)  # B * O * A * 1
    x_valid = torch.gt(torch.abs(x), 0).float()  # B * O * A
    elements = self.feature_embeddings(torch.abs(x))  # B * O * A * V
    not_elements = self.logic_not(elements)  # B * O * A * V
    elements = x_pos_neg * elements + (-x_pos_neg + 1) * not_elements  # B * O * A * V
    elements = elements * x_valid.unsqueeze(-1)  # B * O * A * V

    constraint = [elements.view([batch_size, -1, self.v_vector_size])]  # B * ? * V
    constraint_valid = [x_valid.view([batch_size, -1])]  # B * ?

    # evaluate the expression in a randomly shuffled order
    all_os, all_ovs = [], []
    for o in range(len(or_length)):
        all_as, all_avs = [], []
        for a in range(or_length[o]):
            all_as.append(elements[:, o, a, :])  # B * V
            all_avs.append(x_valid[:, o, a].unsqueeze(-1))  # B * 1
        while len(all_as) > 1:
            idx_a, idx_b = 0, 1
            if train:
                idx_a, idx_b = np.random.choice(len(all_as), size=2, replace=False)
            if idx_a > idx_b:
                a, av = all_as.pop(idx_a), all_avs.pop(idx_a)  # B * V, B * 1
                b, bv = all_as.pop(idx_b), all_avs.pop(idx_b)  # B * V, B * 1
            else:
                b, bv = all_as.pop(idx_b), all_avs.pop(idx_b)  # B * V, B * 1
                a, av = all_as.pop(idx_a), all_avs.pop(idx_a)  # B * V, B * 1
            a_and_b = self.logic_and(a, b, train=train)  # B * V
            abv = av * bv  # B * 1
            ab = abv * a_and_b + av * (-bv + 1) * a + (-av + 1) * bv * b  # B * V
            all_as.insert(0, ab)
            all_avs.insert(0, (av + bv).gt(0).float())
            constraint.append(ab.view([batch_size, 1, self.v_vector_size]))
            constraint_valid.append(abv)
        all_os.append(all_as[0])
        all_ovs.append(all_avs[0])

    while len(all_os) > 1:
        idx_a, idx_b = 0, 1
        if train:
            idx_a, idx_b = np.random.choice(len(all_os), size=2, replace=False)
        if idx_a > idx_b:
            a, av = all_os.pop(idx_a), all_ovs.pop(idx_a)  # B * V, B * 1
            b, bv = all_os.pop(idx_b), all_ovs.pop(idx_b)  # B * V, B * 1
        else:
            b, bv = all_os.pop(idx_b), all_ovs.pop(idx_b)  # B * V, B * 1
            a, av = all_os.pop(idx_a), all_ovs.pop(idx_a)  # B * V, B * 1
        a_or_b = self.logic_or(a, b, train=train)  # B * V
        abv = av * bv  # B * 1
        ab = abv * a_or_b + av * (-bv + 1) * a + (-av + 1) * bv * b  # B * V
        all_os.insert(0, ab)
        all_ovs.insert(0, (av + bv).gt(0).float())
        constraint.append(ab.view([batch_size, 1, self.v_vector_size]))
        constraint_valid.append(abv)

    result_vector = all_os[0]  # B * V
    prediction = self.similarity(result_vector, self.true).view([-1])
    check_list.append(('prediction', prediction))
    check_list.append(('label', feed_dict[Y]))
    check_list.append(('true', self.true))

    constraint = torch.cat(tuple(constraint), dim=1)
    constraint_valid = torch.cat(tuple(constraint_valid), dim=1)
    out_dict = {PREDICTION: prediction,
                CHECK: check_list,
                'constraint': constraint,
                'constraint_valid': constraint_valid,
                EMBEDDING_L2: embedding_l2}
    return out_dict
def test_ge(x, y):
    c = torch.ge(torch.add(x, y), y)
    return c
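torch.ge compares elementwise with broadcasting and returns a boolean tensor (a ByteTensor on old PyTorch versions, torch.bool on recent ones); a quick illustration using the function above:

import torch

x = torch.tensor([1., 2., 3.])
y = torch.tensor([2., 2., 2.])
print(test_ge(x, y))     # tensor([True, True, True]): x + y = [3., 4., 5.] >= y everywhere
print(torch.ge(x, y))    # tensor([False,  True,  True])
print(torch.ge(x, 2.0))  # scalars broadcast: tensor([False,  True,  True])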
def build_mobilenetv2_pruned_model(origin_model):
    pruning_rate_now = 0
    channel_prune_rate = 0.9

    while pruning_rate_now < args.pruning_rate:
        score = []
        index_cfg = []
        layer_cfg = []
        final_mask = []
        pruned_state_dict = {}

        # Get importance criteria for each channel
        for i in range(17):
            mask = origin_model.state_dict()['mask.' + str(i)]
            score.append(torch.abs(torch.div(torch.sum(mask, 0), 2)))
            final_mask.append(torch.div(torch.sum(mask, 0), 2))

        all_score = torch.cat(score, 0)
        preserve_num = int(all_score.size(0) * channel_prune_rate)
        preserve_channel, _ = torch.topk(all_score, preserve_num)
        threshold = preserve_channel[preserve_num - 1]

        # Based on the pruning threshold, derive the pruning rate of each layer
        for mini_score in score:
            mask = torch.ge(mini_score, threshold)
            index = []
            for i, m in enumerate(mask):
                if m:
                    index.append(i)
            if len(index) < mask.size(0) * args.min_preserve:
                _, index = torch.topk(mini_score, int(mask.size(0) * args.min_preserve))
                index = index.cpu().numpy().tolist()
            index_cfg.append(index)
            layer_cfg.append(len(index))

        layer_cfg.append(640)
        last_index = random.sample(range(0, 1280), 640)
        last_index.sort()
        index_cfg.append(last_index)
        last_index = torch.LongTensor(last_index).to(device)

        model = import_module(f'model.{args.arch}').MobileNetV2(
            wm=1, layer_cfg=layer_cfg).to(device)
        flops, params = profile(model, inputs=(input, ))
        pruning_rate_now = (oriflops - flops) / oriflops
        channel_prune_rate = channel_prune_rate - 0.01

    model_state_dict = origin_model.state_dict()
    current_layer = 0
    model = import_module(f'model.{args.arch}').MobileNetV2(
        wm=1, layer_cfg=layer_cfg).to(device)
    pruned_state_dict = model.state_dict()

    for name, module in origin_model.named_modules():
        if isinstance(module, Block_class):
            # conv1 & bn1
            index = torch.LongTensor(index_cfg[current_layer]).to(device)
            pruned_weight = torch.index_select(
                model_state_dict[name + '.conv1.weight'], 0, index).cpu()
            pruned_state_dict[name + '.conv1.weight'] = pruned_weight
            mask = final_mask[current_layer][index_cfg[current_layer]]
            pruned_state_dict[name + '.bn1.weight'] = torch.mul(
                mask, model_state_dict[name + '.bn1.weight'][index]).cpu()
            pruned_state_dict[name + '.bn1.bias'] = torch.mul(
                mask, model_state_dict[name + '.bn1.bias'][index]).cpu()
            pruned_state_dict[name + '.bn1.running_var'] = model_state_dict[
                name + '.bn1.running_var'][index].cpu()
            pruned_state_dict[name + '.bn1.running_mean'] = model_state_dict[
                name + '.bn1.running_mean'][index].cpu()

            # conv2 & bn2
            pruned_weight = torch.index_select(
                model_state_dict[name + '.conv2.weight'], 0, index).cpu()
            pruned_state_dict[name + '.conv2.weight'] = pruned_weight
            pruned_state_dict[name + '.bn2.weight'] = torch.mul(
                mask, model_state_dict[name + '.bn2.weight'][index]).cpu()
            pruned_state_dict[name + '.bn2.bias'] = torch.mul(
                mask, model_state_dict[name + '.bn2.bias'][index]).cpu()
            pruned_state_dict[name + '.bn2.running_var'] = model_state_dict[
                name + '.bn2.running_var'][index].cpu()
            pruned_state_dict[name + '.bn2.running_mean'] = model_state_dict[
                name + '.bn2.running_mean'][index].cpu()

            # conv3 & bn3 & shortcut
            pruned_state_dict[name + '.conv3.weight'] = direct_project(
                model_state_dict[name + '.conv3.weight'], index).cpu()
            pruned_state_dict[name + '.bn3.weight'] = model_state_dict[
                name + '.bn3.weight'].cpu()
            pruned_state_dict[name + '.bn3.bias'] = model_state_dict[
                name + '.bn3.bias'].cpu()
            pruned_state_dict[name + '.bn3.running_var'] = model_state_dict[
                name + '.bn3.running_var'].cpu()
            pruned_state_dict[name + '.bn3.running_mean'] = model_state_dict[
                name + '.bn3.running_mean'].cpu()

            current_layer += 1

    pruned_state_dict['conv1.weight'] = model_state_dict['conv1.weight'].cpu()
    pruned_state_dict['bn1.weight'] = model_state_dict['bn1.weight'].cpu()
    pruned_state_dict['bn1.bias'] = model_state_dict['bn1.bias'].cpu()
    pruned_state_dict['bn1.running_var'] = model_state_dict['bn1.running_var'].cpu()
    pruned_state_dict['bn1.running_mean'] = model_state_dict['bn1.running_mean'].cpu()

    pruned_state_dict['conv2.weight'] = torch.index_select(
        model_state_dict['conv2.weight'], 0, last_index).cpu()
    pruned_state_dict['bn2.weight'] = model_state_dict['bn2.weight'][last_index].cpu()
    pruned_state_dict['bn2.bias'] = model_state_dict['bn2.bias'][last_index].cpu()
    pruned_state_dict['bn2.running_var'] = model_state_dict['bn2.running_var'][last_index].cpu()
    pruned_state_dict['bn2.running_mean'] = model_state_dict['bn2.running_mean'][last_index].cpu()

    fc_weight = model_state_dict['linear.weight']
    pr_fc_weight = torch.randn(fc_weight.size(0), len(last_index))
    for i, ind in enumerate(last_index):
        pr_fc_weight[:, i] = fc_weight[:, ind]
    pruned_state_dict['linear.weight'] = pr_fc_weight.cpu()
    pruned_state_dict['linear.bias'] = model_state_dict['linear.bias']

    # load weights
    model = import_module(f'model.{args.arch}').MobileNetV2(
        wm=1, layer_cfg=layer_cfg).to(device)
    model.load_state_dict(pruned_state_dict)

    return model, layer_cfg, flops, params
log = Logger()
log2 = Logger()
path_train_plot = os.path.join(args.logs_path, 'training.png')

if test:
    num_iterations = num_examples_test // batch_size

start = time.time()
for it in range(num_iterations):
    batch = gen.sample_batch(batch_size, is_training=not test, it=it)
    weights, volumes, C, OptW, OptV, is_chosen_opt = batch
    bs, N = weights.size()
    Ns = torch.ones(batch_size).type(dtype_l) * N
    NNs = Ns.float().unsqueeze(1).expand(bs, N)
    NNs = torch.ge(NNs, torch.arange(1, N + 1).type(dtype).unsqueeze(0).expand(bs, N)).float()
    if test:
        loss = Variable(torch.zeros(1).type(dtype))
        w, c, (Ns, NNs) = execute(Knap, scales, weights, volumes, C, (Ns, NNs), n_samples, 'test')
    else:
        loss, w, c, (Ns, NNs) = execute(Knap, scales, weights, volumes, C, (Ns, NNs), n_samples, 'train')
    trivial_w = trivial_algorithm(weights.data, volumes.data, C.data)
    if not test:
        Knap.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm(Knap.parameters(), clip_grad_norm)
def criterion(self, outs, annos):
    loc_preds, cls_preds = outs  # (bs, AnchorN, 4), (bs, AnchorN, ClassN), e.g., (4, 97965, 4)
    bs_num = cls_preds.size(0)
    annos[:, :, 2] += annos[:, :, 0]  # (bs, AnnoN, 8)
    annos[:, :, 3] += annos[:, :, 1]
    cls_losses = []
    reg_losses = []
    for n in range(bs_num):
        anno = annos[n]  # (AnnoN, 8)
        iou = bbox_iou(anno[:, :4], self.anchors)  # (AnnoN, AnchorN), e.g., (101, 97965)
        max_iou, max_idx = torch.max(iou, dim=0)  # (AnchorN)
        pos_idx = torch.ge(max_iou, 0.5)
        neg_idx = torch.lt(max_iou, 0.4)
        cls_idx = pos_idx | neg_idx

        # I. Classification loss
        cls_target = torch.zeros_like(cls_preds[n])
        cls_pred = cls_preds[n][cls_idx, :]
        assigned_anno = anno[max_idx[pos_idx], :]
        cls_target[pos_idx, assigned_anno[:, 5].long() - 1] = 1
        cls_target = cls_target[cls_idx]
        cls_loss = self.focal_loss(cls_pred, cls_target) / max(1., pos_idx.sum().float())
        cls_losses.append(cls_loss)

        # II. Regression loss
        if pos_idx.sum() > 0:
            anchor_widths_pi = self.anchors_widths[pos_idx]
            anchor_heights_pi = self.anchors_heights[pos_idx]
            anchor_ctr_x_pi = self.anchors_ctr_x[pos_idx]
            anchor_ctr_y_pi = self.anchors_ctr_y[pos_idx]

            gt_widths = assigned_anno[:, 2] - assigned_anno[:, 0]
            gt_heights = assigned_anno[:, 3] - assigned_anno[:, 1]
            gt_ctr_x = assigned_anno[:, 0] + 0.5 * gt_widths
            gt_ctr_y = assigned_anno[:, 1] + 0.5 * gt_heights
            gt_widths = torch.clamp(gt_widths, min=1)
            gt_heights = torch.clamp(gt_heights, min=1)

            with torch.no_grad():
                target_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                target_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                target_dw = torch.log(gt_widths / anchor_widths_pi)
                target_dh = torch.log(gt_heights / anchor_heights_pi)
                reg_target = torch.stack((target_dx, target_dy, target_dw, target_dh))
                reg_target = reg_target.t()
                reg_target = reg_target / torch.tensor([[0.1, 0.1, 0.2, 0.2]]).cuda()

            reg_pred = loc_preds[n, pos_idx, :]
            # reg_loss = F.smooth_l1_loss(reg_pred, reg_target.detach())
            regression_diff = torch.abs(reg_target.detach() - reg_pred)
            reg_loss = torch.where(
                torch.le(regression_diff, 1.0 / 9.0),
                0.5 * 9.0 * torch.pow(regression_diff, 2),
                regression_diff - 0.5 / 9.0)
            reg_losses.append(reg_loss.mean())
        else:
            reg_losses.append(torch.zeros(1).to(loc_preds.device))
    return sum(cls_losses) / bs_num, sum(reg_losses) / bs_num
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_anots',
                        help='Path to file containing list of annotations')
    parser.add_argument('--images_dir', help='images base folder')
    parser.add_argument('--save_dir', help='output directory for generated images')
    parser = parser.parse_args(args)

    if parser.dataset == 'csv':
        dataset = CSVDataset(train_file=parser.csv_anots,
                             class_list=parser.csv_classes,
                             images_dir=parser.images_dir,
                             transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sample_image = (dataset.load_image(0) * 255).astype(np.int32)
    sample_batch = np.expand_dims(sample_image, axis=0)
    sample_batch = sample_batch.transpose(0, 3, 1, 2)
    anchors_module = Anchors()
    anchors = anchors_module(sample_batch)

    for i in range(len(dataset)):
        image = (dataset.load_image(i) * 255).astype(np.int32)
        anots = dataset.load_annotations(i)

        distance = calc_distance(torch.tensor(anchors[0, :, :]),
                                 torch.tensor(anots[:, :NUM_VARIABLES]))
        distance_min, distance_argmin = torch.min(distance, dim=1)  # num_anchors x 1

        targets = torch.ones((anchors.shape[1], 1)) * -1
        targets[torch.ge(distance_min, 13 * MAX_ANOT_ANCHOR_POSITION_DISTANCE), :] = 0

        positive_indices = torch.le(distance_min, 11 * MAX_ANOT_ANCHOR_POSITION_DISTANCE)
        num_positive_anchors = positive_indices.sum()

        # assigned_annotations = center_alpha_annotation[deltaphi_argmin, :]  # no difference in result
        assigned_annotations = anots[distance_argmin, :]

        targets[positive_indices, :] = 0
        targets[positive_indices, assigned_annotations[positive_indices, 3]] = 1

        _anchors = anchors[0, :, :]
        for anchor in _anchors[targets.squeeze() == 1]:
            x, y, alpha = anchor[0], anchor[1], 90 - anchor[2]
            image = draw_line(image, (x, y), alpha,
                              line_color=(0, 255, 0),
                              center_color=(0, 0, 255),
                              half_line=True,
                              distance_thresh=60)
        for anot in anots:
            x, y, alpha = anot[0], anot[1], 90 - anot[2]
            image = draw_line(image, (x, y), alpha,
                              line_color=(0, 0, 0),
                              center_color=(255, 0, 0),
                              half_line=True)
        for anchor in _anchors[targets.squeeze() == -1]:
            x, y, alpha = anchor[0], anchor[1], 90 - anchor[2]
            image = draw_line(image, (x, y), alpha,
                              line_color=(255, 255, 0),
                              center_color=(0, 0, 255),
                              half_line=True,
                              distance_thresh=40,
                              line_thickness=2)

        image_name = os.path.basename(dataset.image_names[i])
        cv.imwrite(os.path.join(parser.save_dir, image_name),
                   cv.cvtColor(image.astype(np.uint8), cv.COLOR_RGB2BGR))
def __call__(self, matched_idxs):
    # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
    """
    Arguments:
        matched_idxs: list of tensors containing -1, 0 or positive values.
            Each tensor corresponds to a specific image.
            -1 values are ignored, 0 are considered as negatives and > 0 as positives.

    Returns:
        pos_idx (list[tensor])
        neg_idx (list[tensor])

    Returns two lists of binary masks for each image.
    The first list contains the positive elements that were selected,
    and the second list the negative examples.
    """
    pos_idx = []
    neg_idx = []
    # iterate over the matched_idxs of each image
    for matched_idxs_per_image in matched_idxs:
        # values >= 1 are positive samples (nonzero returns the indices of non-zero elements)
        # positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1)
        positive = torch.where(torch.ge(matched_idxs_per_image, 1))[0]
        # values == 0 are negative samples
        # negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1)
        negative = torch.where(torch.eq(matched_idxs_per_image, 0))[0]

        # target number of positive samples
        num_pos = int(self.batch_size_per_image * self.positive_fraction)
        # protect against not enough positive examples:
        # if there are too few positives, take all of them
        num_pos = min(positive.numel(), num_pos)
        # target number of negative samples
        num_neg = self.batch_size_per_image - num_pos
        # protect against not enough negative examples:
        # if there are too few negatives, take all of them
        num_neg = min(negative.numel(), num_neg)

        # randomly select positive and negative examples;
        # torch.randperm returns a random permutation of integers from 0 to n - 1
        perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos]
        perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg]

        pos_idx_per_image = positive[perm1]
        neg_idx_per_image = negative[perm2]

        # create binary masks from indices
        pos_idx_per_image_mask = torch.zeros_like(matched_idxs_per_image, dtype=torch.uint8)
        neg_idx_per_image_mask = torch.zeros_like(matched_idxs_per_image, dtype=torch.uint8)

        pos_idx_per_image_mask[pos_idx_per_image] = 1
        neg_idx_per_image_mask[neg_idx_per_image] = 1

        pos_idx.append(pos_idx_per_image_mask)
        neg_idx.append(neg_idx_per_image_mask)

    return pos_idx, neg_idx
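The torch.where(torch.ge(...))[0] idiom above is a drop-in replacement for the commented-out torch.nonzero(...).squeeze(1); a quick equivalence check on a toy matched_idxs tensor:

import torch

matched_idxs_per_image = torch.tensor([-1, 0, 2, 0, 1, -1, 3])
positive_a = torch.where(torch.ge(matched_idxs_per_image, 1))[0]
positive_b = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1)
assert torch.equal(positive_a, positive_b)  # both are tensor([2, 4, 6])
negative = torch.where(torch.eq(matched_idxs_per_image, 0))[0]
print(negative)  # tensor([1, 3])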
def test_comparison_ops_with_type_promotion(self, device):
    value_for_type = {
        torch.uint8: (1 << 5),
        torch.int8: (1 << 5),
        torch.int16: (1 << 10),
        torch.int32: (1 << 20),
        torch.int64: (1 << 35),
        torch.float16: (1 << 10),
        torch.float32: (1 << 20),
        torch.float64: (1 << 35),
        torch.complex64: (1 << 20),
        torch.complex128: (1 << 35)
    }
    comparison_ops = [
        dict(
            name="lt",
            out_op=lambda x, y, d: torch.lt(x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
            ret_op=lambda x, y: torch.lt(x, y),
            compare_op=lambda x, y: x < y,
        ),
        dict(
            name="le",
            out_op=lambda x, y, d: torch.le(x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
            ret_op=lambda x, y: torch.le(x, y),
            compare_op=lambda x, y: x <= y,
        ),
        dict(
            name="gt",
            out_op=lambda x, y, d: torch.gt(x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
            ret_op=lambda x, y: torch.gt(x, y),
            compare_op=lambda x, y: x > y,
        ),
        dict(
            name="ge",
            out_op=lambda x, y, d: torch.ge(x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
            ret_op=lambda x, y: torch.ge(x, y),
            compare_op=lambda x, y: x >= y,
        ),
        dict(
            name="eq",
            out_op=lambda x, y, d: torch.eq(x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
            ret_op=lambda x, y: torch.eq(x, y),
            compare_op=lambda x, y: x == y,
        ),
        dict(
            name="ne",
            out_op=lambda x, y, d: torch.ne(x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
            ret_op=lambda x, y: torch.ne(x, y),
            compare_op=lambda x, y: x != y,
        ),
    ]
    for op in comparison_ops:
        for dt1 in torch.testing.get_all_math_dtypes(device):
            for dt2 in torch.testing.get_all_math_dtypes(device):
                if (dt1.is_complex or dt2.is_complex) and not (op["name"] == "eq" or op["name"] == "ne"):
                    continue
                val1 = value_for_type[dt1]
                val2 = value_for_type[dt2]
                t1 = torch.tensor([val1], dtype=dt1, device=device)
                t2 = torch.tensor([val2], dtype=dt2, device=device)
                expected = torch.tensor([op["compare_op"](val1, val2)], dtype=torch.bool)

                out_res = op["out_op"](t1, t2, device)
                self.assertEqual(out_res, expected)
                self.assertTrue(out_res.dtype == torch.bool)
                self.assertTrue(t1.dtype == dt1)
                self.assertTrue(t2.dtype == dt2)

                out_res = op["ret_op"](t1, t2)
                self.assertEqual(out_res, expected)
                self.assertTrue(out_res.dtype == torch.bool)
                self.assertTrue(t1.dtype == dt1)
                self.assertTrue(t2.dtype == dt2)

                # test that comparing a zero dim tensor with another zero dim tensor has type promotion behavior
                t1 = torch.tensor(val1, dtype=dt1, device=device)
                t2 = torch.tensor(val2, dtype=dt2, device=device)
                expected = torch.tensor(op["compare_op"](val1, val2), dtype=torch.bool)

                out_res = op["out_op"](t1, t2, device)
                self.assertEqual(out_res, expected)
                self.assertTrue(out_res.dtype == torch.bool)
                self.assertTrue(t1.dtype == dt1)
                self.assertTrue(t2.dtype == dt2)

                out_res = op["ret_op"](t1, t2)
                self.assertEqual(out_res, expected)
                self.assertTrue(out_res.dtype == torch.bool)
                self.assertTrue(t1.dtype == dt1)
                self.assertTrue(t2.dtype == dt2)
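The behavior the test above exercises can be seen in two lines: mixed-dtype operands are promoted for the comparison, but the result dtype is always bool and the inputs are left untouched:

import torch

t1 = torch.tensor([1], dtype=torch.uint8)
t2 = torch.tensor([2.5], dtype=torch.float64)
res = torch.ge(t1, t2)
print(res, res.dtype)  # tensor([False]) torch.bool: inputs promote, output is always bool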
def get_clip_mask(self):
    log_alpha = self.log_alpha
    return torch.ge(log_alpha, self.thresh)
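A standalone sketch of the thresholding done by get_clip_mask; the log_alpha values and threshold here are invented, and what the mask means (clip vs. keep) depends on the surrounding codebase:

import torch

log_alpha = torch.tensor([-3.0, 0.5, -0.1, 2.0])
thresh = 0.0
clip_mask = torch.ge(log_alpha, thresh)  # entries whose log_alpha clears the threshold
print(clip_mask)                         # tensor([False,  True, False,  True])
print(clip_mask.float().mean())          # tensor(0.5000): fraction of units above the threshold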
def forward(self, classifications, regressions, anchors, annotations):
    alpha = 0.25
    gamma = 2.0
    bs = classifications.shape[0]
    classification_losses = []
    regression_losses = []

    anchor = anchors[0, :, :]
    anchor_widths = anchor[:, 2] - anchor[:, 0]
    anchor_heights = anchor[:, 3] - anchor[:, 1]
    anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
    anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

    for b in range(bs):
        classification = classifications[b, :, :]
        regression = regressions[b, :, :]

        bbox_annotation = annotations[b, :, :]
        bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

        if bbox_annotation.shape[0] == 0:
            if torch.cuda.is_available():
                regression_losses.append(torch.tensor(0).float().cuda())
                classification_losses.append(torch.tensor(0).float().cuda())
            else:
                regression_losses.append(torch.tensor(0).float())
                classification_losses.append(torch.tensor(0).float())
            continue

        classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

        IoU = calculate_IoU(anchors[0, :, :], bbox_annotation[:, :4])
        IoU_max, IoU_argmax = torch.max(IoU, dim=1)

        targets = torch.ones(classification.shape) * -1
        if torch.cuda.is_available():
            targets = targets.cuda()
        targets[torch.lt(IoU_max, 0.4), :] = 0

        positive_idxs = torch.ge(IoU_max, 0.5)
        num_positive_anchors = positive_idxs.sum()
        assigned_annotations = bbox_annotation[IoU_argmax, :]

        targets[positive_idxs, :] = 0
        targets[positive_idxs, assigned_annotations[positive_idxs, 4].long()] = 1

        alpha_factor = torch.ones(targets.shape) * alpha
        if torch.cuda.is_available():
            alpha_factor = alpha_factor.cuda()
        alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
        focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
        focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

        bce = -(targets * torch.log(classification) +
                (1. - targets) * torch.log(1. - classification))
        cls_loss = focal_weight * bce

        zeros = torch.zeros(cls_loss.shape)
        if torch.cuda.is_available():
            zeros = zeros.cuda()
        cls_loss = torch.where(torch.ne(targets, -1.), cls_loss, zeros)
        classification_losses.append(
            cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))

        if positive_idxs.sum() > 0:
            assigned_annotations = assigned_annotations[positive_idxs, :]

            anchor_widths_pi = anchor_widths[positive_idxs]
            anchor_heights_pi = anchor_heights[positive_idxs]
            anchor_ctr_x_pi = anchor_ctr_x[positive_idxs]
            anchor_ctr_y_pi = anchor_ctr_y[positive_idxs]

            gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
            gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
            gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
            gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

            gt_widths = torch.clamp(gt_widths, min=1)
            gt_heights = torch.clamp(gt_heights, min=1)

            targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
            targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
            targets_dw = torch.log(gt_widths / anchor_widths_pi)
            targets_dh = torch.log(gt_heights / anchor_heights_pi)

            targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh))
            targets = targets.t()

            norm = torch.Tensor([[0.1, 0.1, 0.2, 0.2]])
            if torch.cuda.is_available():
                norm = norm.cuda()
            targets = targets / norm

            regression_diff = torch.abs(targets - regression[positive_idxs, :])
            regression_loss = torch.where(
                torch.le(regression_diff, 1.0 / 9.0),
                0.5 * 9.0 * torch.pow(regression_diff, 2),
                regression_diff - 0.5 / 9.0)
            regression_losses.append(regression_loss.mean())
        else:
            if torch.cuda.is_available():
                regression_losses.append(torch.tensor(0).float().cuda())
            else:
                regression_losses.append(torch.tensor(0).float())

    return torch.stack(classification_losses).mean(dim=0, keepdim=True), \
        torch.stack(regression_losses).mean(dim=0, keepdim=True)
def vtln_warp_freq(vtln_low_cutoff, vtln_high_cutoff, low_freq, high_freq,
                   vtln_warp_factor, freq):
    r"""This computes a VTLN warping function that is not the same as HTK's one,
    but has similar inputs (this function has the advantage of never producing
    empty bins).

    This function computes a warp function F(freq), defined between low_freq
    and high_freq inclusive, with the following properties:
        F(low_freq) == low_freq
        F(high_freq) == high_freq
    The function is continuous and piecewise linear with two inflection points.
    The lower inflection point (measured in terms of the unwarped frequency) is
    at frequency l, determined as described below.
    The higher inflection point is at a frequency h, determined as described below.
    If l <= f <= h, then F(f) = f/vtln_warp_factor.
    If the higher inflection point (measured in terms of the unwarped frequency)
    is at h, then max(h, F(h)) == vtln_high_cutoff.
    Since (by the last point) F(h) == h/vtln_warp_factor, then
    max(h, h/vtln_warp_factor) == vtln_high_cutoff, so
        h = vtln_high_cutoff / max(1, 1/vtln_warp_factor)
          = vtln_high_cutoff * min(1, vtln_warp_factor).
    If the lower inflection point (measured in terms of the unwarped frequency)
    is at l, then min(l, F(l)) == vtln_low_cutoff.
    This implies that
        l = vtln_low_cutoff / min(1, 1/vtln_warp_factor)
          = vtln_low_cutoff * max(1, vtln_warp_factor)

    Args:
        vtln_low_cutoff (float): Lower frequency cutoff for VTLN
        vtln_high_cutoff (float): Upper frequency cutoff for VTLN
        low_freq (float): Lower frequency cutoff in mel computation
        high_freq (float): Upper frequency cutoff in mel computation
        vtln_warp_factor (float): VTLN warp factor
        freq (torch.Tensor): given frequency in Hz

    Returns:
        torch.Tensor: Freq after VTLN warp
    """
    assert vtln_low_cutoff > low_freq, 'be sure to set the vtln_low option higher than low_freq'
    assert vtln_high_cutoff < high_freq, 'be sure to set the vtln_high option lower than high_freq [or negative]'
    l = vtln_low_cutoff * max(1.0, vtln_warp_factor)
    h = vtln_high_cutoff * min(1.0, vtln_warp_factor)
    scale = 1.0 / vtln_warp_factor
    Fl = scale * l  # F(l)
    Fh = scale * h  # F(h)
    assert l > low_freq and h < high_freq
    # slope of left part of the 3-piece linear function
    scale_left = (Fl - low_freq) / (l - low_freq)
    # [slope of center part is just "scale"]
    # slope of right part of the 3-piece linear function
    scale_right = (high_freq - Fh) / (high_freq - h)

    res = torch.empty_like(freq)

    outside_low_high_freq = torch.lt(freq, low_freq) | torch.gt(freq, high_freq)  # freq < low_freq || freq > high_freq
    before_l = torch.lt(freq, l)  # freq < l
    before_h = torch.lt(freq, h)  # freq < h
    after_h = torch.ge(freq, h)  # freq >= h

    # order of operations matters here (since there are overlapping frequency regions)
    res[after_h] = high_freq + scale_right * (freq[after_h] - high_freq)
    res[before_h] = scale * freq[before_h]
    res[before_l] = low_freq + scale_left * (freq[before_l] - low_freq)
    res[outside_low_high_freq] = freq[outside_low_high_freq]

    return res
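One cheap sanity check implied by the docstring: with vtln_warp_factor == 1.0 both inflection points coincide with the cutoffs and scale == 1, so the warp must be the identity. A small check assuming the function above is in scope (the cutoff values are illustrative):

import torch

freq = torch.linspace(0.0, 8000.0, steps=9)
warped = vtln_warp_freq(vtln_low_cutoff=100.0, vtln_high_cutoff=7500.0,
                        low_freq=20.0, high_freq=7800.0,
                        vtln_warp_factor=1.0, freq=freq)
assert torch.allclose(warped, freq)  # F(f) == f everywhere when the warp factor is 1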
def main():
    # set the path to pre-trained model and output
    pre_trained_net = './pre_trained/' + args.net_type + '_' + args.dataset + '.pth'
    args.outf = args.outf + args.net_type + '_' + args.dataset + '/'
    if not os.path.exists(args.outf):
        os.makedirs(args.outf)

    torch.cuda.manual_seed(0)
    torch.cuda.set_device(args.gpu)

    # check the in-distribution dataset
    if args.dataset == 'cifar100':
        args.num_classes = 100

    adv_noise = 0.05

    # load networks
    if args.net_type == 'densenet':
        if args.dataset == 'svhn':
            model = models.DenseNet3(100, int(args.num_classes))
            model.load_state_dict(
                torch.load(pre_trained_net, map_location="cuda:" + str(args.gpu)))
        else:
            model = torch.load(pre_trained_net, map_location="cuda:" + str(args.gpu))
            for i, (name, module) in enumerate(model._modules.items()):
                module = recursion_change_bn(model)
            for m in model.modules():
                if 'Conv' in str(type(m)):
                    setattr(m, 'padding_mode', 'zeros')
        in_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((125.3 / 255, 123.0 / 255, 113.9 / 255),
                                 (63.0 / 255, 62.1 / 255.0, 66.7 / 255.0)),
        ])
        min_pixel = -1.98888885975
        max_pixel = 2.12560367584
        if args.dataset == 'cifar10':
            random_noise_size = 0.21 / 4
        elif args.dataset == 'cifar100':
            random_noise_size = 0.21 / 8
        else:
            random_noise_size = 0.21 / 4
    elif args.net_type == 'resnet':
        model = models.ResNet34(num_c=args.num_classes)
        model.load_state_dict(
            torch.load(pre_trained_net, map_location="cuda:" + str(args.gpu)))
        in_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010)),
        ])
        min_pixel = -2.42906570435
        max_pixel = 2.75373125076
        if args.dataset == 'cifar10':
            random_noise_size = 0.25 / 4
        elif args.dataset == 'cifar100':
            random_noise_size = 0.25 / 8
        else:
            random_noise_size = 0.25 / 4

    model.cuda()
    print('load model: ' + args.net_type)

    # load dataset
    print('load target data: ', args.dataset)
    _, test_loader = data_loader.getTargetDataSet(args.dataset, args.batch_size,
                                                  in_transform, args.dataroot)

    print('Attack: ' + 'FGSM' + ', Dist: ' + args.dataset + '\n')
    model.eval()
    adv_data_tot, clean_data_tot, noisy_data_tot = 0, 0, 0
    label_tot = 0

    correct, adv_correct, noise_correct = 0, 0, 0
    total, generated_noise = 0, 0

    criterion = nn.CrossEntropyLoss().cuda()

    selected_list = []
    selected_index = 0

    for data, target in test_loader:
        data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)

        # compute the accuracy on clean data
        pred = output.data.max(1)[1]
        equal_flag = pred.eq(target.data).cpu()
        correct += equal_flag.sum()

        noisy_data = torch.add(data.data, random_noise_size,
                               torch.randn(data.size()).cuda())
        noisy_data = torch.clamp(noisy_data, min_pixel, max_pixel)

        if total == 0:
            clean_data_tot = data.clone().data.cpu()
            label_tot = target.clone().data.cpu()
            noisy_data_tot = noisy_data.clone().cpu()
        else:
            clean_data_tot = torch.cat((clean_data_tot, data.clone().data.cpu()), 0)
            label_tot = torch.cat((label_tot, target.clone().data.cpu()), 0)
            noisy_data_tot = torch.cat((noisy_data_tot, noisy_data.clone().cpu()), 0)

        # generate adversarial examples
        model.zero_grad()
        inputs = Variable(data.data, requires_grad=True)
        output = model(inputs)
        loss = criterion(output, target)
        loss.backward()

        gradient = torch.ge(inputs.grad.data, 0)
        gradient = (gradient.float() - 0.5) * 2
        if args.net_type == 'densenet':
            gradient.index_copy_(1, torch.LongTensor([0]).cuda(),
                                 gradient.index_select(1, torch.LongTensor([0]).cuda()) / (63.0 / 255.0))
            gradient.index_copy_(1, torch.LongTensor([1]).cuda(),
                                 gradient.index_select(1, torch.LongTensor([1]).cuda()) / (62.1 / 255.0))
            gradient.index_copy_(1, torch.LongTensor([2]).cuda(),
                                 gradient.index_select(1, torch.LongTensor([2]).cuda()) / (66.7 / 255.0))
        else:
            gradient.index_copy_(1, torch.LongTensor([0]).cuda(),
                                 gradient.index_select(1, torch.LongTensor([0]).cuda()) / (0.2023))
            gradient.index_copy_(1, torch.LongTensor([1]).cuda(),
                                 gradient.index_select(1, torch.LongTensor([1]).cuda()) / (0.1994))
            gradient.index_copy_(1, torch.LongTensor([2]).cuda(),
                                 gradient.index_select(1, torch.LongTensor([2]).cuda()) / (0.2010))

        adv_data = torch.add(inputs.data, adv_noise, gradient)
        adv_data = torch.clamp(adv_data, min_pixel, max_pixel)

        # measure the noise
        temp_noise_max = torch.abs((data.data - adv_data).view(adv_data.size(0), -1))
        temp_noise_max, _ = torch.max(temp_noise_max, dim=1)
        generated_noise += torch.sum(temp_noise_max)

        if total == 0:
            flag = 1
            adv_data_tot = adv_data.clone().cpu()
        else:
            adv_data_tot = torch.cat((adv_data_tot, adv_data.clone().cpu()), 0)

        output = model(Variable(adv_data, volatile=True))
        # compute the accuracy on adversarial data
        pred = output.data.max(1)[1]
        equal_flag_adv = pred.eq(target.data).cpu()
        adv_correct += equal_flag_adv.sum()

        output = model(Variable(noisy_data, volatile=True))
        # compute the accuracy on noisy data
        pred = output.data.max(1)[1]
        equal_flag_noise = pred.eq(target.data).cpu()
        noise_correct += equal_flag_noise.sum()

        for i in range(data.size(0)):
            if equal_flag[i] == 1 and equal_flag_noise[i] == 1 and equal_flag_adv[i] == 0:
                selected_list.append(selected_index)
            selected_index += 1
        total += data.size(0)

    selected_list = torch.LongTensor(selected_list)
    clean_data_tot = torch.index_select(clean_data_tot, 0, selected_list)
    adv_data_tot = torch.index_select(adv_data_tot, 0, selected_list)
    noisy_data_tot = torch.index_select(noisy_data_tot, 0, selected_list)
    label_tot = torch.index_select(label_tot, 0, selected_list)

    torch.save(clean_data_tot,
               '%s/clean_data_%s_%s_%s.pth' % (args.outf, args.net_type, args.dataset, 'FGSM'))
    torch.save(adv_data_tot,
               '%s/adv_data_%s_%s_%s.pth' % (args.outf, args.net_type, args.dataset, 'FGSM'))
    torch.save(noisy_data_tot,
               '%s/noisy_data_%s_%s_%s.pth' % (args.outf, args.net_type, args.dataset, 'FGSM'))
    torch.save(label_tot,
               '%s/label_%s_%s_%s.pth' % (args.outf, args.net_type, args.dataset, 'FGSM'))

    print('Adversarial Noise:({:.2f})\n'.format(generated_noise / total))
    print('Final Accuracy: {}/{} ({:.2f}%)\n'.format(correct, total, 100. * correct / total))
    print('Adversarial Accuracy: {}/{} ({:.2f}%)\n'.format(
        adv_correct, total, 100. * adv_correct / total))
    print('Noisy Accuracy: {}/{} ({:.2f}%)\n'.format(
        noise_correct, total, 100. * noise_correct / total))
def forward(self,  # type: ignore
            question: Dict[str, torch.LongTensor],
            passage: Dict[str, torch.LongTensor],
            span_start: torch.IntTensor = None,
            span_end: torch.IntTensor = None,
            p1_answer_marker: torch.IntTensor = None,
            p2_answer_marker: torch.IntTensor = None,
            p3_answer_marker: torch.IntTensor = None,
            yesno_list: torch.IntTensor = None,
            followup_list: torch.IntTensor = None,
            metadata: List[Dict[str, Any]] = None) -> Dict[str, torch.Tensor]:
    # pylint: disable=arguments-differ
    """
    Parameters
    ----------
    question : Dict[str, torch.LongTensor]
        From a ``TextField``.
    passage : Dict[str, torch.LongTensor]
        From a ``TextField``. The model assumes that this passage contains the answer to the
        question, and predicts the beginning and ending positions of the answer within the
        passage.
    span_start : ``torch.IntTensor``, optional
        From an ``IndexField``. This is one of the things we are trying to predict - the
        beginning position of the answer within the passage. This is an `inclusive` token index.
        If this is given, we will compute a loss that gets included in the output dictionary.
    span_end : ``torch.IntTensor``, optional
        From an ``IndexField``. This is one of the things we are trying to predict - the
        ending position of the answer within the passage. This is an `inclusive` token index.
        If this is given, we will compute a loss that gets included in the output dictionary.
    p1_answer_marker : ``torch.IntTensor``, optional
        This is one of the inputs, but only when num_context_answers > 0.
        This is a tensor that has a shape [batch_size, max_qa_count, max_passage_length].
        Most passage tokens will be assigned 'O', except the passage tokens that belong to the
        previous answer in the dialog, which will be assigned labels such as <1_start>, <1_in>,
        <1_end>. For more details, look into
        dataset_readers/util/make_reading_comprehension_instance_quac
    p2_answer_marker : ``torch.IntTensor``, optional
        This is one of the inputs, but only when num_context_answers > 1.
        It is similar to p1_answer_marker, but marking the previous previous answer in the passage.
    p3_answer_marker : ``torch.IntTensor``, optional
        This is one of the inputs, but only when num_context_answers > 2.
        It is similar to p1_answer_marker, but marking the previous previous previous answer in
        the passage.
    yesno_list : ``torch.IntTensor``, optional
        This is one of the outputs that we are trying to predict.
        Three-way classification (yes / no / not a yes-no question).
    followup_list : ``torch.IntTensor``, optional
        This is one of the outputs that we are trying to predict.
        Three-way classification (followup / maybe followup / don't followup).
    metadata : ``List[Dict[str, Any]]``, optional
        If present, this should contain the question ID, original passage text, and token
        offsets into the passage for each instance in the batch. We use this for computing
        official metrics using the official SQuAD evaluation script. The length of this list
        should be the batch size, and each dictionary should have the keys ``id``,
        ``original_passage``, and ``token_offsets``. If you only want the best span string and
        don't care about official metrics, you can omit the ``id`` key.

    Returns
    -------
    An output dictionary consisting of the following entries. Each entry is a nested list
    because it first iterates over dialogs, then over questions in a dialog.
    qid : List[List[str]]
        A list of lists, consisting of question ids.
    followup : List[List[int]]
        A list of lists, consisting of continuation marker prediction indices.
        (y: yes, m: maybe follow up, n: don't follow up)
    yesno : List[List[int]]
        A list of lists, consisting of affirmation marker prediction indices.
        (y: yes, x: not a yes/no question, n: no)
    best_span_str : List[List[str]]
        If sufficient metadata was provided for the instances in the batch, we also return the
        string from the original passage that the model thinks is the best answer to the question.
    loss : torch.FloatTensor, optional
        A scalar loss to be optimised.
    """
    batch_size, max_qa_count, max_q_len, _ = question['token_characters'].size()
    total_qa_count = batch_size * max_qa_count
    qa_mask = torch.ge(followup_list, 0).view(total_qa_count)
    embedded_question = self._text_field_embedder(question, num_wrapping_dims=1)
    embedded_question = embedded_question.reshape(
        total_qa_count, max_q_len, self._text_field_embedder.get_output_dim())
    embedded_question = self._variational_dropout(embedded_question)
    embedded_passage = self._variational_dropout(self._text_field_embedder(passage))
    passage_length = embedded_passage.size(1)

    question_mask = util.get_text_field_mask(question, num_wrapping_dims=1).float()
    question_mask = question_mask.reshape(total_qa_count, max_q_len)
    passage_mask = util.get_text_field_mask(passage).float()

    repeated_passage_mask = passage_mask.unsqueeze(1).repeat(1, max_qa_count, 1)
    repeated_passage_mask = repeated_passage_mask.view(total_qa_count, passage_length)

    if self._num_context_answers > 0:
        # Encode question turn number inside the dialog into the question embedding.
        question_num_ind = util.get_range_vector(
            max_qa_count, util.get_device_of(embedded_question))
        question_num_ind = question_num_ind.unsqueeze(-1).repeat(1, max_q_len)
        question_num_ind = question_num_ind.unsqueeze(0).repeat(batch_size, 1, 1)
        question_num_ind = question_num_ind.reshape(total_qa_count, max_q_len)
        question_num_marker_emb = self._question_num_marker(question_num_ind)
        embedded_question = torch.cat([embedded_question, question_num_marker_emb], dim=-1)

        # Encode the previous answers into the passage embedding.
        repeated_embedded_passage = embedded_passage.unsqueeze(1).repeat(1, max_qa_count, 1, 1). \
            view(total_qa_count, passage_length, self._text_field_embedder.get_output_dim())
        # batch_size * max_qa_count, passage_length, word_embed_dim
        p1_answer_marker = p1_answer_marker.view(total_qa_count, passage_length)
        p1_answer_marker_emb = self._prev_ans_marker(p1_answer_marker)
        repeated_embedded_passage = torch.cat(
            [repeated_embedded_passage, p1_answer_marker_emb], dim=-1)
        if self._num_context_answers > 1:
            p2_answer_marker = p2_answer_marker.view(total_qa_count, passage_length)
            p2_answer_marker_emb = self._prev_ans_marker(p2_answer_marker)
            repeated_embedded_passage = torch.cat(
                [repeated_embedded_passage, p2_answer_marker_emb], dim=-1)
            if self._num_context_answers > 2:
                p3_answer_marker = p3_answer_marker.view(total_qa_count, passage_length)
                p3_answer_marker_emb = self._prev_ans_marker(p3_answer_marker)
                repeated_embedded_passage = torch.cat(
                    [repeated_embedded_passage, p3_answer_marker_emb], dim=-1)

        repeated_encoded_passage = self._variational_dropout(
            self._phrase_layer(repeated_embedded_passage, repeated_passage_mask))
    else:
        encoded_passage = self._variational_dropout(
            self._phrase_layer(embedded_passage, passage_mask))
        repeated_encoded_passage = encoded_passage.unsqueeze(1).repeat(1, max_qa_count, 1, 1)
        repeated_encoded_passage = repeated_encoded_passage.view(
            total_qa_count, passage_length, self._encoding_dim)

    encoded_question = self._variational_dropout(
        self._phrase_layer(embedded_question, question_mask))

    # Shape: (batch_size * max_qa_count, passage_length, question_length)
    passage_question_similarity = self._matrix_attention(
        repeated_encoded_passage, encoded_question)
    # Shape: (batch_size * max_qa_count, passage_length, question_length)
    passage_question_attention = util.masked_softmax(
        passage_question_similarity, question_mask)
    # Shape: (batch_size * max_qa_count, passage_length, encoding_dim)
    passage_question_vectors = util.weighted_sum(
        encoded_question, passage_question_attention)

    # We replace masked values with something really negative here, so they don't affect the
    # max below.
    masked_similarity = util.replace_masked_values(
        passage_question_similarity, question_mask.unsqueeze(1), -1e7)

    question_passage_similarity = masked_similarity.max(dim=-1)[0].squeeze(-1)
    question_passage_attention = util.masked_softmax(
        question_passage_similarity, repeated_passage_mask)
    # Shape: (batch_size * max_qa_count, encoding_dim)
    question_passage_vector = util.weighted_sum(
        repeated_encoded_passage, question_passage_attention)
    tiled_question_passage_vector = question_passage_vector.unsqueeze(1).expand(
        total_qa_count, passage_length, self._encoding_dim)

    # Shape: (batch_size * max_qa_count, passage_length, encoding_dim * 4)
    final_merged_passage = torch.cat([repeated_encoded_passage,
                                      passage_question_vectors,
                                      repeated_encoded_passage * passage_question_vectors,
                                      repeated_encoded_passage * tiled_question_passage_vector],
                                     dim=-1)

    final_merged_passage = F.relu(self._merge_atten(final_merged_passage))

    residual_layer = self._variational_dropout(
        self._residual_encoder(final_merged_passage, repeated_passage_mask))
    self_attention_matrix = self._self_attention(residual_layer, residual_layer)

    mask = repeated_passage_mask.reshape(total_qa_count, passage_length, 1) \
        * repeated_passage_mask.reshape(total_qa_count, 1, passage_length)
    self_mask = torch.eye(passage_length, passage_length,
                          device=self_attention_matrix.device)
    self_mask = self_mask.reshape(1, passage_length, passage_length)
    mask = mask * (1 - self_mask)

    self_attention_probs = util.masked_softmax(self_attention_matrix, mask)

    # (batch, passage_len, passage_len) * (batch, passage_len, dim) -> (batch, passage_len, dim)
    self_attention_vecs = torch.matmul(self_attention_probs, residual_layer)
    self_attention_vecs = torch.cat([self_attention_vecs, residual_layer,
                                     residual_layer * self_attention_vecs],
                                    dim=-1)
    residual_layer = F.relu(self._merge_self_attention(self_attention_vecs))

    final_merged_passage = final_merged_passage + residual_layer
    # batch_size * maxqa_pair_len * max_passage_len * 200
    final_merged_passage = self._variational_dropout(final_merged_passage)
    start_rep = self._span_start_encoder(final_merged_passage, repeated_passage_mask)
    span_start_logits = self._span_start_predictor(start_rep).squeeze(-1)

    end_rep = self._span_end_encoder(
        torch.cat([final_merged_passage, start_rep], dim=-1), repeated_passage_mask)
    span_end_logits = self._span_end_predictor(end_rep).squeeze(-1)

    span_yesno_logits = self._span_yesno_predictor(end_rep).squeeze(-1)
    span_followup_logits = self._span_followup_predictor(end_rep).squeeze(-1)

    span_start_logits = util.replace_masked_values(
        span_start_logits, repeated_passage_mask, -1e7)
    # batch_size * maxqa_len_pair, max_document_len
    span_end_logits = util.replace_masked_values(
        span_end_logits, repeated_passage_mask, -1e7)

    best_span = self._get_best_span_yesno_followup(span_start_logits, span_end_logits,
                                                   span_yesno_logits, span_followup_logits,
                                                   self._max_span_length)

    output_dict: Dict[str, Any] = {}

    # Compute the loss.
    if span_start is not None:
        loss = nll_loss(util.masked_log_softmax(span_start_logits, repeated_passage_mask),
                        span_start.view(-1), ignore_index=-1)
        self._span_start_accuracy(span_start_logits, span_start.view(-1), mask=qa_mask)
        loss += nll_loss(util.masked_log_softmax(span_end_logits, repeated_passage_mask),
                         span_end.view(-1), ignore_index=-1)
        self._span_end_accuracy(span_end_logits, span_end.view(-1), mask=qa_mask)
        self._span_accuracy(best_span[:, 0:2],
                            torch.stack([span_start, span_end], -1).view(total_qa_count, 2),
                            mask=qa_mask.unsqueeze(1).expand(-1, 2).long())

        # add a select for the right span to compute loss
        gold_span_end_loc = []
        span_end = span_end.view(total_qa_count).squeeze().cpu().detach().numpy().reshape(
            total_qa_count)
        for i in range(0, total_qa_count):
            gold_span_end_loc.append(max(span_end[i] * 3 + i * passage_length * 3, 0))
            gold_span_end_loc.append(max(span_end[i] * 3 + i * passage_length * 3 + 1, 0))
            gold_span_end_loc.append(max(span_end[i] * 3 + i * passage_length * 3 + 2, 0))
        gold_span_end_loc = span_start.new(gold_span_end_loc)

        pred_span_end_loc = []
        for i in range(0, total_qa_count):
            pred_span_end_loc.append(max(best_span[i][1] * 3 + i * passage_length * 3, 0))
            pred_span_end_loc.append(max(best_span[i][1] * 3 + i * passage_length * 3 + 1, 0))
            pred_span_end_loc.append(max(best_span[i][1] * 3 + i * passage_length * 3 + 2, 0))
        predicted_end = span_start.new(pred_span_end_loc)

        _yesno = span_yesno_logits.view(-1).index_select(0, gold_span_end_loc).view(-1, 3)
        _followup = span_followup_logits.view(-1).index_select(0, gold_span_end_loc).view(-1, 3)
        loss += nll_loss(F.log_softmax(_yesno, dim=-1), yesno_list.view(-1), ignore_index=-1)
        loss += nll_loss(F.log_softmax(_followup, dim=-1), followup_list.view(-1),
                         ignore_index=-1)

        _yesno = span_yesno_logits.view(-1).index_select(0, predicted_end).view(-1, 3)
        _followup = span_followup_logits.view(-1).index_select(0, predicted_end).view(-1, 3)
        self._span_yesno_accuracy(_yesno, yesno_list.view(-1), mask=qa_mask)
        self._span_followup_accuracy(_followup, followup_list.view(-1), mask=qa_mask)

        output_dict["loss"] = loss

    # Compute F1 and prepare the output dictionary.
    output_dict['best_span_str'] = []
    output_dict['qid'] = []
    output_dict['followup'] = []
    output_dict['yesno'] = []
    best_span_cpu = best_span.detach().cpu().numpy()
    for i in range(batch_size):
        passage_str = metadata[i]['original_passage']
        offsets = metadata[i]['token_offsets']
        f1_score = 0.0
        per_dialog_best_span_list = []
        per_dialog_yesno_list = []
        per_dialog_followup_list = []
        per_dialog_query_id_list = []
        for per_dialog_query_index, (iid, answer_texts) in enumerate(
                zip(metadata[i]["instance_id"], metadata[i]["answer_texts_list"])):
            predicted_span = tuple(best_span_cpu[i * max_qa_count + per_dialog_query_index])

            start_offset = offsets[predicted_span[0]][0]
            end_offset = offsets[predicted_span[1]][1]

            yesno_pred = predicted_span[2]
            followup_pred = predicted_span[3]
            per_dialog_yesno_list.append(yesno_pred)
            per_dialog_followup_list.append(followup_pred)
            per_dialog_query_id_list.append(iid)

            best_span_string = passage_str[start_offset:end_offset]
            per_dialog_best_span_list.append(best_span_string)
            if answer_texts:
                if len(answer_texts) > 1:
                    t_f1 = []
                    # Compute F1 over N-1 human references and average the scores.
                    for answer_index in range(len(answer_texts)):
                        idxes = list(range(len(answer_texts)))
                        idxes.pop(answer_index)
                        refs = [answer_texts[z] for z in idxes]
                        t_f1.append(squad_eval.metric_max_over_ground_truths(
                            squad_eval.f1_score, best_span_string, refs))
                    f1_score = 1.0 * sum(t_f1) / len(t_f1)
                else:
                    f1_score = squad_eval.metric_max_over_ground_truths(
                        squad_eval.f1_score, best_span_string, answer_texts)
            self._official_f1(100 * f1_score)
        output_dict['qid'].append(per_dialog_query_id_list)
        output_dict['best_span_str'].append(per_dialog_best_span_list)
        output_dict['yesno'].append(per_dialog_yesno_list)
        output_dict['followup'].append(per_dialog_followup_list)
    return output_dict
pre.reset()
rec.reset()
network.eval()
with torch.no_grad():
    for i, data in enumerate(dataloader, 0):
        t0 = time.time()
        img, input32, gt32, input64, gt64, f, mod, seq = data
        img = img.cuda()
        input32 = input32.cuda()
        gt32 = gt32.cuda()
        input64 = input64.cuda()
        gt64 = gt64.cuda()

        prediction1, prediction2, prediction3 = network(img, input32, input64)

        prediction1 = F.softmax(prediction1, dim=1)
        prediction1 = torch.ge(prediction1[:, 1, :, :, :], opt.th)
        prediction1 = prediction1.type(torch.cuda.FloatTensor)
        gt32 = gt32.type(torch.cuda.FloatTensor)
        inter1 = torch.min(prediction1, gt32).sum(3).sum(2).sum(1)
        union1 = torch.max(prediction1, gt32).sum(3).sum(2).sum(1)
        inter_over_union1 = torch.mean(inter1 / union1)
        iou1.update(inter_over_union1.item())

        prediction2 = F.softmax(prediction2, dim=1)
        prediction2 = torch.ge(prediction2[:, 1, :, :, :], opt.th)
        prediction2 = prediction2.type(torch.cuda.FloatTensor)
        inter2 = torch.min(prediction2, gt32).sum(3).sum(2).sum(1)
        union2 = torch.max(prediction2, gt32).sum(3).sum(2).sum(1)
        inter_over_union2 = torch.mean(inter2 / union2)
        iou2.update(inter_over_union2.item())
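Once the softmax output is thresholded with torch.ge into a hard {0, 1} volume, elementwise torch.min acts as set intersection and torch.max as set union, which is why the ratio above is an IoU; a two-mask sketch:

import torch

a = torch.tensor([[1., 1., 0., 0.]])
b = torch.tensor([[1., 0., 1., 0.]])
inter = torch.min(a, b).sum(1)  # tensor([1.]): overlap of the two masks
union = torch.max(a, b).sum(1)  # tensor([3.]): cells covered by either mask
print(inter / union)            # tensor([0.3333]) -> IoU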
def forward(self, classifications, regressions, anchors, annotations, **kwargs): alpha = 0.25 gamma = 2.0 batch_size = classifications.shape[0] classification_losses = [] regression_losses = [] anchor = anchors[ 0, :, :] # assuming all image sizes are the same, which it is dtype = anchors.dtype anchor_widths = anchor[:, 3] - anchor[:, 1] anchor_heights = anchor[:, 2] - anchor[:, 0] anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights for j in range(batch_size): classification = classifications[j, :, :] regression = regressions[j, :, :] bbox_annotation = annotations[j] bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1] classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4) if bbox_annotation.shape[0] == 0: if torch.cuda.is_available(): alpha_factor = torch.ones_like(classification) * alpha alpha_factor = alpha_factor.cuda() alpha_factor = 1. - alpha_factor focal_weight = classification focal_weight = alpha_factor * torch.pow( focal_weight, gamma) bce = -(torch.log(1.0 - classification)) cls_loss = focal_weight * bce regression_losses.append(torch.tensor(0).to(dtype).cuda()) classification_losses.append(cls_loss.sum()) else: alpha_factor = torch.ones_like(classification) * alpha alpha_factor = 1. - alpha_factor focal_weight = classification focal_weight = alpha_factor * torch.pow( focal_weight, gamma) bce = -(torch.log(1.0 - classification)) cls_loss = focal_weight * bce regression_losses.append(torch.tensor(0).to(dtype)) classification_losses.append(cls_loss.sum()) continue IoU = calc_iou(anchor[:, :], bbox_annotation[:, :4]) IoU_max, IoU_argmax = torch.max(IoU, dim=1) # compute the loss for classification targets = torch.ones_like(classification) * -1 if torch.cuda.is_available(): targets = targets.cuda() targets[torch.lt(IoU_max, 0.4), :] = 0 positive_indices = torch.ge(IoU_max, 0.5) num_positive_anchors = positive_indices.sum() assigned_annotations = bbox_annotation[IoU_argmax, :] targets[positive_indices, :] = 0 targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1 alpha_factor = torch.ones_like(targets) * alpha if torch.cuda.is_available(): alpha_factor = alpha_factor.cuda() alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor) focal_weight = torch.where(torch.eq(targets, 1.), 1. 
- classification, classification) focal_weight = alpha_factor * torch.pow(focal_weight, gamma) bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification)) cls_loss = focal_weight * bce zeros = torch.zeros_like(cls_loss) if torch.cuda.is_available(): zeros = zeros.cuda() cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, zeros) classification_losses.append( cls_loss.sum() / torch.clamp(num_positive_anchors.to(dtype), min=1.0)) if positive_indices.sum() > 0: assigned_annotations = assigned_annotations[ positive_indices, :] anchor_widths_pi = anchor_widths[positive_indices] anchor_heights_pi = anchor_heights[positive_indices] anchor_ctr_x_pi = anchor_ctr_x[positive_indices] anchor_ctr_y_pi = anchor_ctr_y[positive_indices] gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0] gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1] gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights # efficientdet style gt_widths = torch.clamp(gt_widths, min=1) gt_heights = torch.clamp(gt_heights, min=1) targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi targets_dw = torch.log(gt_widths / anchor_widths_pi) targets_dh = torch.log(gt_heights / anchor_heights_pi) targets = torch.stack( (targets_dy, targets_dx, targets_dh, targets_dw)) targets = targets.t() regression_diff = torch.abs(targets - regression[positive_indices, :]) regression_loss = torch.where( torch.le(regression_diff, 1.0 / 9.0), 0.5 * 9.0 * torch.pow(regression_diff, 2), regression_diff - 0.5 / 9.0) regression_losses.append(regression_loss.mean()) else: if torch.cuda.is_available(): regression_losses.append(torch.tensor(0).to(dtype).cuda()) else: regression_losses.append(torch.tensor(0).to(dtype)) # debug imgs = kwargs.get('imgs', None) if imgs is not None: regressBoxes = BBoxTransform() clipBoxes = ClipBoxes() obj_list = kwargs.get('obj_list', None) out = postprocess( imgs.detach(), torch.stack([anchors[0]] * imgs.shape[0], 0).detach(), regressions.detach(), classifications.detach(), regressBoxes, clipBoxes, 0.5, 0.3) imgs = imgs.permute(0, 2, 3, 1).cpu().numpy() imgs = ((imgs * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255).astype(np.uint8) imgs = [cv2.cvtColor(img, cv2.COLOR_RGB2BGR) for img in imgs] display(out, imgs, obj_list, imshow=False, imwrite=True) return torch.stack(classification_losses).mean(dim=0, keepdim=True), \ torch.stack(regression_losses).mean(dim=0, keepdim=True)
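The classification branch above assigns anchors by IoU: below 0.4 becomes background, 0.5 and above becomes a positive for its best-matching box, and everything in between keeps the -1 ignore label. A small sketch of just that assignment logic, with toy IoUs and class labels (three classes assumed):

import torch

IoU_max = torch.tensor([0.1, 0.45, 0.7, 0.55])   # best IoU per anchor
labels = torch.tensor([2, 0, 1, 2])              # class of best-matching gt box

targets = torch.full((4, 3), -1.0)               # -1 = ignored in the loss
targets[torch.lt(IoU_max, 0.4), :] = 0           # clear negatives
positive_indices = torch.ge(IoU_max, 0.5)        # positives
targets[positive_indices, :] = 0
targets[positive_indices, labels[positive_indices]] = 1
print(targets)  # anchor 1 (IoU 0.45) stays all -1 and is ignored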
def forward(self, classifications, regressions, anchors, annotations): batch_size = classifications.shape[0] classification_losses = [] regression_losses = [] anchor = anchors[0, :, :] anchor_widths = anchor[:, 2] - anchor[:, 0] anchor_heights = anchor[:, 3] - anchor[:, 1] anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights for j in range(batch_size): classification = classifications[j, :, :] regression = regressions[j, :, :] bbox_annotation = annotations[j, :, :] bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1] classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4) if bbox_annotation.shape[0] == 0: if torch.cuda.is_available(): alpha_factor = torch.ones( classification.shape).cuda() * self.alpha alpha_factor = 1. - alpha_factor focal_weight = classification focal_weight = alpha_factor * torch.pow( focal_weight, self.gamma) bce = -(torch.log(1.0 - classification)) # cls_loss = focal_weight * torch.pow(bce, gamma) cls_loss = focal_weight * bce classification_losses.append(cls_loss.sum()) regression_losses.append(torch.tensor(0).float().cuda()) else: alpha_factor = torch.ones( classification.shape) * self.alpha alpha_factor = 1. - alpha_factor focal_weight = classification focal_weight = alpha_factor * torch.pow( focal_weight, self.gamma) bce = -(torch.log(1.0 - classification)) # cls_loss = focal_weight * torch.pow(bce, gamma) cls_loss = focal_weight * bce classification_losses.append(cls_loss.sum()) regression_losses.append(torch.tensor(0).float()) continue IoU = calc_iou( anchors[0, :, :], bbox_annotation[:, :4]) # num_anchors x num_annotations IoU_max, IoU_argmax = torch.max(IoU, dim=1) # num_anchors x 1 #import pdb #pdb.set_trace() # compute the loss for classification targets = torch.ones(classification.shape) * -1 if torch.cuda.is_available(): targets = targets.cuda() targets[torch.lt(IoU_max, 0.4), :] = 0 positive_indices = torch.ge(IoU_max, 0.5) num_positive_anchors = positive_indices.sum() assigned_annotations = bbox_annotation[IoU_argmax, :] targets[positive_indices, :] = 0 targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1 if torch.cuda.is_available(): alpha_factor = torch.ones(targets.shape).cuda() * self.alpha else: alpha_factor = torch.ones(targets.shape) * self.alpha alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor) focal_weight = torch.where(torch.eq(targets, 1.), 1. 
- classification, classification) focal_weight = alpha_factor * torch.pow(focal_weight, self.gamma) bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification)) # cls_loss = focal_weight * torch.pow(bce, gamma) cls_loss = focal_weight * bce if torch.cuda.is_available(): cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).cuda()) else: cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape)) classification_losses.append( cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0)) # compute the loss for regression if positive_indices.sum() > 0: assigned_annotations = assigned_annotations[ positive_indices, :] anchor_widths_pi = anchor_widths[positive_indices] anchor_heights_pi = anchor_heights[positive_indices] anchor_ctr_x_pi = anchor_ctr_x[positive_indices] anchor_ctr_y_pi = anchor_ctr_y[positive_indices] gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0] gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1] gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights # clip widths to 1 gt_widths = torch.clamp(gt_widths, min=1) gt_heights = torch.clamp(gt_heights, min=1) targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi targets_dw = torch.log(gt_widths / anchor_widths_pi) targets_dh = torch.log(gt_heights / anchor_heights_pi) targets = torch.stack( (targets_dx, targets_dy, targets_dw, targets_dh)) targets = targets.t() if torch.cuda.is_available(): targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2] ]).cuda() else: targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]]) negative_indices = 1 + (~positive_indices) regression_diff = torch.abs(targets - regression[positive_indices, :]) regression_loss = torch.where( torch.le(regression_diff, 1.0 / 9.0), 0.5 * 9.0 * torch.pow(regression_diff, 2), regression_diff - 0.5 / 9.0) regression_losses.append(regression_loss.mean()) else: if torch.cuda.is_available(): regression_losses.append(torch.tensor(0).float().cuda()) else: regression_losses.append(torch.tensor(0).float()) return torch.stack(classification_losses).mean( dim=0, keepdim=True), torch.stack(regression_losses).mean(dim=0, keepdim=True)
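The regression branch in both focal-loss variants is a smooth-L1 loss written with torch.where: quadratic while the absolute error is at most beta = 1/9, linear beyond it. A compact equivalent for reference (beta is the 1/9 constant from the snippets):

import torch

def smooth_l1(pred, target, beta=1.0 / 9.0):
    # quadratic near zero, linear elsewhere; continuous at |diff| == beta
    diff = torch.abs(target - pred)
    return torch.where(diff <= beta,
                       0.5 * diff ** 2 / beta,
                       diff - 0.5 * beta)

pred = torch.tensor([0.0, 0.2, 1.5])
target = torch.tensor([0.05, 0.0, 0.0])
print(smooth_l1(pred, target).mean())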
def compute(self, anchors, annotations, regressions): #Initialize data structures for assignment labels_b = torch.ones([self.image_per_gpu, self.output_num, self.num_classes]).cuda()*-1 #Initialize data structures for regression loss if self.losstype > 2: regression_losses= torch.tensor([]).cuda() else: regression_losses = torch.zeros(self.image_per_gpu).cuda() anchor_boxes = self.xy_to_wh(anchors[0, :, :].type(torch.cuda.FloatTensor)) p_num=0 for j in range(self.image_per_gpu): bbox_annotation = annotations[j, :, :] bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1] if bbox_annotation.shape[0] == 0: labels_b[j]=torch.zeros([self.output_num, self.num_classes]).cuda() continue IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4]) # num_anchors x num_annotations if self.assigner['type'] == "IoUAssigner": IoU_max, IoU_argmax = torch.max(IoU, dim=1) # num_anchors x 1 ###### gt_IoU_max, gt_IoU_argmax = torch.max(IoU, dim=0) gt_IoU_argmax=torch.where(IoU==gt_IoU_max)[0] positive_indices = torch.ge(torch.zeros(IoU_max.shape).cuda(),1) positive_indices[gt_IoU_argmax.long()] = True ###### positive_indices = positive_indices | torch.ge(IoU_max, self.assigner['pos_min_IoU']) negative_indices = torch.lt(IoU_max, self.assigner['neg_max_IoU']) assigned_annotations = bbox_annotation[IoU_argmax, :] labels_b[j, negative_indices, :] = 0 labels_b[j, positive_indices, :] = 0 labels_b[j, positive_indices, assigned_annotations[positive_indices, 4].long()] = 1 elif self.assigner['type'] == "ATSSAssigner": #1. compute center distance between all bbox and gt num_gt = bbox_annotation.shape[0] gt_cx = (bbox_annotation[:, 0] + bbox_annotation[:, 2]) / 2.0 gt_cy = (bbox_annotation[:, 1] + bbox_annotation[:, 3]) / 2.0 gt_points = torch.stack((gt_cx, gt_cy), dim=1) bboxes_cx = ((anchors[0, :, 0] + anchors[0, :, 2]) / 2.0).float() bboxes_cy = ((anchors[0, :, 1] + anchors[0, :, 3]) / 2.0).float() bboxes_points = torch.stack((bboxes_cx, bboxes_cy), dim=1) distances = (bboxes_points[:, None, :] - gt_points[None, :, :]).pow(2).sum(-1).sqrt() #2. on each pyramid level, for each gt, select k bbox whose center #are closest to the gt center, so we select k*l bbox in total as #candidates for each gt candidate_idxs = [] start_idx = 0 for level, bboxes_per_level in enumerate(self.fpn_anchor_num): # on each pyramid level, for each gt, # select k bbox whose center are closest to the gt center end_idx = int(start_idx + bboxes_per_level) distances_per_level = distances[start_idx:end_idx, :] selectable_k = min(self.assigner['topk'], int(bboxes_per_level)) _, topk_idxs_per_level = distances_per_level.topk(selectable_k, dim=0, largest=False) candidate_idxs.append(topk_idxs_per_level + start_idx) start_idx = end_idx candidate_idxs = torch.cat(candidate_idxs, dim=0) #3. get the corresponding iou for these candidates, and compute the #mean and std, set mean + std as the iou threshold candidate_overlaps = IoU[candidate_idxs, torch.arange(num_gt)] overlaps_mean_per_gt = candidate_overlaps.mean(0) overlaps_std_per_gt = candidate_overlaps.std(0) overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt #4. select these candidates whose iou is greater than or equal to #the threshold as positive is_pos = candidate_overlaps >= overlaps_thr_per_gt[None, :] #5.
limit the positive sample's center in gt for gt_idx in range(num_gt): candidate_idxs[:, gt_idx] += gt_idx * self.output_num ep_bboxes_cx = bboxes_cx.view(1, -1).expand(num_gt, self.output_num).contiguous().view(-1) ep_bboxes_cy = bboxes_cy.view(1, -1).expand(num_gt, self.output_num).contiguous().view(-1) candidate_idxs = candidate_idxs.view(-1) # calculate the left, top, right, bottom distance between positive # bbox center and gt side l_ = ep_bboxes_cx[candidate_idxs].view(-1, num_gt) - bbox_annotation[:, 0] t_ = ep_bboxes_cy[candidate_idxs].view(-1, num_gt) - bbox_annotation[:, 1] r_ = bbox_annotation[:, 2] - ep_bboxes_cx[candidate_idxs].view(-1, num_gt) b_ = bbox_annotation[:, 3] - ep_bboxes_cy[candidate_idxs].view(-1, num_gt) is_in_gts = torch.stack([l_, t_, r_, b_], dim=1).min(dim=1)[0] > 0.01 is_pos = is_pos & is_in_gts # if an anchor box is assigned to multiple gts, # the one with the highest IoU will be selected. IoU_inf = torch.full_like(IoU, -1).t().contiguous().view(-1) index = candidate_idxs.view(-1)[is_pos.view(-1)] IoU_inf[index] = IoU.t().contiguous().view(-1)[index] IoU_inf = IoU_inf.view(num_gt, -1).t() IoU_max, IoU_argmax = IoU_inf.max(dim=1) positive_indices = IoU_max > -1 negative_indices = ~positive_indices assigned_annotations = bbox_annotation[IoU_argmax, :] labels_b[j, :, :] = 0 labels_b[j, positive_indices, assigned_annotations[positive_indices, 4].long()] = 1 #Regression Loss Computation Starts here pos_ex_num=positive_indices.sum() p_num+=pos_ex_num if pos_ex_num > 0: gt_boxes = self.xy_to_wh(assigned_annotations[positive_indices, :]) targets2 = self.bbox2delta(gt_boxes, anchor_boxes[positive_indices, :]) if self.losstype == 0: regression_diff_abs= torch.abs(regressions[j, positive_indices, :]-targets2) regression_loss = torch.where( torch.le(regression_diff_abs, self.beta), 0.5 * torch.pow(regression_diff_abs, 2)/self.beta, regression_diff_abs - 0.5 * self.beta ) regression_losses[j]=regression_loss.sum() elif self.losstype == 1: # convert targets and model outputs to boxes for IoU-Loss. targets2_ = self.delta2bbox(targets2) regression_ = self.delta2bbox(regressions[j, positive_indices, :]) # calculate bbox overlaps ious = bbox_overlaps(regression_, targets2_) regression_loss = 1 - ious regression_losses[j]=regression_loss.sum() elif self.losstype == 2: # convert targets and model outputs to boxes for IoU-Loss. targets2_ = self.delta2bbox(targets2) regression_ = self.delta2bbox(regressions[j, positive_indices, :]) # calculate bbox overlaps ious = compute_giou(regression_, targets2_) regression_loss = 1 - ious regression_losses[j]=regression_loss.sum() else: # convert targets and model outputs to boxes for IoU-Loss. targets2_ = self.delta2bbox(targets2) regression_ = self.delta2bbox(regressions[j, positive_indices, :]) # calculate bbox overlaps if self.reg_loss.iou_type == 'IoU': ious = (1-bbox_overlaps(regression_, targets2_)) elif self.reg_loss.iou_type =='GIoU': ious = (1-compute_giou(regression_, targets2_)) / 2 #tau is set to 0.5 by default regression_losses=torch.cat([regression_losses, ((ious)/(1-self.tau))], dim=0) else: if self.losstype <= 2: regression_losses[j]=torch.tensor(0).float().cuda() else: continue if self.losstype <= 2: #Following AP Loss implementation, we normalize over the number of #regression inputs once classical regression losses are adopted. return labels_b, regression_losses.sum()/(4*p_num) else: return labels_b, regression_losses
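The core of the ATSS branch is its adaptive cutoff: per ground-truth box, a candidate anchor is kept when its IoU reaches the mean plus one standard deviation of the candidate IoUs for that box. A toy sketch of just that step (the IoU values are made up):

import torch

# IoUs between 6 candidate anchors (rows) and 2 gt boxes (columns)
candidate_overlaps = torch.tensor([[0.10, 0.60],
                                   [0.30, 0.55],
                                   [0.45, 0.20],
                                   [0.50, 0.15],
                                   [0.05, 0.40],
                                   [0.35, 0.30]])
mean_per_gt = candidate_overlaps.mean(0)
std_per_gt = candidate_overlaps.std(0)
thr_per_gt = mean_per_gt + std_per_gt          # per-gt adaptive threshold
is_pos = candidate_overlaps >= thr_per_gt[None, :]
print(thr_per_gt)
print(is_pos)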
def compute_projection(self, depth, camera_to_world, world_to_grid, random_center_voxel_indices): # compute projection by voxels -> image try: if np.linalg.matrix_rank(camera_to_world) != 4: print('Found singular camera_to_world %s' % camera_to_world) return None world_to_camera = torch.inverse(camera_to_world) except RuntimeError as e: # Throws RuntimeError: MAGMA getrf: U(1, 1) is 0, U is singular # Probably due to some pose file not uploaded properly print("Got runtime exception in computing inverse of C2W:\n%s" % camera_to_world) if hasattr(e, 'message'): print(e.message) else: print(e) return None try: grid_to_world = torch.inverse(world_to_grid) except RuntimeError as e: print("Got runtime exception in computing inverse of W2G:\n%s" % world_to_grid) if hasattr(e, 'message'): print(e.message) else: print(e) return None voxel_bounds_min, voxel_bounds_max = self.compute_frustum_bounds( world_to_grid, camera_to_world) CUDA_AVAILABLE = os.environ.get('CUDA_VISIBLE_DEVICES') if CUDA_AVAILABLE: voxel_bounds_min = np.maximum(voxel_bounds_min, 0).cuda() voxel_bounds_max = np.minimum(voxel_bounds_max, self.volume_dims).float().cuda() # coordinates within frustum bounds lin_ind_volume = torch.arange(0, self.volume_dims[0] * self.volume_dims[1] * self.volume_dims[2], out=torch.LongTensor()).cuda() else: voxel_bounds_min = np.maximum(voxel_bounds_min, 0) voxel_bounds_max = np.minimum(voxel_bounds_max, self.volume_dims).float() # coordinates within frustum bounds lin_ind_volume = torch.arange(0, self.volume_dims[0] * self.volume_dims[1] * self.volume_dims[2], out=torch.LongTensor()) # Homogeneous Coordinates of 3d volume of projection of voxels in volumetric grid coords = camera_to_world.new(4, lin_ind_volume.size(0)) coords[2] = lin_ind_volume / (self.volume_dims[0] * self.volume_dims[1]) tmp = lin_ind_volume - (coords[2] * self.volume_dims[0] * self.volume_dims[1]).long() coords[1] = tmp / self.volume_dims[0] coords[0] = torch.remainder(tmp, self.volume_dims[0]) coords[3].fill_(1) mask_frustum_bounds = torch.ge(coords[0], voxel_bounds_min[0]) * \ torch.ge(coords[1], voxel_bounds_min[1]) * \ torch.ge(coords[2], voxel_bounds_min[2]) mask_frustum_bounds = mask_frustum_bounds * \ torch.lt(coords[0], voxel_bounds_max[0]) * \ torch.lt(coords[1], voxel_bounds_max[1]) * \ torch.lt(coords[2], voxel_bounds_max[2]) # remove 2d depth mappings if random_center_voxel_indices.nelement() > 0: # Setting mask to zero will discard the corresponding depth pixels below.
# for i in range(self.volume_dims[2]): # mask_frustum_bounds[center_voxel_begin + i * step_size] = 0 mask_frustum_bounds[self.center_voxel_begin + random_center_voxel_indices * self.step_size] = 0 if not mask_frustum_bounds.any(): print('error: nothing in frustum bounds') return None lin_ind_volume = lin_ind_volume[mask_frustum_bounds] coords = coords.resize_(4, lin_ind_volume.size(0)) coords[2] = lin_ind_volume / (self.volume_dims[0] * self.volume_dims[1]) tmp = lin_ind_volume - (coords[2] * self.volume_dims[0] * self.volume_dims[1]).long() coords[1] = tmp / self.volume_dims[0] coords[0] = torch.remainder(tmp, self.volume_dims[0]) coords[3].fill_(1) # transform to current frame p = torch.mm(torch.mm(world_to_camera, grid_to_world), coords) # project into image p[0] = (p[0] * self.intrinsic[0][0]) / p[2] + self.intrinsic[0][2] p[1] = (p[1] * self.intrinsic[1][1]) / p[2] + self.intrinsic[1][2] pi = torch.round(p).long() # Check if all points projected on image plane, lies inside image boundary valid_ind_mask = torch.ge(pi[0], 0) * torch.ge(pi[1], 0) * \ torch.lt(pi[0], self.image_dims[0]) * torch.lt(pi[1], self.image_dims[1]) if not valid_ind_mask.any(): print('error: no valid image indices') return None # Create depth mask for indices with valid depth valid_image_ind_x = pi[0][valid_ind_mask] valid_image_ind_y = pi[1][valid_ind_mask] valid_image_ind_lin = valid_image_ind_y * self.image_dims[ 0] + valid_image_ind_x depth_vals = torch.index_select(depth.view(-1), 0, valid_image_ind_lin) depth_mask = depth_vals.ge(self.depth_min) * depth_vals.le( self.depth_max) * torch.abs(depth_vals - p[2][valid_ind_mask]).le( self.voxel_size) if not depth_mask.any(): print('error: no valid depths') return None lin_ind_update = lin_ind_volume[valid_ind_mask] lin_ind_update = lin_ind_update[depth_mask] lin_indices_3d = lin_ind_update.new( self.volume_dims[0] * self.volume_dims[1] * self.volume_dims[2] + 1 ) #needs to be same size for all in batch... (first element has size) lin_indices_2d = lin_ind_update.new( self.volume_dims[0] * self.volume_dims[1] * self.volume_dims[2] + 1 ) #needs to be same size for all in batch... (first element has size) lin_indices_3d[0] = lin_ind_update.shape[0] lin_indices_2d[0] = lin_ind_update.shape[0] lin_indices_3d[1:1 + lin_indices_3d[0]] = lin_ind_update lin_indices_2d[1:1 + lin_indices_2d[0]] = torch.index_select( valid_image_ind_lin, 0, torch.nonzero(depth_mask)[:, 0]) num_ind = lin_indices_3d[0] return lin_indices_3d, lin_indices_2d
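The frustum test above chains torch.ge/torch.lt masks with multiplication, the idiom older PyTorch used for logical AND on byte masks. A minimal sketch of the same bounds check with the modern & operator (bounds and shapes are illustrative):

import torch

coords = torch.randint(0, 32, (3, 1000))   # (x, y, z) per column
lo = torch.tensor([2, 2, 2])
hi = torch.tensor([30, 30, 30])

# elementwise AND of six comparisons; `&` on bool tensors is the modern
# equivalent of the mask multiplication used in the snippet
mask = (torch.ge(coords[0], lo[0]) & torch.ge(coords[1], lo[1]) &
        torch.ge(coords[2], lo[2]) & torch.lt(coords[0], hi[0]) &
        torch.lt(coords[1], hi[1]) & torch.lt(coords[2], hi[2]))
inside = coords[:, mask]                    # keep only in-bounds columns
print(inside.shape)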
query_embedding = query_embedding.squeeze() query_center = query_embedding[:dimensionality] query_offset = query_embedding[dimensionality:] positive_offset = query_center + query_offset negative_offset = query_center - query_offset query_minimum = torch.min(positive_offset, negative_offset) query_maximum = torch.max(positive_offset, negative_offset) found_answers = set() indexer = 0 for entity in node_embeddings: closest_point = closest(query_embedding, entity) is_answer = True for i in range(dimensionality): if not (torch.ge(closest_point[i], query_minimum[i]) and torch.ge(query_maximum[i], closest_point[i])): is_answer = False if is_answer: found_answers |= {indexer} indexer += 1 true_answers = query_dict["raw_query"]["targets"] true_positives = true_answers & found_answers false_positives = found_answers - true_answers false_negatives = true_answers - found_answers true_negatives = ( (all_nodes - true_positives) - false_positives) - false_negatives
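The per-dimension Python loop above can be collapsed into one vectorized containment check. A sketch, assuming the points and the box corners share the same dimensionality:

import torch

def in_box(points, box_min, box_max):
    # True for rows that lie inside the axis-aligned box on every dimension
    return (torch.ge(points, box_min) & torch.le(points, box_max)).all(dim=1)

points = torch.tensor([[0.5, 0.5], [2.0, 0.5]])
print(in_box(points, torch.zeros(2), torch.ones(2)))  # tensor([ True, False])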
def __ge__(self, other): return torch.ge(self, other)
def _compute_true_acc(predictions): predictions = torch.ge(predictions.data, 0.5) if len(predictions.size()) == 3: predictions = predictions.view(predictions.size(0) * predictions.size(1) * predictions.size(2)) acc = (predictions == 1).sum() / (1.0 * predictions.size(0)) return acc
def forward(self, # type: ignore question: Dict[str, torch.LongTensor], passage: Dict[str, torch.LongTensor], span_start: torch.IntTensor = None, span_end: torch.IntTensor = None, p1_answer_marker: torch.IntTensor = None, p2_answer_marker: torch.IntTensor = None, p3_answer_marker: torch.IntTensor = None, yesno_list: torch.IntTensor = None, followup_list: torch.IntTensor = None, metadata: List[Dict[str, Any]] = None) -> Dict[str, torch.Tensor]: # pylint: disable=arguments-differ """ Parameters ---------- question : Dict[str, torch.LongTensor] From a ``TextField``. passage : Dict[str, torch.LongTensor] From a ``TextField``. The model assumes that this passage contains the answer to the question, and predicts the beginning and ending positions of the answer within the passage. span_start : ``torch.IntTensor``, optional From an ``IndexField``. This is one of the things we are trying to predict - the beginning position of the answer within the passage. This is an `inclusive` token index. If this is given, we will compute a loss that gets included in the output dictionary. span_end : ``torch.IntTensor``, optional From an ``IndexField``. This is one of the things we are trying to predict - the ending position of the answer within the passage. This is an `inclusive` token index. If this is given, we will compute a loss that gets included in the output dictionary. p1_answer_marker : ``torch.IntTensor``, optional This is one of the inputs, but only when num_context_answers > 0. This is a tensor that has a shape [batch_size, max_qa_count, max_passage_length]. Most passage tokens will be assigned 'O', except the passage tokens that belong to the previous answer in the dialog, which will be assigned labels such as <1_start>, <1_in>, <1_end>. For more details, look into dataset_readers/util/make_reading_comprehension_instance_quac p2_answer_marker : ``torch.IntTensor``, optional This is one of the inputs, but only when num_context_answers > 1. It is similar to p1_answer_marker, but marks the answer from two turns back in the passage. p3_answer_marker : ``torch.IntTensor``, optional This is one of the inputs, but only when num_context_answers > 2. It is similar to p1_answer_marker, but marks the answer from three turns back in the passage. yesno_list : ``torch.IntTensor``, optional This is one of the outputs that we are trying to predict. Three-way classification (yes / no / not a yes-no question). followup_list : ``torch.IntTensor``, optional This is one of the outputs that we are trying to predict. Three-way classification (followup / maybe followup / don't followup). metadata : ``List[Dict[str, Any]]``, optional If present, this should contain the question ID, original passage text, and token offsets into the passage for each instance in the batch. We use this for computing official metrics using the official SQuAD evaluation script. The length of this list should be the batch size, and each dictionary should have the keys ``id``, ``original_passage``, and ``token_offsets``. If you only want the best span string and don't care about official metrics, you can omit the ``id`` key. Returns ------- An output dictionary consisting of the following entries. Each entry is a nested list: the outer list iterates over dialogs, the inner one over the questions in each dialog. qid : List[List[str]] A list of lists of question ids. followup : List[List[int]] A list of lists of continuation marker prediction indices.
(y: yes, m: maybe follow up, n: don't follow up) yesno : List[List[int]] A list of lists of affirmation marker prediction indices. (y: yes, x: not a yes/no question, n: no) best_span_str : List[List[str]] If sufficient metadata was provided for the instances in the batch, we also return the string from the original passage that the model thinks is the best answer to the question. loss : torch.FloatTensor, optional A scalar loss to be optimised. """ batch_size, max_qa_count, max_q_len, _ = question['token_characters'].size() total_qa_count = batch_size * max_qa_count qa_mask = torch.ge(followup_list, 0).view(total_qa_count) embedded_question = self._text_field_embedder(question, num_wrapping_dims=1) embedded_question = embedded_question.reshape(total_qa_count, max_q_len, self._text_field_embedder.get_output_dim()) embedded_question = self._variational_dropout(embedded_question) embedded_passage = self._variational_dropout(self._text_field_embedder(passage)) passage_length = embedded_passage.size(1) question_mask = util.get_text_field_mask(question, num_wrapping_dims=1).float() question_mask = question_mask.reshape(total_qa_count, max_q_len) passage_mask = util.get_text_field_mask(passage).float() repeated_passage_mask = passage_mask.unsqueeze(1).repeat(1, max_qa_count, 1) repeated_passage_mask = repeated_passage_mask.view(total_qa_count, passage_length) if self._num_context_answers > 0: # Encode question turn number inside the dialog into question embedding. question_num_ind = util.get_range_vector(max_qa_count, util.get_device_of(embedded_question)) question_num_ind = question_num_ind.unsqueeze(-1).repeat(1, max_q_len) question_num_ind = question_num_ind.unsqueeze(0).repeat(batch_size, 1, 1) question_num_ind = question_num_ind.reshape(total_qa_count, max_q_len) question_num_marker_emb = self._question_num_marker(question_num_ind) embedded_question = torch.cat([embedded_question, question_num_marker_emb], dim=-1) # Encode the previous answers in passage embedding. repeated_embedded_passage = embedded_passage.unsqueeze(1).repeat(1, max_qa_count, 1, 1).
\ view(total_qa_count, passage_length, self._text_field_embedder.get_output_dim()) # batch_size * max_qa_count, passage_length, word_embed_dim p1_answer_marker = p1_answer_marker.view(total_qa_count, passage_length) p1_answer_marker_emb = self._prev_ans_marker(p1_answer_marker) repeated_embedded_passage = torch.cat([repeated_embedded_passage, p1_answer_marker_emb], dim=-1) if self._num_context_answers > 1: p2_answer_marker = p2_answer_marker.view(total_qa_count, passage_length) p2_answer_marker_emb = self._prev_ans_marker(p2_answer_marker) repeated_embedded_passage = torch.cat([repeated_embedded_passage, p2_answer_marker_emb], dim=-1) if self._num_context_answers > 2: p3_answer_marker = p3_answer_marker.view(total_qa_count, passage_length) p3_answer_marker_emb = self._prev_ans_marker(p3_answer_marker) repeated_embedded_passage = torch.cat([repeated_embedded_passage, p3_answer_marker_emb], dim=-1) repeated_encoded_passage = self._variational_dropout(self._phrase_layer(repeated_embedded_passage, repeated_passage_mask)) else: encoded_passage = self._variational_dropout(self._phrase_layer(embedded_passage, passage_mask)) repeated_encoded_passage = encoded_passage.unsqueeze(1).repeat(1, max_qa_count, 1, 1) repeated_encoded_passage = repeated_encoded_passage.view(total_qa_count, passage_length, self._encoding_dim) encoded_question = self._variational_dropout(self._phrase_layer(embedded_question, question_mask)) # Shape: (batch_size * max_qa_count, passage_length, question_length) passage_question_similarity = self._matrix_attention(repeated_encoded_passage, encoded_question) # Shape: (batch_size * max_qa_count, passage_length, question_length) passage_question_attention = util.masked_softmax(passage_question_similarity, question_mask) # Shape: (batch_size * max_qa_count, passage_length, encoding_dim) passage_question_vectors = util.weighted_sum(encoded_question, passage_question_attention) # We replace masked values with something really negative here, so they don't affect the # max below. 
masked_similarity = util.replace_masked_values(passage_question_similarity, question_mask.unsqueeze(1), -1e7) question_passage_similarity = masked_similarity.max(dim=-1)[0].squeeze(-1) question_passage_attention = util.masked_softmax(question_passage_similarity, repeated_passage_mask) # Shape: (batch_size * max_qa_count, encoding_dim) question_passage_vector = util.weighted_sum(repeated_encoded_passage, question_passage_attention) tiled_question_passage_vector = question_passage_vector.unsqueeze(1).expand(total_qa_count, passage_length, self._encoding_dim) # Shape: (batch_size * max_qa_count, passage_length, encoding_dim * 4) final_merged_passage = torch.cat([repeated_encoded_passage, passage_question_vectors, repeated_encoded_passage * passage_question_vectors, repeated_encoded_passage * tiled_question_passage_vector], dim=-1) final_merged_passage = F.relu(self._merge_atten(final_merged_passage)) residual_layer = self._variational_dropout(self._residual_encoder(final_merged_passage, repeated_passage_mask)) self_attention_matrix = self._self_attention(residual_layer, residual_layer) mask = repeated_passage_mask.reshape(total_qa_count, passage_length, 1) \ * repeated_passage_mask.reshape(total_qa_count, 1, passage_length) self_mask = torch.eye(passage_length, passage_length, device=self_attention_matrix.device) self_mask = self_mask.reshape(1, passage_length, passage_length) mask = mask * (1 - self_mask) self_attention_probs = util.masked_softmax(self_attention_matrix, mask) # (batch, passage_len, passage_len) * (batch, passage_len, dim) -> (batch, passage_len, dim) self_attention_vecs = torch.matmul(self_attention_probs, residual_layer) self_attention_vecs = torch.cat([self_attention_vecs, residual_layer, residual_layer * self_attention_vecs], dim=-1) residual_layer = F.relu(self._merge_self_attention(self_attention_vecs)) final_merged_passage = final_merged_passage + residual_layer # batch_size * maxqa_pair_len * max_passage_len * 200 final_merged_passage = self._variational_dropout(final_merged_passage) start_rep = self._span_start_encoder(final_merged_passage, repeated_passage_mask) span_start_logits = self._span_start_predictor(start_rep).squeeze(-1) end_rep = self._span_end_encoder(torch.cat([final_merged_passage, start_rep], dim=-1), repeated_passage_mask) span_end_logits = self._span_end_predictor(end_rep).squeeze(-1) span_yesno_logits = self._span_yesno_predictor(end_rep).squeeze(-1) span_followup_logits = self._span_followup_predictor(end_rep).squeeze(-1) span_start_logits = util.replace_masked_values(span_start_logits, repeated_passage_mask, -1e7) # batch_size * maxqa_len_pair, max_document_len span_end_logits = util.replace_masked_values(span_end_logits, repeated_passage_mask, -1e7) best_span = self._get_best_span_yesno_followup(span_start_logits, span_end_logits, span_yesno_logits, span_followup_logits, self._max_span_length) output_dict: Dict[str, Any] = {} # Compute the loss. 
if span_start is not None: loss = nll_loss(util.masked_log_softmax(span_start_logits, repeated_passage_mask), span_start.view(-1), ignore_index=-1) self._span_start_accuracy(span_start_logits, span_start.view(-1), mask=qa_mask) loss += nll_loss(util.masked_log_softmax(span_end_logits, repeated_passage_mask), span_end.view(-1), ignore_index=-1) self._span_end_accuracy(span_end_logits, span_end.view(-1), mask=qa_mask) self._span_accuracy(best_span[:, 0:2], torch.stack([span_start, span_end], -1).view(total_qa_count, 2), mask=qa_mask.unsqueeze(1).expand(-1, 2).long()) # add a select for the right span to compute loss gold_span_end_loc = [] span_end = span_end.view(total_qa_count).squeeze().data.cpu().numpy() for i in range(0, total_qa_count): gold_span_end_loc.append(max(span_end[i] * 3 + i * passage_length * 3, 0)) gold_span_end_loc.append(max(span_end[i] * 3 + i * passage_length * 3 + 1, 0)) gold_span_end_loc.append(max(span_end[i] * 3 + i * passage_length * 3 + 2, 0)) gold_span_end_loc = span_start.new(gold_span_end_loc) pred_span_end_loc = [] for i in range(0, total_qa_count): pred_span_end_loc.append(max(best_span[i][1] * 3 + i * passage_length * 3, 0)) pred_span_end_loc.append(max(best_span[i][1] * 3 + i * passage_length * 3 + 1, 0)) pred_span_end_loc.append(max(best_span[i][1] * 3 + i * passage_length * 3 + 2, 0)) predicted_end = span_start.new(pred_span_end_loc) _yesno = span_yesno_logits.view(-1).index_select(0, gold_span_end_loc).view(-1, 3) _followup = span_followup_logits.view(-1).index_select(0, gold_span_end_loc).view(-1, 3) loss += nll_loss(F.log_softmax(_yesno, dim=-1), yesno_list.view(-1), ignore_index=-1) loss += nll_loss(F.log_softmax(_followup, dim=-1), followup_list.view(-1), ignore_index=-1) _yesno = span_yesno_logits.view(-1).index_select(0, predicted_end).view(-1, 3) _followup = span_followup_logits.view(-1).index_select(0, predicted_end).view(-1, 3) self._span_yesno_accuracy(_yesno, yesno_list.view(-1), mask=qa_mask) self._span_followup_accuracy(_followup, followup_list.view(-1), mask=qa_mask) output_dict["loss"] = loss # Compute F1 and preparing the output dictionary. output_dict['best_span_str'] = [] output_dict['qid'] = [] output_dict['followup'] = [] output_dict['yesno'] = [] best_span_cpu = best_span.detach().cpu().numpy() for i in range(batch_size): passage_str = metadata[i]['original_passage'] offsets = metadata[i]['token_offsets'] f1_score = 0.0 per_dialog_best_span_list = [] per_dialog_yesno_list = [] per_dialog_followup_list = [] per_dialog_query_id_list = [] for per_dialog_query_index, (iid, answer_texts) in enumerate( zip(metadata[i]["instance_id"], metadata[i]["answer_texts_list"])): predicted_span = tuple(best_span_cpu[i * max_qa_count + per_dialog_query_index]) start_offset = offsets[predicted_span[0]][0] end_offset = offsets[predicted_span[1]][1] yesno_pred = predicted_span[2] followup_pred = predicted_span[3] per_dialog_yesno_list.append(yesno_pred) per_dialog_followup_list.append(followup_pred) per_dialog_query_id_list.append(iid) best_span_string = passage_str[start_offset:end_offset] per_dialog_best_span_list.append(best_span_string) if answer_texts: if len(answer_texts) > 1: t_f1 = [] # Compute F1 over N-1 human references and averages the scores. 
for answer_index in range(len(answer_texts)): idxes = list(range(len(answer_texts))) idxes.pop(answer_index) refs = [answer_texts[z] for z in idxes] t_f1.append(squad_eval.metric_max_over_ground_truths(squad_eval.f1_score, best_span_string, refs)) f1_score = 1.0 * sum(t_f1) / len(t_f1) else: f1_score = squad_eval.metric_max_over_ground_truths(squad_eval.f1_score, best_span_string, answer_texts) self._official_f1(100 * f1_score) output_dict['qid'].append(per_dialog_query_id_list) output_dict['best_span_str'].append(per_dialog_best_span_list) output_dict['yesno'].append(per_dialog_yesno_list) output_dict['followup'].append(per_dialog_followup_list) return output_dict
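The multi-reference scoring in both forward passes is a leave-one-out average: each human reference is held out in turn and the prediction is scored against the remaining ones, taking the maximum per fold. A plain-Python sketch with a stand-in metric (metric_fn here is hypothetical; the snippets use squad_eval.f1_score via metric_max_over_ground_truths):

def loo_average(metric_fn, prediction, references):
    # hold out each reference in turn, score against the rest, then average
    scores = []
    for i in range(len(references)):
        refs = references[:i] + references[i + 1:]
        scores.append(max(metric_fn(prediction, r) for r in refs))
    return sum(scores) / len(scores)

# exact-match stand-in for F1, just to show the mechanics
print(loo_average(lambda p, r: float(p == r), "yes", ["yes", "no", "yes"]))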
def test_net(cfg, epoch_idx=-1, output_dir=None, test_data_loader=None, test_writer=None, encoder=None, decoder=None, refiner=None, merger=None): # Enable the inbuilt cudnn auto-tuner to find the best algorithm to use torch.backends.cudnn.benchmark = True # Load taxonomies of dataset taxonomies = [] with open(cfg.DATASETS[cfg.DATASET.TEST_DATASET.upper()].TAXONOMY_FILE_PATH, encoding='utf-8') as file: taxonomies = json.loads(file.read()) taxonomies = {t['taxonomy_id']: t for t in taxonomies} # Set up data loader if test_data_loader is None: # Set up data augmentation IMG_SIZE = cfg.CONST.IMG_H, cfg.CONST.IMG_W CROP_SIZE = cfg.CONST.CROP_IMG_H, cfg.CONST.CROP_IMG_W test_transforms = utils.data_transforms.Compose([ utils.data_transforms.CenterCrop(IMG_SIZE, CROP_SIZE), utils.data_transforms.RandomBackground(cfg.TEST.RANDOM_BG_COLOR_RANGE), utils.data_transforms.Normalize(mean=cfg.DATASET.MEAN, std=cfg.DATASET.STD), utils.data_transforms.ToTensor(), ]) dataset_loader = utils.data_loaders.DATASET_LOADER_MAPPING[cfg.DATASET.TEST_DATASET](cfg) test_data_loader = torch.utils.data.DataLoader(dataset=dataset_loader.get_dataset( utils.data_loaders.DatasetType.TEST, cfg.CONST.N_VIEWS_RENDERING, test_transforms), batch_size=1, num_workers=0, pin_memory=True, shuffle=False) # Set up networks if decoder is None or encoder is None: encoder = Encoder(cfg) decoder = Decoder(cfg) refiner = Refiner(cfg) merger = Merger(cfg) if torch.cuda.is_available(): encoder = torch.nn.DataParallel(encoder).cuda() decoder = torch.nn.DataParallel(decoder).cuda() refiner = torch.nn.DataParallel(refiner).cuda() merger = torch.nn.DataParallel(merger).cuda() print('[INFO] %s Loading weights from %s ...' % (dt.now(), cfg.CONST.WEIGHTS)) checkpoint = torch.load(cfg.CONST.WEIGHTS) epoch_idx = checkpoint['epoch_idx'] encoder.load_state_dict(checkpoint['encoder_state_dict']) decoder.load_state_dict(checkpoint['decoder_state_dict']) # if cfg.NETWORK.USE_REFINER: # refiner.load_state_dict(checkpoint['refiner_state_dict']) refiner.load_state_dict(checkpoint['refiner_state_dict']) if cfg.NETWORK.USE_MERGER: merger.load_state_dict(checkpoint['merger_state_dict']) # Set up loss functions bce_loss = torch.nn.BCELoss() # Testing loop n_samples = len(test_data_loader) test_iou = dict() encoder_losses = utils.network_utils.AverageMeter() refiner_losses = utils.network_utils.AverageMeter() # Switch models to evaluation mode encoder.eval() decoder.eval() refiner.eval() merger.eval() for sample_idx, (taxonomy_id, sample_name, rendering_images, ground_truth_volume) in enumerate(test_data_loader): taxonomy_id = taxonomy_id[0] if isinstance(taxonomy_id[0], str) else taxonomy_id[0].item() sample_name = sample_name[0] with torch.no_grad(): # Get data from data loader rendering_images = utils.network_utils.var_or_cuda(rendering_images) ground_truth_volume = utils.network_utils.var_or_cuda(ground_truth_volume) # Test the encoder, decoder, refiner and merger image_features = encoder(rendering_images) raw_features, generated_volume = decoder(image_features) if cfg.NETWORK.USE_MERGER and epoch_idx >= cfg.TRAIN.EPOCH_START_USE_MERGER: generated_volume = merger(raw_features, generated_volume) else: generated_volume = torch.mean(generated_volume, dim=1) encoder_loss = bce_loss(generated_volume, ground_truth_volume) * 10 # if cfg.NETWORK.USE_REFINER and epoch_idx >= cfg.TRAIN.EPOCH_START_USE_REFINER: # generated_volume = refiner(generated_volume) # refiner_loss = bce_loss(generated_volume, ground_truth_volume) * 10 # else: # refiner_loss = encoder_loss 
generated_volume = refiner(generated_volume) refiner_loss = bce_loss(generated_volume, ground_truth_volume) * 10 # Append loss and accuracy to average metrics encoder_losses.update(encoder_loss.item()) refiner_losses.update(refiner_loss.item()) # IoU per sample sample_iou = [] for th in cfg.TEST.VOXEL_THRESH: _volume = torch.ge(generated_volume, th).float() intersection = torch.sum(_volume.mul(ground_truth_volume)).float() union = torch.sum(torch.ge(_volume.add(ground_truth_volume), 1)).float() sample_iou.append((intersection / union).item()) # IoU per taxonomy if taxonomy_id not in test_iou: test_iou[taxonomy_id] = {'n_samples': 0, 'iou': []} test_iou[taxonomy_id]['n_samples'] += 1 test_iou[taxonomy_id]['iou'].append(sample_iou) # Append generated volumes to TensorBoard if output_dir and sample_idx < 3: img_dir = output_dir % 'images' # Volume Visualization gv = generated_volume.cpu().numpy() rendering_views = utils.binvox_visualization.get_volume_views(gv, os.path.join(img_dir, 'test'), epoch_idx) test_writer.add_image('Test Sample#%02d/Volume Reconstructed' % sample_idx, rendering_views, epoch_idx) gtv = ground_truth_volume.cpu().numpy() rendering_views = utils.binvox_visualization.get_volume_views(gtv, os.path.join(img_dir, 'test'), epoch_idx) test_writer.add_image('Test Sample#%02d/Volume GroundTruth' % sample_idx, rendering_views, epoch_idx) # Print sample loss and IoU print('[INFO] %s Test[%d/%d] Taxonomy = %s Sample = %s EDLoss = %.4f RLoss = %.4f IoU = %s' % (dt.now(), sample_idx + 1, n_samples, taxonomy_id, sample_name, encoder_loss.item(), refiner_loss.item(), ['%.4f' % si for si in sample_iou])) # Output testing results mean_iou = [] for taxonomy_id in test_iou: test_iou[taxonomy_id]['iou'] = np.mean(test_iou[taxonomy_id]['iou'], axis=0) mean_iou.append(test_iou[taxonomy_id]['iou'] * test_iou[taxonomy_id]['n_samples']) mean_iou = np.sum(mean_iou, axis=0) / n_samples # Print header print('============================ TEST RESULTS ============================') print('Taxonomy', end='\t') print('#Sample', end='\t') print('Baseline', end='\t') for th in cfg.TEST.VOXEL_THRESH: print('t=%.2f' % th, end='\t') print() # Print body for taxonomy_id in test_iou: print('%s' % taxonomies[taxonomy_id]['taxonomy_name'].ljust(8), end='\t') print('%d' % test_iou[taxonomy_id]['n_samples'], end='\t') if 'baseline' in taxonomies[taxonomy_id]: print('%.4f' % taxonomies[taxonomy_id]['baseline']['%d-view' % cfg.CONST.N_VIEWS_RENDERING], end='\t\t') else: print('N/a', end='\t\t') for ti in test_iou[taxonomy_id]['iou']: print('%.4f' % ti, end='\t') print() # Print mean IoU for each threshold print('Overall ', end='\t\t\t\t') for mi in mean_iou: print('%.4f' % mi, end='\t') print('\n') # Add testing results to TensorBoard max_iou = np.max(mean_iou) if test_writer is not None: test_writer.add_scalar('EncoderDecoder/EpochLoss', encoder_losses.avg, epoch_idx) test_writer.add_scalar('Refiner/EpochLoss', refiner_losses.avg, epoch_idx) test_writer.add_scalar('Refiner/IoU', max_iou, epoch_idx) return max_iou
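The per-sample IoU above binarizes the predicted volume at several thresholds and counts voxel overlap. The same computation in isolation (random volume, illustrative thresholds mirroring cfg.TEST.VOXEL_THRESH):

import torch

generated = torch.rand(32, 32, 32)                     # predicted occupancies
gt = torch.randint(0, 2, (32, 32, 32)).float()         # binary ground truth
for th in (0.2, 0.3, 0.4, 0.5):
    vol = torch.ge(generated, th).float()              # binarize at threshold
    intersection = torch.sum(vol * gt)
    union = torch.sum(torch.ge(vol + gt, 1).float())   # voxels set in either
    print(th, (intersection / union).item())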
def fun(weight): standard = torch.zeros_like(weight) weight = torch.ge(weight, standard).float().cuda() return weight
def forward(self, predictions: Tuple[torch.Tensor, torch.Tensor, torch.Tensor], labels: List[torch.Tensor]): def decode(boxes: torch.Tensor) \ -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: width, height = boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1] return width, height, boxes[:, 0] + .5 * width, boxes[:, 1] + .5 * height classifications, regressions, anchor = predictions classification_losses, regression_losses = [], [] device = anchor.device anchor_widths, anchor_heights, anchor_ctr_x, anchor_ctr_y = decode( anchor) for classification, regression, target in zip(classifications, regressions, labels): if target.size == 0: regression_losses.append( torch.tensor(0, dtype=torch.float32, device=device)) classification_losses.append( torch.tensor(0, dtype=torch.float32, device=device)) continue classification = torch.clamp(classification, 1e-4, 1. - 1e-4) # num_anchors x num_annotations IoU = calc_iou(anchor, target[:, :4]) IoU_max, IoU_argmax = torch.max(IoU, dim=1) # num_anchors x 1 # compute the loss for classification targets = torch.ones(classification.shape, device=device) * -1 targets[torch.lt(IoU_max, .4), :] = 0 positive_indices = torch.ge(IoU_max, .5) num_positive_anchors = positive_indices.sum() assigned_annotations = target[IoU_argmax, :] targets[positive_indices, :] = 0 targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1 alpha_factor = torch.ones(targets.shape, device=device) * self.alpha alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor) focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification) focal_weight = alpha_factor * torch.pow(focal_weight, self.gamma) bce = -(targets * torch.log(classification) + (1. - targets) * torch.log(1. - classification)) # cls_loss = focal_weight * torch.pow(bce, gamma) cls_loss = focal_weight * bce cls_loss = torch.where(torch.ne(targets, -1.), cls_loss, torch.zeros(cls_loss.shape, device=device)) classification_losses.append( cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.)) # compute the loss for regression if positive_indices.sum() > 0: assigned_annotations = assigned_annotations[ positive_indices, :] anchor_widths_pi = anchor_widths[positive_indices] anchor_heights_pi = anchor_heights[positive_indices] anchor_ctr_x_pi = anchor_ctr_x[positive_indices] anchor_ctr_y_pi = anchor_ctr_y[positive_indices] gt_widths, gt_heights, gt_ctr_x, gt_ctr_y = decode( assigned_annotations) # clip widths to 1 gt_widths = torch.clamp(gt_widths, min=1) gt_heights = torch.clamp(gt_heights, min=1) targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi targets_dw = torch.log(gt_widths / anchor_widths_pi) targets_dh = torch.log(gt_heights / anchor_heights_pi) targets = torch.stack( (targets_dx, targets_dy, targets_dw, targets_dh)) targets = targets.t() targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2] ]).to(device) negative_indices = 1 + (~positive_indices) regression_diff = torch.abs(targets - regression[positive_indices, :]) regression_loss = torch.where( torch.le(regression_diff, 1. / 9.), .5 * 9. * torch.pow(regression_diff, 2), regression_diff - .5 / 9.) regression_losses.append(regression_loss.mean()) else: regression_losses.append( torch.tensor(0, dtype=torch.float32, device=device)) return torch.stack(classification_losses).mean(dim=0, keepdim=True), \ torch.stack(regression_losses).mean(dim=0, keepdim=True)
def build_resnet_pruned_model(origin_model): pruning_rate_now = 0 channel_prune_rate = 0.9 num_mask_cfg = {'resnet56': 55, 'resnet110': 109} while pruning_rate_now < args.pruning_rate: score = [] index_cfg = [] block_index_cfg = [] layer_cfg = [] block_cfg = [] final_mask = [] pruned_state_dict = {} # Get importance criteria for each channel for i in range(num_mask_cfg[args.cfg]): mask = origin_model.state_dict()['mask.' + str(i)] score.append(torch.abs(torch.sum(mask, 0))) final_mask.append(torch.div(torch.sum(mask, 0), 2)) all_score = torch.cat(score, 0) preserve_num = int(all_score.size(0) * channel_prune_rate) preserve_channel, _ = torch.topk(all_score, preserve_num) threshold = preserve_channel[preserve_num - 1] block_score = [] #Based on the pruning threshold, the pruning rate of each layer is obtained for i, mini_score in enumerate(score): mask = torch.ge(mini_score, threshold) index = [] for j, m in enumerate(mask): if m == True: index.append(j) if len(index) < mask.size(0) * args.min_preserve: _, index = torch.topk(mini_score, int(mask.size(0) * args.min_preserve)) index = index.cpu().numpy().tolist() if i % 2 != 0: # in block index_cfg.append(index) layer_cfg.append(len(index)) else: # out block block_score.append(mini_score) begin = 0 end = int(num_mask_cfg[args.cfg] / 6) + 1 for i in range(3): block_cfg.append(block_score[begin].size(0)) for i in block_score[begin:end]: block_index_cfg.append(torch.arange( block_score[begin].size(0))) begin = end end = end + int(num_mask_cfg[args.cfg] / 6) model = import_module(f'model.{args.arch}').resnet( args.cfg, layer_cfg, block_cfg).to(device) flops, params = profile(model, inputs=(input, )) pruning_rate_now = (oriflops - flops) / oriflops channel_prune_rate = channel_prune_rate - 0.01 model_state_dict = origin_model.state_dict() current_block = 0 block_index = torch.LongTensor(block_index_cfg[0]).to(device) mask = final_mask[0][block_index_cfg[0]] pruned_state_dict['conv1.weight'] = torch.index_select( model_state_dict['conv1.weight'], 0, block_index).cpu() pruned_state_dict['bn1.weight'] = torch.mul( mask, model_state_dict['bn1.weight'][block_index]).cpu() pruned_state_dict['bn1.bias'] = torch.mul( mask, model_state_dict['bn1.bias'][block_index]).cpu() pruned_state_dict['bn1.running_var'] = model_state_dict['bn1.running_var'][ block_index].cpu() pruned_state_dict['bn1.running_mean'] = model_state_dict[ 'bn1.running_mean'][block_index].cpu() for name, module in origin_model.named_modules(): if isinstance(module, ResBasicBlock_Class): # conv1 & bn1 index = torch.LongTensor(index_cfg[current_block]).to(device) pruned_weight = torch.index_select( model_state_dict[name + '.conv1.weight'], 0, index).cpu() pruned_weight = direct_project(pruned_weight, block_index) pruned_state_dict[name + '.conv1.weight'] = pruned_weight mask = final_mask[current_block * 2 + 1][index_cfg[current_block]] pruned_state_dict[name + '.bn1.weight'] = torch.mul( mask, model_state_dict[name + '.bn1.weight'][index]).cpu() pruned_state_dict[name + '.bn1.bias'] = torch.mul( mask, model_state_dict[name + '.bn1.bias'][index]).cpu() pruned_state_dict[name + '.bn1.running_var'] = model_state_dict[ name + '.bn1.running_var'][index].cpu() pruned_state_dict[name + '.bn1.running_mean'] = model_state_dict[ name + '.bn1.running_mean'][index].cpu() block_index = torch.LongTensor(block_index_cfg[current_block + 1]).to(device) mask = final_mask[current_block * 2 + 2][block_index_cfg[current_block + 1]] # conv2 & bn2 & shortcut pruned_state_dict[name + '.conv2.weight'] = 
torch.index_select( model_state_dict[name + '.conv2.weight'], 0, block_index).cpu() pruned_state_dict[name + '.conv2.weight'] = direct_project( pruned_state_dict[name + '.conv2.weight'], index) pruned_state_dict[name + '.bn2.weight'] = torch.mul( mask, model_state_dict[name + '.bn2.weight'][block_index]).cpu() pruned_state_dict[name + '.bn2.bias'] = torch.mul( mask, model_state_dict[name + '.bn2.bias'][block_index]).cpu() pruned_state_dict[name + '.bn2.running_var'] = model_state_dict[ name + '.bn2.running_var'][block_index].cpu() pruned_state_dict[name + '.bn2.running_mean'] = model_state_dict[ name + '.bn2.running_mean'][block_index].cpu() current_block += 1 fc_weight = model_state_dict['fc.weight'].cpu() pr_fc_weight = torch.randn(fc_weight.size(0), len(block_index)) for i, ind in enumerate(block_index): pr_fc_weight[:, i] = fc_weight[:, ind] pruned_state_dict['fc.weight'] = pr_fc_weight.cpu() pruned_state_dict['fc.bias'] = model_state_dict['fc.bias'].cpu() # load weight model = import_module(f'model.{args.arch}').resnet(args.cfg, layer_cfg, block_cfg).to(device) model.load_state_dict(pruned_state_dict) return model, [layer_cfg, block_cfg], flops, params
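The pruning loop derives each layer's kept channels by comparing importance scores against a global threshold and collecting indices in a Python loop. A vectorized sketch of that selection feeding torch.index_select (scores and shapes are toy values):

import torch

score = torch.tensor([0.9, 0.1, 0.7, 0.3, 0.8])        # per-channel importance
threshold = 0.5
keep = torch.ge(score, threshold)
index = torch.nonzero(keep, as_tuple=False).view(-1)   # kept channel indices
weight = torch.randn(5, 16, 3, 3)                      # stand-in conv weight
pruned = torch.index_select(weight, 0, index)          # keep surviving filters
print(index, pruned.shape)  # tensor([0, 2, 4]) torch.Size([3, 16, 3, 3])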
def reid(self, blob, new_det_pos, new_det_scores): """Tries to ReID inactive tracks with provided detections.""" new_det_features = [torch.zeros(0) for _ in range(len(new_det_pos))] if self.do_reid: new_det_features = self.reid_network.test_rois( blob['img'], new_det_pos).data if len(self.inactive_tracks) >= 1: # calculate appearance distances dist_mat, pos = [], [] for t in self.inactive_tracks: dist_mat.append( torch.cat([ t.test_features(feat.view(1, -1)) for feat in new_det_features ], dim=1)) pos.append(t.pos) if len(dist_mat) > 1: dist_mat = torch.cat(dist_mat, 0) pos = torch.cat(pos, 0) else: dist_mat = dist_mat[0] pos = pos[0] # calculate IoU distances iou = bbox_overlaps(pos, new_det_pos) iou_mask = torch.ge(iou, self.reid_iou_threshold) iou_neg_mask = ~iou_mask # push all impossible assignments to the same big value dist_mat = dist_mat * iou_mask.float() + iou_neg_mask.float() * 1000 dist_mat = dist_mat.cpu().numpy() row_ind, col_ind = linear_sum_assignment(dist_mat) assigned = [] remove_inactive = [] for r, c in zip(row_ind, col_ind): if dist_mat[r, c] <= self.reid_sim_threshold: t = self.inactive_tracks[r] self.tracks.append(t) t.count_inactive = 0 t.pos = new_det_pos[c].view(1, -1) t.reset_last_pos() t.add_features(new_det_features[c].view(1, -1)) assigned.append(c) remove_inactive.append(t) for t in remove_inactive: self.inactive_tracks.remove(t) keep = torch.Tensor([ i for i in range(new_det_pos.size(0)) if i not in assigned ]).long() if keep.nelement() > 0: new_det_pos = new_det_pos[keep] new_det_scores = new_det_scores[keep] new_det_features = new_det_features[keep] else: new_det_pos = torch.zeros(0) new_det_scores = torch.zeros(0) new_det_features = torch.zeros(0) return new_det_pos, new_det_scores, new_det_features
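reid gates the appearance-distance matrix with an IoU mask before running the Hungarian assignment, pushing impossible pairs to a large constant. A minimal sketch of that gating step (random matrices; 0.5 stands in for self.reid_iou_threshold, 1000 is the constant from the snippet):

import torch
from scipy.optimize import linear_sum_assignment

dist = torch.rand(3, 4)                 # appearance distances, tracks x dets
iou = torch.rand(3, 4)                  # spatial overlap, tracks x dets
iou_mask = torch.ge(iou, 0.5)
# keep distances where IoU is high enough, otherwise force a huge cost
gated = dist * iou_mask.float() + (~iou_mask).float() * 1000
row, col = linear_sum_assignment(gated.numpy())
print(list(zip(row, col)))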
def step(self, blob): """This function should be called every timestep to perform tracking with a blob containing the image information. """ for t in self.tracks: # add current position to last_pos list t.last_pos.append(t.pos.clone()) ########################### # Look for new detections # ########################### self.obj_detect.load_image(blob['img']) if self.public_detections: dets = blob['dets'].squeeze(dim=0) if dets.nelement() > 0: boxes, scores = self.obj_detect.predict_boxes(dets) else: boxes = scores = torch.zeros(0) else: boxes, scores = self.obj_detect.detect(blob['img']) if boxes.nelement() > 0: boxes = clip_boxes_to_image(boxes, blob['img'].shape[-2:]) # Filter out tracks that have too low person score inds = torch.gt(scores, self.detection_person_thresh).nonzero().view(-1) else: inds = torch.zeros(0) if inds.nelement() > 0: det_pos = boxes[inds] det_scores = scores[inds] else: det_pos = torch.zeros(0) det_scores = torch.zeros(0) ################## # Predict tracks # ################## num_tracks = 0 nms_inp_reg = torch.zeros(0) if len(self.tracks): # align if self.do_align: self.align(blob) # apply motion model if self.motion_model_cfg['enabled']: self.motion() self.tracks = [t for t in self.tracks if t.has_positive_area()] # regress person_scores = self.regress_tracks(blob) if len(self.tracks): # create nms input # nms here if tracks overlap keep = nms(self.get_pos(), person_scores, self.regression_nms_thresh) self.tracks_to_inactive([ self.tracks[i] for i in list(range(len(self.tracks))) if i not in keep ]) if keep.nelement() > 0 and self.do_reid: new_features = self.get_appearances(blob) self.add_features(new_features) ##################### # Create new tracks # ##################### # !!! Here NMS is used to filter out detections that are already covered by tracks. This is # !!! done by iterating through the active tracks one by one, assigning them a bigger score # !!! than 1 (maximum score for detections) and then filtering the detections with NMS. # !!! In the paper this is done by calculating the overlap with existing tracks, but the # !!! result stays the same. if det_pos.nelement() > 0: keep = nms(det_pos, det_scores, self.detection_nms_thresh) det_pos = det_pos[keep] det_scores = det_scores[keep] # check with every track in a single run (problem if tracks delete each other) for t in self.tracks: nms_track_pos = torch.cat([t.pos, det_pos]) nms_track_scores = torch.cat( [torch.tensor([2.0]).to(det_scores.device), det_scores]) keep = nms(nms_track_pos, nms_track_scores, self.detection_nms_thresh) keep = keep[torch.ge(keep, 1)] - 1 det_pos = det_pos[keep] det_scores = det_scores[keep] if keep.nelement() == 0: break if det_pos.nelement() > 0: new_det_pos = det_pos new_det_scores = det_scores # try to reidentify tracks new_det_pos, new_det_scores, new_det_features = self.reid( blob, new_det_pos, new_det_scores) # add new if new_det_pos.nelement() > 0: self.add(new_det_pos, new_det_scores, new_det_features) #################### # Generate Results # #################### for t in self.tracks: if t.id not in self.results.keys(): self.results[t.id] = {} self.results[t.id][self.im_index] = np.concatenate( [t.pos[0].cpu().numpy(), np.array([t.score])]) for t in self.inactive_tracks: t.count_inactive += 1 self.inactive_tracks = [ t for t in self.inactive_tracks if t.has_positive_area() and t.count_inactive <= self.inactive_patience ] self.im_index += 1 self.last_image = blob['img'][0]
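The new-track filtering in step relies on a sentinel score: each track box is prepended with score 2.0 (above any detection score), NMS removes detections it covers, and keep[torch.ge(keep, 1)] - 1 maps the surviving indices back into the detection list. A toy sketch using torchvision's nms (boxes assumed to be (x1, y1, x2, y2)):

import torch
from torchvision.ops import nms

track_pos = torch.tensor([[0., 0., 10., 10.]])
det_pos = torch.tensor([[1., 1., 11., 11.], [50., 50., 60., 60.]])
det_scores = torch.tensor([0.9, 0.8])

boxes = torch.cat([track_pos, det_pos])
scores = torch.cat([torch.tensor([2.0]), det_scores])  # sentinel beats all dets
keep = nms(boxes, scores, iou_threshold=0.3)
keep = keep[torch.ge(keep, 1)] - 1   # drop the track, re-index into det_pos
print(det_pos[keep])                 # only the far-away detection survives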
def compute_projection(self, depth, camera_to_world, world_to_grid):
    # compute projection by voxels -> image
    world_to_camera = torch.inverse(camera_to_world)
    grid_to_world = torch.inverse(world_to_grid)
    voxel_bounds_min, voxel_bounds_max = self.compute_frustum_bounds(world_to_grid, camera_to_world)
    voxel_bounds_min = np.maximum(voxel_bounds_min, 0).cuda().float() if depth.is_cuda else np.maximum(voxel_bounds_min, 0).cpu().float()
    voxel_bounds_max = np.minimum(voxel_bounds_max, self.volume_dims).cuda().float() if depth.is_cuda else np.minimum(voxel_bounds_max, self.volume_dims).cpu().float()

    # coordinates within frustum bounds
    # TODO python opt for this part instead of lua/torch opt?
    lin_ind_volume = torch.arange(0, self.volume_dims[0]*self.volume_dims[1]*self.volume_dims[2], out=torch.LongTensor())
    lin_ind_volume = lin_ind_volume.cuda() if depth.is_cuda else lin_ind_volume.cpu()
    coords = camera_to_world.new(4, lin_ind_volume.size(0))
    # delinearize: linear index -> (x, y, z) voxel coordinates (floor division)
    coords[2] = lin_ind_volume // (self.volume_dims[0]*self.volume_dims[1])
    tmp = lin_ind_volume - (coords[2]*self.volume_dims[0]*self.volume_dims[1]).long()
    coords[1] = tmp // self.volume_dims[0]
    coords[0] = torch.remainder(tmp, self.volume_dims[0])
    coords[3].fill_(1)
    mask_frustum_bounds = torch.ge(coords[0], voxel_bounds_min[0]) * torch.ge(coords[1], voxel_bounds_min[1]) * torch.ge(coords[2], voxel_bounds_min[2])
    mask_frustum_bounds = mask_frustum_bounds * torch.lt(coords[0], voxel_bounds_max[0]) * torch.lt(coords[1], voxel_bounds_max[1]) * torch.lt(coords[2], voxel_bounds_max[2])
    if not mask_frustum_bounds.any():
        print('error: nothing in frustum bounds')
        return None
    lin_ind_volume = lin_ind_volume[mask_frustum_bounds]
    coords = coords.resize_(4, lin_ind_volume.size(0))
    coords[2] = lin_ind_volume // (self.volume_dims[0]*self.volume_dims[1])
    tmp = lin_ind_volume - (coords[2]*self.volume_dims[0]*self.volume_dims[1]).long()
    coords[1] = tmp // self.volume_dims[0]
    coords[0] = torch.remainder(tmp, self.volume_dims[0])
    coords[3].fill_(1)

    # transform to current frame
    p = torch.mm(world_to_camera, torch.mm(grid_to_world, coords))

    # project into image
    p[0] = (p[0] * self.intrinsic[0][0]) / p[2] + self.intrinsic[0][2]
    p[1] = (p[1] * self.intrinsic[1][1]) / p[2] + self.intrinsic[1][2]
    pi = torch.round(p).long()

    valid_ind_mask = torch.ge(pi[0], 0) * torch.ge(pi[1], 0) * torch.lt(pi[0], self.image_dims[0]) * torch.lt(pi[1], self.image_dims[1])
    if not valid_ind_mask.any():
        print('error: no valid image indices')
        return None

    valid_image_ind_x = pi[0][valid_ind_mask]
    valid_image_ind_y = pi[1][valid_ind_mask]
    valid_image_ind_lin = valid_image_ind_y * self.image_dims[0] + valid_image_ind_x
    depth_vals = torch.index_select(depth.view(-1), 0, valid_image_ind_lin)
    depth_mask = depth_vals.ge(self.depth_min) * depth_vals.le(self.depth_max) * torch.abs(depth_vals - p[2][valid_ind_mask]).le(self.voxel_size)
    if not depth_mask.any():
        print('error: no valid depths')
        return None

    lin_ind_update = lin_ind_volume[valid_ind_mask]
    lin_ind_update = lin_ind_update[depth_mask]
    # needs to be the same size for all samples in a batch; the first element stores the count
    lin_indices_3d = lin_ind_update.new(self.volume_dims[0]*self.volume_dims[1]*self.volume_dims[2] + 1)
    lin_indices_2d = lin_ind_update.new(self.volume_dims[0]*self.volume_dims[1]*self.volume_dims[2] + 1)
    lin_indices_3d[0] = lin_ind_update.shape[0]
    lin_indices_2d[0] = lin_ind_update.shape[0]
    lin_indices_3d[1:1+lin_indices_3d[0]] = lin_ind_update
    lin_indices_2d[1:1+lin_indices_2d[0]] = torch.index_select(valid_image_ind_lin, 0, torch.nonzero(depth_mask)[:, 0])
    return lin_indices_3d, lin_indices_2d
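The index-to-coordinate trick used twice above can be checked in isolation. A minimal sketch with a hypothetical 4x3x2 volume:

import torch

dims = (4, 3, 2)  # (x, y, z) extents of a hypothetical voxel volume
lin = torch.arange(dims[0] * dims[1] * dims[2])

z = lin // (dims[0] * dims[1])        # floor division, as in the code above
tmp = lin - z * dims[0] * dims[1]
y = tmp // dims[0]
x = torch.remainder(tmp, dims[0])

# round-trip check: relinearize and compare
assert torch.equal(x + y * dims[0] + z * dims[0] * dims[1], lin)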
# print('gt_label is ', gt_label)
gt_offset = gt_offset.type(torch.FloatTensor).to(device)
# print('gt_offset shape is ', gt_offset.shape)

# zero the parameter gradients
optimizer.zero_grad()

# forward
# track history only in train phase
with torch.set_grad_enabled(phase == 'train'):
    pred_offsets, pred_label = model(input_images)
    pred_offsets = torch.squeeze(pred_offsets)
    pred_label = torch.squeeze(pred_label)

    # calculate the cls loss
    # get the mask elements which are >= 0; only 0 and 1 affect the detection loss
    mask_cls = torch.ge(gt_label, 0)
    valid_gt_label = gt_label[mask_cls]
    valid_pred_label = pred_label[mask_cls]

    # calculate the box loss
    # get the mask elements which are != 0
    unmask = torch.eq(gt_label, 0)
    mask_offset = torch.eq(unmask, 0)
    valid_gt_offset = gt_offset[mask_offset]
    valid_pred_offset = pred_offsets[mask_offset]

    loss = torch.tensor(0.0).to(device)
    cls_loss, offset_loss = 0.0, 0.0
    eval_correct = 0.0
    num_gt = len(valid_gt_label)
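A minimal sketch of the label-mask convention assumed above: samples excluded from classification are marked with negative labels, so torch.ge(gt_label, 0) keeps only classification samples, while gt_label != 0 keeps box-regression samples (toy labels, hypothetical shapes):

import torch

gt_label = torch.tensor([1, 0, -1, 1, -2])  # 1 pos, 0 neg, <0 excluded from cls

mask_cls = torch.ge(gt_label, 0)            # [True, True, False, True, False]
print(gt_label[mask_cls])                   # tensor([1, 0, 1])

mask_offset = torch.ne(gt_label, 0)         # samples that carry box offsets
gt_offset = torch.randn(5, 4)
print(gt_offset[mask_offset].shape)         # torch.Size([4, 4])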
def easy(x, y):
    c = torch.ge(x, y)
    return c
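For reference, torch.ge compares elementwise (with broadcasting) and returns a boolean mask; a minimal sketch using the function above:

import torch

x = torch.tensor([1.0, 2.0, 3.0])
y = torch.tensor([2.0, 2.0, 2.0])
print(easy(x, y))    # tensor([False,  True,  True])
print(easy(x, 2.0))  # scalars broadcast: tensor([False,  True,  True])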
def forward(self, mol_tree_batch, x_tree_vecs, x_mol_vecs, reserve_x_tree_vecs, reserve_x_mol_vecs):
    pred_hiddens, pred_contexts, pred_targets = [], [], []
    stop_hiddens, stop_contexts, stop_targets = [], [], []
    traces = []
    for mol_tree in mol_tree_batch:
        s = []
        dfs(s, mol_tree.nodes[0], -1)
        traces.append(s)
        for node in mol_tree.nodes:
            node.neighbors = []

    #Predict Root
    batch_size = len(mol_tree_batch)

    max_iter = max([len(tr) for tr in traces])
    padding = create_var(torch.zeros(self.hidden_size), False)
    h = {}

    for t in range(max_iter):
        prop_list = []
        batch_list = []
        for i, plist in enumerate(traces):
            if t < len(plist):
                prop_list.append(plist[t])
                batch_list.append(i)

        cur_x = []
        cur_h_nei, cur_o_nei = [], []

        for node_x, real_y, _ in prop_list:
            #Neighbors for message passing (target not included)
            cur_nei = [h[(node_y.idx, node_x.idx)] for node_y in node_x.neighbors if node_y.idx != real_y.idx]
            pad_len = MAX_NB - len(cur_nei)
            # TODO: zyj
            if pad_len < 0:
                cur_nei = cur_nei[:MAX_NB]
            # =========================
            cur_h_nei.extend(cur_nei)
            cur_h_nei.extend([padding] * pad_len)

            #Neighbors for stop prediction (all neighbors)
            cur_nei = [h[(node_y.idx, node_x.idx)] for node_y in node_x.neighbors]
            pad_len = MAX_NB - len(cur_nei)
            # TODO: zyj
            if pad_len < 0:
                cur_nei = cur_nei[:MAX_NB]
            # ==========================
            cur_o_nei.extend(cur_nei)
            cur_o_nei.extend([padding] * pad_len)

            #Current clique embedding
            cur_x.append(node_x.wid)

        #Clique embedding
        cur_x = create_var(torch.LongTensor(cur_x))
        cur_x = self.embedding(cur_x)

        #Message passing
        cur_h_nei = torch.stack(cur_h_nei, dim=0).view(-1, MAX_NB, self.hidden_size)
        new_h = GRU(cur_x, cur_h_nei, self.W_z, self.W_r, self.U_r, self.W_h)

        #Node Aggregate
        cur_o_nei = torch.stack(cur_o_nei, dim=0).view(-1, MAX_NB, self.hidden_size)
        cur_o = cur_o_nei.sum(dim=1)

        #Gather targets
        pred_target, pred_list = [], []
        stop_target = []
        for i, m in enumerate(prop_list):
            node_x, node_y, direction = m
            x, y = node_x.idx, node_y.idx
            h[(x, y)] = new_h[i]
            node_y.neighbors.append(node_x)
            if direction == 1:
                pred_target.append(node_y.wid)
                pred_list.append(i)
            stop_target.append(direction)

        #Hidden states for stop prediction
        cur_batch = create_var(torch.LongTensor(batch_list))
        stop_hidden = torch.cat([cur_x, cur_o], dim=1)
        stop_hiddens.append(stop_hidden)
        stop_contexts.append(cur_batch)
        stop_targets.extend(stop_target)

        #Hidden states for clique prediction
        if len(pred_list) > 0:
            batch_list = [batch_list[i] for i in pred_list]
            cur_batch = create_var(torch.LongTensor(batch_list))
            pred_contexts.append(cur_batch)
            cur_pred = create_var(torch.LongTensor(pred_list))
            pred_hiddens.append(new_h.index_select(0, cur_pred))
            pred_targets.extend(pred_target)

    #Last stop at root
    cur_x, cur_o_nei = [], []
    for mol_tree in mol_tree_batch:
        node_x = mol_tree.nodes[0]
        cur_x.append(node_x.wid)
        cur_nei = [h[(node_y.idx, node_x.idx)] for node_y in node_x.neighbors]
        pad_len = MAX_NB - len(cur_nei)
        cur_o_nei.extend(cur_nei)
        cur_o_nei.extend([padding] * pad_len)

    cur_x = create_var(torch.LongTensor(cur_x))
    cur_x = self.embedding(cur_x)
    cur_o_nei = torch.stack(cur_o_nei, dim=0).view(-1, MAX_NB, self.hidden_size)
    cur_o = cur_o_nei.sum(dim=1)

    stop_hidden = torch.cat([cur_x, cur_o], dim=1)
    stop_hiddens.append(stop_hidden)
    stop_contexts.append(create_var(torch.LongTensor(list(range(batch_size)))))
    stop_targets.extend([0] * len(mol_tree_batch))

    #Predict next clique
    pred_contexts = torch.cat(pred_contexts, dim=0)
    pred_hiddens = torch.cat(pred_hiddens, dim=0)
    pred_scores = self.attention(pred_hiddens, pred_contexts, x_tree_vecs, x_mol_vecs, reserve_x_tree_vecs, reserve_x_mol_vecs, 'word')
    pred_targets = create_var(torch.LongTensor(pred_targets))

    pred_loss = self.pred_loss(pred_scores, pred_targets) / len(mol_tree_batch)
    _, preds = torch.max(pred_scores, dim=1)
    pred_acc = torch.eq(preds, pred_targets).float()
    pred_acc = torch.sum(pred_acc) / pred_targets.nelement()

    #Predict stop
    stop_contexts = torch.cat(stop_contexts, dim=0)
    stop_hiddens = torch.cat(stop_hiddens, dim=0)
    stop_hiddens = F.relu(self.U_i(stop_hiddens))
    stop_scores = self.attention(stop_hiddens, stop_contexts, x_tree_vecs, x_mol_vecs, reserve_x_tree_vecs, reserve_x_mol_vecs, 'stop')
    stop_scores = stop_scores.squeeze(-1)
    stop_targets = create_var(torch.Tensor(stop_targets))

    stop_loss = self.stop_loss(stop_scores, stop_targets) / len(mol_tree_batch)
    stops = torch.ge(stop_scores, 0).float()
    stop_acc = torch.eq(stops, stop_targets).float()
    stop_acc = torch.sum(stop_acc) / stop_targets.nelement()

    return pred_loss, stop_loss, pred_acc.item(), stop_acc.item()
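The stop-accuracy computation above relies on torch.ge(logits, 0) being equivalent to thresholding the sigmoid at 0.5; a minimal sketch with toy logits:

import torch

stop_scores = torch.tensor([-1.3, 0.2, 3.1, -0.4])  # raw logits
stop_targets = torch.tensor([0., 1., 1., 1.])

stops = torch.ge(stop_scores, 0).float()            # same as sigmoid(score) >= 0.5
acc = torch.eq(stops, stop_targets).float().mean()
print(acc)                                          # tensor(0.7500)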
def build_vgg_pruned_model(origin_model):
    pruning_rate_now = 0
    channel_prune_rate = 0.9

    while pruning_rate_now < args.pruning_rate:
        score = []
        index_cfg = []
        layer_cfg = []
        final_mask = []
        pruned_state_dict = {}

        # Get importance criteria for each channel
        for i in range(12):
            mask = origin_model.state_dict()['mask.' + str(i)]
            score.append(torch.abs(torch.div(torch.sum(mask, 0), 2)))
            final_mask.append(torch.div(torch.sum(mask, 0), 2))

        all_score = torch.cat(score, 0)
        preserve_num = int(all_score.size(0) * channel_prune_rate)
        preserve_channel, _ = torch.topk(all_score, preserve_num)
        threshold = preserve_channel[preserve_num - 1]

        # Based on the pruning threshold, the pruning rate of each layer is obtained
        for mini_score in score:
            mask = torch.ge(mini_score, threshold)
            index = []
            for i, m in enumerate(mask):
                if m:
                    index.append(i)
            if len(index) < mask.size(0) * args.min_preserve:
                _, index = torch.topk(mini_score, int(mask.size(0) * args.min_preserve))
                index = index.cpu().numpy().tolist()
            index_cfg.append(index)
            layer_cfg.append(len(index))

        last_layer_cfg = int(512 * (1 - pruning_rate_now))
        last_index = random.sample(range(0, 512), last_layer_cfg)
        last_index.sort()
        index_cfg.append(last_index)
        layer_cfg.append(last_layer_cfg)

        model = import_module(f'model.{args.arch}').VGG(args.cfg, layer_cfg=layer_cfg).to(device)

        # Update current pruning rate \alpha
        flops, params = profile(model, inputs=(input, ))
        pruning_rate_now = (oriflops - flops) / oriflops
        channel_prune_rate = channel_prune_rate - 0.01

    model_state_dict = origin_model.state_dict()
    current_layer = 0

    for name, module in origin_model.named_modules():
        if isinstance(module, nn.Conv2d):
            index = torch.LongTensor(index_cfg[current_layer]).to(device)
            pruned_weight = torch.index_select(model_state_dict[name + '.weight'], 0, index).cpu()
            pruned_bias = model_state_dict[name + '.bias'][index].cpu()
            pruned_state_dict[name + '.weight'] = pruned_weight
            pruned_state_dict[name + '.bias'] = pruned_bias
        elif isinstance(module, nn.BatchNorm2d):
            if current_layer == 12:
                pruned_state_dict[name + '.weight'] = model_state_dict[name + '.weight'][index].cpu()
                pruned_state_dict[name + '.bias'] = model_state_dict[name + '.bias'][index].cpu()
            else:
                mask = final_mask[current_layer][index_cfg[current_layer]]
                pruned_state_dict[name + '.weight'] = torch.mul(mask, model_state_dict[name + '.weight'][index]).cpu()
                pruned_state_dict[name + '.bias'] = torch.mul(mask, model_state_dict[name + '.bias'][index]).cpu()
            pruned_state_dict[name + '.running_var'] = model_state_dict[name + '.running_var'][index].cpu()
            pruned_state_dict[name + '.running_mean'] = model_state_dict[name + '.running_mean'][index].cpu()
            current_layer += 1

    # load weight
    model = import_module(f'model.{args.arch}').VGG(args.cfg, layer_cfg=layer_cfg).to(device)
    current_layer = 0
    for i, (k, v) in enumerate(pruned_state_dict.items()):
        weight = torch.FloatTensor(pruned_state_dict[k])
        if i == 0:  # the first conv's input channels need no pruning
            continue
        if weight.dim() == 4:  # conv layer
            pruned_state_dict[k] = direct_project(weight, index_cfg[current_layer])
            current_layer += 1

    fc_weight = model_state_dict['classifier.weight']
    pr_fc_weight = torch.randn(fc_weight.size(0), len(index))
    for i, ind in enumerate(index):
        pr_fc_weight[:, i] = fc_weight[:, ind]
    pruned_state_dict['classifier.weight'] = pr_fc_weight.cpu()
    pruned_state_dict['classifier.bias'] = model_state_dict['classifier.bias']

    model = import_module(f'model.{args.arch}').VGG(args.cfg, layer_cfg=layer_cfg).to(device)
    model.load_state_dict(pruned_state_dict)

    return model, layer_cfg, flops, params
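A minimal sketch of the global-threshold step above: concatenate per-layer channel scores, keep the top fraction, and derive a per-layer keep mask with torch.ge (toy scores, illustrative keep ratio):

import torch

layer_scores = [torch.tensor([0.9, 0.1, 0.5]),
                torch.tensor([0.3, 0.8])]

all_scores = torch.cat(layer_scores)
keep_num = int(all_scores.numel() * 0.6)  # keep the top 60% globally
threshold = torch.topk(all_scores, keep_num).values[-1]

for i, s in enumerate(layer_scores):
    keep = torch.ge(s, threshold)
    print(f'layer {i}: keep channels {keep.nonzero().view(-1).tolist()}')
# layer 0: keep channels [0, 2]
# layer 1: keep channels [1]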
def forward(self, inputs, labels, margin):
    self.feature = self.model(inputs)
    features = F.normalize(self.feature, p=2, dim=1)
    num = labels.size()[0]
    m_label = labels.data.cpu().numpy()
    cmp = Variable(torch.Tensor([0]).cuda()).view(-1, 1)
    count = 0
    error = 0
    criterion = nn.MarginRankingLoss(margin)
    distall = {}

    # calculate pairwise distances
    for i in range(num):
        tmp_data = features[i].view(1, -1)
        tmp_data = tmp_data.expand(features.size())
        dist_all = torch.pow(F.pairwise_distance(tmp_data, features, 2), 2)
        distall[i] = dist_all

    prev = -1
    boundary = []
    for i in range(m_label.shape[0]):
        if prev != m_label[i][0]:
            boundary.append(m_label[i][0])
            prev = m_label[i][0]

    labels_dict = {}
    for label in boundary:
        b = np.where(m_label == label)
        labels_dict[label] = [b[0][0], b[0][-1]]

    all_triplets_num = 0
    nums_loss = 0
    count_sum = 0
    num_ap_an = 0
    dist_an_all_end = Variable(torch.Tensor([[0]]).cuda())
    dist_ap_end = Variable(torch.Tensor([[0]]).cuda())

    for i in range(num):
        left_p, right_p = labels_dict[m_label[i][0]][0], labels_dict[m_label[i][0]][1]
        for j in range(left_p, right_p):
            count_sum += 1
            if j == i:
                continue
            dist_ap = distall[i][j]
            if left_p == 0:
                dist_an_all = distall[i][right_p:]
            else:
                if right_p == len(labels) - 1:
                    dist_an_all = distall[i][0:left_p]
                else:
                    dist_an_all = torch.cat((distall[i][0:left_p], distall[i][right_p:]), 0)
            dist_ap = dist_ap.view(1, -1)
            dist_ap = dist_ap.expand(dist_an_all.size())
            # count violations where ap >= an; the goal is ap <= an
            num_ap_an += torch.sum(torch.ge(dist_ap.data, dist_an_all.data))
            all_triplets_num += dist_ap.size()[0]
            dist_an_all_end = torch.cat((dist_an_all_end, dist_an_all), 0)
            dist_ap_end = torch.cat((dist_ap_end, dist_ap), 0)

    dist_an_all_end = dist_an_all_end[1:]
    dist_ap_end = dist_ap_end[1:]
    target = Variable(torch.FloatTensor(dist_ap_end.size()).fill_(1).cuda())
    batchloss = criterion(dist_an_all_end, dist_ap_end, target)
    #batchloss = batchloss / all_triplets_num
    accuracy = 1 - num_ap_an * 1.0 / all_triplets_num
    return batchloss, accuracy
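A minimal sketch of the ranking setup above: with target +1, nn.MarginRankingLoss pushes the first input (anchor-negative distance) above the second (anchor-positive distance) by at least the margin (toy distances):

import torch
import torch.nn as nn

criterion = nn.MarginRankingLoss(margin=0.5)

dist_an = torch.tensor([1.2, 0.4])  # anchor-negative distances
dist_ap = torch.tensor([0.3, 0.6])  # anchor-positive distances
target = torch.ones(2)              # +1: first argument should be larger

loss = criterion(dist_an, dist_ap, target)
print(loss)  # mean(max(0, -(an - ap) + 0.5)) = mean([0, 0.7]) = tensor(0.3500)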