a, _ = train_dataset[int(rp[i])] t = a[0].item() # index of first token in the sequence counts[t] +=1 prob = counts/counts.sum() %%time from mingpt.utils import sample n_samples = 32 start_pixel = np.random.choice(np.arange(C.size(0)), size=(n_samples, 1), replace=True, p=prob) start_pixel = torch.from_numpy(start_pixel).to(trainer.device) pixels = sample(model, start_pixel, 32*32-1, temperature=1.0, sample=True, top_k=100) # for visualization we have to invert the permutation used to produce the pixels iperm = torch.argsort(train_dataset.perm) ncol = 8 nrow = n_samples // ncoal plt.figure(figsize = (16, 8)) for i in range(n_samples): pxi = pixels[i][iperm] # note: undo the encoding permutation plt.subplot(nrow, ncol, i+1) plt.imshow(C[pxi].view(32, 32, 3).numpy().astype(np.unit8)) plt.axis('off') #visualize some of the learned positional embeddings, maybe they contain structure plt. figure(figure=(5, 5)) nsee = 8*8 ncol = 8
def get_objf(batch: Dict,
             model: AcousticModel,
             P: k2.Fsa,
             device: torch.device,
             graph_compiler: MmiTrainingGraphCompiler,
             is_training: bool,
             is_update: bool,
             accum_grad: int = 1,
             den_scale: float = 1.0,
             att_rate: float = 0.0,
             tb_writer: Optional[SummaryWriter] = None,
             global_batch_idx_train: Optional[int] = None,
             optimizer: Optional[torch.optim.Optimizer] = None):
    """Compute the numerator/denominator objective for one batch and optionally train on it.

    Args:
        batch: Dict with a 3-D ``'features'`` tensor and a ``'supervisions'``
            dict providing ``'sequence_idx'``, ``'start_frame'``,
            ``'num_frames'`` and ``'text'``.
        model: Network called as ``model(feature, supervisions)``; it must
            return ``(nnet_output, encoder_memory, memory_mask)``.
        P: FSA forwarded to ``graph_compiler.compile``.
        device: Device the features and graphs are moved to.
        graph_compiler: Produces the numerator and denominator graphs.
        is_training: When true, gradients are tracked and ``loss.backward()``
            is called.
        is_update: When true (and training), gradients are clipped and an
            optimizer step is taken.
        accum_grad: Gradient-accumulation factor; the loss is divided by
            ``len(texts) * accum_grad``.
        den_scale: Weight applied to the denominator scores.
        att_rate: Interpolation weight of the attention-decoder loss; ``0.0``
            disables it.
        tb_writer: Optional TensorBoard writer for diagnostics.
        global_batch_idx_train: Optional global step used for diagnostics.
        optimizer: Optimizer to step when ``is_update`` is true.

    Returns:
        Tuple ``(-tot_score, tot_frames, all_frames)`` as Python numbers, taken
        from ``get_tot_objf_and_num_frames``.
    """
    feature = batch['features']
    supervisions = batch['supervisions']
    # Start frame and frame count each go through two successive
    # ``(x - 1) // 2`` reductions -- presumably mirroring the model's
    # subsampling; TODO confirm against the model definition.
    supervision_segments = torch.stack(
        (supervisions['sequence_idx'],
         (((supervisions['start_frame'] - 1) // 2 - 1) // 2),
         (((supervisions['num_frames'] - 1) // 2 - 1) // 2)), 1).to(torch.int32)
    supervision_segments = torch.clamp(supervision_segments, min=0)
    # Order segments by decreasing duration and keep the transcripts aligned.
    indices = torch.argsort(supervision_segments[:, 2], descending=True)
    supervision_segments = supervision_segments[indices]

    texts = supervisions['text']
    texts = [texts[idx] for idx in indices]
    assert feature.ndim == 3
    # print(supervision_segments[:, 1] + supervision_segments[:, 2])

    feature = feature.to(device)
    # at entry, feature is [N, T, C]
    feature = feature.permute(0, 2, 1)  # now feature is [N, C, T]
    if is_training:
        nnet_output, encoder_memory, memory_mask = model(feature, supervisions)
        if att_rate != 0.0:
            att_loss = model.decoder_forward(encoder_memory, memory_mask,
                                             supervisions, graph_compiler)
    else:
        with torch.no_grad():
            nnet_output, encoder_memory, memory_mask = model(
                feature, supervisions)
            if att_rate != 0.0:
                att_loss = model.decoder_forward(encoder_memory, memory_mask,
                                                 supervisions, graph_compiler)

    # nnet_output is [N, C, T]
    nnet_output = nnet_output.permute(0, 2, 1)  # now nnet_output is [N, T, C]

    if is_training:
        num, den = graph_compiler.compile(texts, P)
    else:
        with torch.no_grad():
            num, den = graph_compiler.compile(texts, P)

    assert num.requires_grad == is_training
    assert den.requires_grad is False
    num = num.to(device)
    den = den.to(device)

    # nnet_output2 = nnet_output.clone()
    # blank_bias = -7.0
    # nnet_output2[:,:,0] += blank_bias

    dense_fsa_vec = k2.DenseFsaVec(nnet_output, supervision_segments)
    assert nnet_output.device == device

    num = k2.intersect_dense(num, dense_fsa_vec, 10.0)
    den = k2.intersect_dense(den, dense_fsa_vec, 10.0)

    num_tot_scores = num.get_tot_scores(log_semiring=True,
                                        use_double_scores=True)
    den_tot_scores = den.get_tot_scores(log_semiring=True,
                                        use_double_scores=True)
    tot_scores = num_tot_scores - den_scale * den_tot_scores

    (tot_score, tot_frames,
     all_frames) = get_tot_objf_and_num_frames(tot_scores,
                                               supervision_segments[:, 2])

    if is_training:

        def maybe_log_gradients(tag: str):
            if (tb_writer is not None
                    and global_batch_idx_train is not None
                    and global_batch_idx_train % 200 == 0):
                tb_writer.add_scalars(tag,
                                      measure_gradient_norms(model, norm='l1'),
                                      global_step=global_batch_idx_train)

        if att_rate != 0.0:
            loss = (-(1.0 - att_rate) * tot_score +
                    att_rate * att_loss) / (len(texts) * accum_grad)
        else:
            loss = (-tot_score) / (len(texts) * accum_grad)
        loss.backward()
        if is_update:
            maybe_log_gradients('train/grad_norms')
            clip_grad_value_(model.parameters(), 5.0)
            maybe_log_gradients('train/clipped_grad_norms')
            # Both diagnostics inputs default to None, so they must be checked
            # before use; without a writer or a step index we fall back to a
            # plain optimizer step instead of raising.
            measure_param_change = (
                tb_writer is not None
                and global_batch_idx_train is not None
                and (global_batch_idx_train // accum_grad) % 200 == 0)
            if measure_param_change:
                # Once in a time we will perform a more costly diagnostic
                # to check the relative parameter change per minibatch.
                deltas = optim_step_and_measure_param_change(model, optimizer)
                tb_writer.add_scalars(
                    'train/relative_param_change_per_minibatch',
                    deltas,
                    global_step=global_batch_idx_train)
            else:
                optimizer.step()
            optimizer.zero_grad()

    ans = (-tot_score.detach().cpu().item(),
           tot_frames.cpu().item(),
           all_frames.cpu().item())
    return ans
def test_compute_non_dominated_hypercell_bounds_2d(self):
    """Check 2-D non-dominated cell bounds, non-batched and batched.

    The non-batched part compares the returned cells with a hand-written
    expectation; the batched part checks the hypervolume implied by the cells.
    """
    ref_point_raw = torch.zeros(2, device=self.device)
    arange = torch.arange(3, 9, device=self.device)
    pareto_Y_raw = torch.stack([arange, 11 - arange], dim=-1)
    inf = float("inf")
    # Index 0 holds the lower corners, index 1 the upper corners of 7 cells.
    expected_cell_bounds_raw = torch.tensor(
        [
            [
                [8.0, 0.0],
                [7.0, 3.0],
                [6.0, 4.0],
                [5.0, 5.0],
                [4.0, 6.0],
                [3.0, 7.0],
                [0.0, 8.0],
            ],
            [
                [inf, inf],
                [8.0, inf],
                [7.0, inf],
                [6.0, inf],
                [5.0, inf],
                [4.0, inf],
                [3.0, inf],
            ],
        ],
        device=self.device,
    )
    for dtype in (torch.float, torch.double):
        pareto_Y = pareto_Y_raw.to(dtype=dtype)
        ref_point = ref_point_raw.to(dtype=dtype)
        expected_cell_bounds = expected_cell_bounds_raw.to(dtype=dtype)
        # test non-batch
        cell_bounds = compute_non_dominated_hypercell_bounds_2d(
            pareto_Y_sorted=pareto_Y,
            ref_point=ref_point,
        )
        # A cell matches only if BOTH its lower and upper corner agree with the
        # same expected cell. Entry [e, c] pairs expected cell e with computed
        # cell c, so the check does not depend on the order cells come back in.
        lower_match = (
            cell_bounds[0].unsqueeze(0) == expected_cell_bounds[0].unsqueeze(1)
        ).all(dim=-1)
        upper_match = (
            cell_bounds[1].unsqueeze(0) == expected_cell_bounds[1].unsqueeze(1)
        ).all(dim=-1)
        cell_match = lower_match & upper_match
        # assertEqual (not assertTrue, whose second argument is only the
        # failure message): every computed cell is expected, and vice versa.
        self.assertEqual(cell_match.any(dim=0).sum().item(), 7)
        self.assertEqual(cell_match.any(dim=1).sum().item(), 7)
        # test batch
        pareto_Y_batch = torch.stack(
            [pareto_Y, pareto_Y + pareto_Y.max(dim=-2).values], dim=0
        )
        # filter out points that are not better than ref_point
        ref_point = pareto_Y.max(dim=-2).values
        pareto_Y_batch = _pad_batch_pareto_frontier(
            Y=pareto_Y_batch, ref_point=ref_point, is_pareto=True
        )
        # sort pareto_Y_batch
        pareto_Y_batch = pareto_Y_batch.gather(
            index=torch.argsort(pareto_Y_batch[..., :1], dim=-2).expand(
                pareto_Y_batch.shape
            ),
            dim=-2,
        )
        cell_bounds = compute_non_dominated_hypercell_bounds_2d(
            ref_point=ref_point,
            pareto_Y_sorted=pareto_Y_batch,
        )
        # check hypervolume
        max_vals = (pareto_Y + pareto_Y).max(dim=-2).values
        # Clamp the infinite upper corners so each cell has finite volume.
        clamped_cell_bounds = torch.min(cell_bounds, max_vals)
        total_hv = (max_vals - ref_point).prod()
        nondom_hv = (
            (clamped_cell_bounds[1] - clamped_cell_bounds[0])
            .prod(dim=-1)
            .sum(dim=-1)
        )
        hv = total_hv - nondom_hv
        self.assertEqual(hv[0].item(), 0.0)
        self.assertEqual(hv[1].item(), 49.0)
def test_sort(self, device):
    """Exercise ``torch.sort`` / ``argsort`` in both directions.

    Covers the ``out=`` variants, in-place sorting, duplicate keys and NaNs,
    for a small and a large size of the sorted dimension.
    """
    # on CUDA 2048 vs >2048 have different code path for the dim being sorted
    for SIZE in (4, 2049):
        x = torch.rand(4, SIZE, device=device)
        res1val, res1ind = torch.sort(x)

        # Test inplace
        y = x.clone()
        y_inds = torch.tensor((), dtype=torch.int64, device=device)
        torch.sort(y, out=(y, y_inds))
        x_vals, x_inds = torch.sort(x)
        self.assertEqual(x_vals, y)
        self.assertEqual(x_inds, y_inds)

        # Test use of result tensor
        res2val = torch.tensor((), device=device)
        res2ind = torch.tensor((), device=device, dtype=torch.long)
        torch.sort(x, out=(res2val, res2ind))
        self.assertEqual(res1val, res2val, atol=0, rtol=0)
        self.assertEqual(res1ind, res2ind, atol=0, rtol=0)
        self.assertEqual(torch.argsort(x), res1ind)
        self.assertEqual(x.argsort(), res1ind)

        # Test sorting of random numbers
        self.assertIsOrdered('ascending', x, res2val, res2ind, 'random')

        # Test simple sort
        self.assertEqual(
            torch.sort(torch.tensor((50, 40, 30, 20, 10), device=device))[0],
            torch.tensor((10, 20, 30, 40, 50), device=device),
            atol=0, rtol=0)

        # Test that we still have proper sorting with duplicate keys
        x = torch.floor(torch.rand(4, SIZE, device=device) * 10)
        torch.sort(x, out=(res2val, res2ind))
        self.assertIsOrdered('ascending', x, res2val, res2ind,
                             'random with duplicate keys')

        # DESCENDING SORT
        x = torch.rand(4, SIZE, device=device)
        res1val, res1ind = torch.sort(x, x.dim() - 1, True)

        # Test use of result tensor
        res2val = torch.tensor((), device=device)
        res2ind = torch.tensor((), device=device, dtype=torch.long)
        torch.sort(x, x.dim() - 1, True, out=(res2val, res2ind))
        self.assertEqual(res1val, res2val, atol=0, rtol=0)
        self.assertEqual(res1ind, res2ind, atol=0, rtol=0)
        self.assertEqual(torch.argsort(x, x.dim() - 1, True), res1ind)
        self.assertEqual(x.argsort(x.dim() - 1, True), res1ind)

        # Test sorting of random numbers
        self.assertIsOrdered('descending', x, res2val, res2ind, 'random')

        # Test simple sort task
        self.assertEqual(
            torch.sort(torch.tensor((10, 20, 30, 40, 50), device=device),
                       0, True)[0],
            torch.tensor((50, 40, 30, 20, 10), device=device),
            atol=0, rtol=0)

        # Test that we still have proper sorting with duplicate keys.
        # Draw a fresh tensor that really contains duplicates and sort it in
        # descending order; re-checking the previous duplicate-free tensor
        # would not exercise this case at all.
        x = torch.floor(torch.rand(4, SIZE, device=device) * 10)
        torch.sort(x, x.dim() - 1, True, out=(res2val, res2ind))
        self.assertIsOrdered('descending', x, res2val, res2ind,
                             'random with duplicate keys')

        # Test sorting with NaNs
        x = torch.rand(4, SIZE, device=device)
        x[1][2] = float('NaN')
        x[3][0] = float('NaN')
        torch.sort(x, out=(res2val, res2ind))
        self.assertIsOrdered('ascending', x, res2val, res2ind,
                             'random with NaNs')
        torch.sort(x, out=(res2val, res2ind), descending=True)
        self.assertIsOrdered('descending', x, res2val, res2ind,
                             'random with NaNs')
# Counters for flagged sequences: FP is incremented for lines from the normal
# loader, TP for lines from the abnormal loader.
TP = 0
FP = 0
# Test the model
start_time = time.time()
with torch.no_grad():
    for line in test_normal_loader:
        # A line shorter than the window yields no windows below (the range is
        # empty), so it is counted as flagged right away.
        if len(line) < window_size:
            FP += 1
        # Slide a window over the line; each window predicts the next entry.
        for i in range(len(line) - window_size):
            seq = line[i:i + window_size]
            label = line[i + window_size]
            seq = torch.tensor(seq, dtype=torch.float).view(-1, window_size).to(device)
            # One-hot encode the window entries before feeding the model.
            x_onehot = torch.nn.functional.one_hot(seq.long(), num_classes).float()
            label = torch.tensor(label).view(-1).to(device)
            output = model(x_onehot)
            # Keep the `num_candidates` highest-scoring classes of the first row
            # (argsort is ascending, so they sit at the end).
            predicted = torch.argsort(output, 1)[0][-num_candidates:]
            # The first window whose true next entry is not among the
            # candidates flags the whole line; stop scanning it.
            if label not in predicted:
                FP += 1
                break
with torch.no_grad():
    for line in test_abnormal_loader:
        # Same short-line rule as above, counted towards TP here.
        if len(line) < window_size:
            TP += 1
        for i in range(len(line) - window_size):
            seq = line[i:i + window_size]
            label = line[i + window_size]
            seq = torch.tensor(seq, dtype=torch.float).view(-1, window_size).to(device)
            x_onehot = torch.nn.functional.one_hot(seq.long(), num_classes).float()
            label = torch.tensor(label).view(-1).to(device)
            output = model(x_onehot)
            # NOTE(review): the remainder of this loop lies outside this view.
def gen_objectpairs(proposals, filter_scores):
    """Build scored, NMS-filtered (subject, object) box pairs from detections.

    Every ordered pair of distinct boxes is scored with
    ``score[subject] * score[object] * filter_score[pair]``, sorted by
    decreasing score and passed through ``nms``. Each pair is represented by
    the union box of its two members.

    Args:
        proposals: Box container exposing ``bbox``, ``size``, ``len()`` and the
            fields ``'labels'`` and ``'scores'``.
        filter_scores: One score per ordered pair, including the ``i == j``
            pairs; it is reshaped to ``(-1, 1)`` and must hold
            ``num_boxes ** 2`` values in row-major (subject, object) order.

    Returns:
        A one-element list holding a ``BoxList`` of union boxes with the fields
        ``subject_boundingboxes``, ``object_boundingboxes``,
        ``subject_category``, ``object_category``, ``subject_scores``,
        ``object_scores`` and ``objectpairs_scores``. The ``BoxList`` is empty
        when there are no boxes or no pair of distinct boxes.
    """
    objectpairs_list = []
    labels = proposals.get_field('labels')
    object_scores = proposals.get_field('scores')
    bounding_box = proposals.bbox
    img_size = proposals.size
    num_boxes = len(proposals)

    if num_boxes != 0:
        # Index tensors live on the scores' device: a CPU tensor cannot be
        # indexed with the accelerator-side result of argsort further down.
        a = torch.arange(num_boxes, device=object_scores.device)
        # Row-major enumeration of all ordered pairs (i, j).
        subject_ids = a.repeat(num_boxes, 1).permute(1, 0).contiguous().view(-1, 1)
        object_ids = a.repeat(1, num_boxes).permute(1, 0).view(-1, 1)
        objectpairs_idx = torch.cat((subject_ids, object_ids), 1)
        detection_scores = (
            object_scores.repeat(num_boxes, 1).permute(1, 0).contiguous().view(-1, 1)
            * object_scores.repeat(1, num_boxes).permute(1, 0).view(-1, 1)
        )
        filter_scores = filter_scores.view(-1, 1)
        objectpairs_scores = detection_scores * filter_scores

        # Drop the pairs that combine a box with itself.
        remain_idx = objectpairs_idx[:, 0] != objectpairs_idx[:, 1]
        objectpairs_idx = objectpairs_idx[remain_idx]
        objectpairs_scores = objectpairs_scores[remain_idx]

        # Sort the remaining pairs by decreasing pair score.
        idx = torch.argsort(objectpairs_scores, dim=0, descending=True).view(-1)
        objectpairs_idx = objectpairs_idx[idx]
        objectpairs_scores = objectpairs_scores[idx].view(-1)

        if len(objectpairs_scores) != 0:
            subject_ids = objectpairs_idx[:, 0]
            object_ids = objectpairs_idx[:, 1]
            subject_boundingboxes = bounding_box[subject_ids, :]
            object_boundingboxes = bounding_box[object_ids, :]
            subject_category = labels[subject_ids]
            object_category = labels[object_ids]
            subject_scores = object_scores[subject_ids]
            pair_object_scores = object_scores[object_ids]

            # Union box of subject and object (xyxy).
            xs = torch.min(subject_boundingboxes[:, 0], object_boundingboxes[:, 0]).view(-1, 1)
            ys = torch.min(subject_boundingboxes[:, 1], object_boundingboxes[:, 1]).view(-1, 1)
            xm = torch.max(subject_boundingboxes[:, 2], object_boundingboxes[:, 2]).view(-1, 1)
            ym = torch.max(subject_boundingboxes[:, 3], object_boundingboxes[:, 3]).view(-1, 1)
            boxes = torch.cat((xs, ys, xm, ym), 1)

            keep = nms(subject_boundingboxes, object_boundingboxes, objectpairs_scores,
                       subject_category, object_category, thresh=0.25)

            objectpairs = BoxList(boxes[keep], img_size, mode="xyxy")
            objectpairs.add_field("subject_boundingboxes", subject_boundingboxes[keep])
            objectpairs.add_field("object_boundingboxes", object_boundingboxes[keep])
            objectpairs.add_field("subject_category", subject_category[keep])
            objectpairs.add_field("object_category", object_category[keep])
            objectpairs.add_field("subject_scores", subject_scores[keep])
            objectpairs.add_field("object_scores", pair_object_scores[keep])
            objectpairs.add_field("objectpairs_scores", objectpairs_scores[keep])
            objectpairs_list.append(objectpairs)
            return objectpairs_list

    # No boxes, or no pair of distinct boxes: return an empty BoxList carrying
    # the same fields. Score placeholders take the dtype/device of the scores
    # (not of the integer labels) so they match the non-empty result.
    objectpairs = BoxList(torch.tensor([], device=bounding_box.device).view(-1, 4),
                          img_size, mode="xyxy")
    objectpairs.add_field("subject_boundingboxes",
                          torch.tensor([], device=bounding_box.device).view(-1, 4))
    objectpairs.add_field("object_boundingboxes",
                          torch.tensor([], device=bounding_box.device).view(-1, 4))
    objectpairs.add_field("subject_category", labels.new_empty((0)))
    objectpairs.add_field("object_category", labels.new_empty((0)))
    objectpairs.add_field("subject_scores", object_scores.new_empty((0)))
    objectpairs.add_field("object_scores", object_scores.new_empty((0)))
    objectpairs.add_field("objectpairs_scores", object_scores.new_empty((0)))
    objectpairs_list.append(objectpairs)
    return objectpairs_list
def test_broadcast_benchmark(self):
    """Time ``clustered_broadcast`` (plus un-sorting) against ``broadcast``.

    Both kernels are warmed up first, then a single invocation of each is
    timed with CUDA events and the two timings are printed.
    """
    N, H = 12, 8
    L, S = 1000, 1000
    E, D = 64, 64
    C, I, B = 200, 5, 63

    Q = torch.randn(N, H, L, E).cuda()
    lengths = torch.full((N,), L, dtype=torch.int32).cuda()
    groups, counts = cluster_queries(Q, lengths, C, I, B)
    sorted_g, sorted_gi = torch.sort(groups.view(N*H, -1), dim=-1)
    sorted_rev_gi = torch.argsort(sorted_gi, dim=-1)

    q_offset = torch.arange(N*H, device=Q.device).unsqueeze(-1) * L
    q_flat = (sorted_gi + q_offset).reshape(-1)  # computed but not used below

    Q_grouped = aggregate(Q, groups, 1/counts.float())
    K = torch.randn(N, H, S, E).cuda()
    QK = torch.einsum("nhle,nhse->nhls", Q_grouped, K)
    V = torch.randn(N, H, S, E).cuda()
    A = F.softmax(QK, dim=-1)
    V_new = torch.einsum("nhls,nhse->nhle", A, V)
    V_broadcast = torch.zeros((N, H, L, E), dtype=V_new.dtype).cuda()

    def clustered_pass(out_buffer):
        # Broadcast in sorted order, then restore the original query order.
        factors = torch.ones_like(counts, dtype=torch.float32)
        sorted_result = clustered_broadcast(
            V_new,
            sorted_g.view(N, H, L),
            counts,
            factors,
            out_buffer
        )
        unsort_index = (sorted_rev_gi + q_offset).reshape(-1)
        flat = sorted_result.reshape(-1, D)
        return flat.index_select(0, unsort_index).view(N, H, L, D)

    def reference_pass():
        return broadcast(
            V_new,
            groups,
            torch.ones_like(counts, dtype=torch.float32),
            torch.zeros((N, H, L, E), device=Q.device)
        )

    # One initial call followed by 2000 warm-up calls; every call receives the
    # previous result as its output buffer.
    for _ in range(1 + 2000):
        V_broadcast = clustered_pass(V_broadcast)

    s = torch.cuda.Event(enable_timing=True)
    e = torch.cuda.Event(enable_timing=True)
    s.record()
    V_broadcast = clustered_pass(V_broadcast)
    e.record()
    torch.cuda.synchronize()
    t_broadcast = s.elapsed_time(e)

    # Warm up the reference kernel, then time a single call.
    for _ in range(200):
        V_broadcast_2 = reference_pass()

    s = torch.cuda.Event(enable_timing=True)
    e = torch.cuda.Event(enable_timing=True)
    s.record()
    V_broadcast_2 = reference_pass()
    e.record()
    torch.cuda.synchronize()
    t_broadcast_2 = s.elapsed_time(e)

    print("B1: {}, B2: {}".format(t_broadcast, t_broadcast_2))
def train_IL(model, train_loader, labeled_eval_loader, unlabeled_eval_loader, args):
    """Train ``model`` jointly on labeled and unlabeled classes.

    Each batch provides two views ``(x, x_bar)`` of the inputs. The loss sums
    a cross-entropy term on labeled samples, a pairwise BCE term on unlabeled
    samples (targets derived from top-k feature rankings), a cross-entropy
    term on head-1 outputs against head-2 pseudo labels, and a consistency
    term between the two views. After every epoch both heads are evaluated.

    Args:
        model: Network returning ``(output1, output2, feat)``.
        train_loader: Yields ``((x, x_bar), label, idx)`` batches.
        labeled_eval_loader: Loader passed to ``test`` with ``args.head = 'head1'``.
        unlabeled_eval_loader: Loader passed to ``test`` with ``args.head = 'head2'``.
        args: Namespace supplying the optimizer, scheduler, ramp-up and loss
            hyper-parameters read below; ``args.head`` is overwritten.
    """
    optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum,
                    weight_decay=args.weight_decay)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=args.step_size,
                                           gamma=args.gamma)
    criterion1 = nn.CrossEntropyLoss()
    criterion2 = BCE()
    for epoch in range(args.epochs):
        loss_record = AverageMeter()
        model.train()
        # Ramp-up weight shared by the consistency and pseudo-label terms.
        w = args.rampup_coefficient * ramps.sigmoid_rampup(
            epoch, args.rampup_length)
        for batch_idx, ((x, x_bar), label, idx) in enumerate(tqdm(train_loader)):
            x, x_bar, label = x.to(device), x_bar.to(device), label.to(device)
            output1, output2, feat = model(x)
            output1_bar, output2_bar, _ = model(x_bar)
            prob1 = F.softmax(output1, dim=1)
            prob1_bar = F.softmax(output1_bar, dim=1)
            prob2 = F.softmax(output2, dim=1)
            prob2_bar = F.softmax(output2_bar, dim=1)

            # Samples whose label is below num_labeled_classes are labeled.
            mask_lb = label < args.num_labeled_classes

            # Pairwise targets: two unlabeled samples count as similar when
            # their top-k feature dimensions coincide as sets.
            rank_feat = (feat[~mask_lb]).detach()
            rank_idx = torch.argsort(rank_feat, dim=1, descending=True)
            rank_idx1, rank_idx2 = PairEnum(rank_idx)
            rank_idx1, rank_idx2 = rank_idx1[:, :args.topk], rank_idx2[:, :args.topk]
            rank_idx1, _ = torch.sort(rank_idx1, dim=1)
            rank_idx2, _ = torch.sort(rank_idx2, dim=1)

            rank_diff = rank_idx1 - rank_idx2
            rank_diff = torch.sum(torch.abs(rank_diff), dim=1)
            target_ulb = torch.ones_like(rank_diff).float().to(device)
            target_ulb[rank_diff > 0] = -1

            prob1_ulb, _ = PairEnum(prob2[~mask_lb])
            _, prob2_ulb = PairEnum(prob2_bar[~mask_lb])

            loss_ce = criterion1(output1[mask_lb], label[mask_lb])

            # Pseudo labels for unlabeled samples: head-2 argmax shifted past
            # the labeled class indices.
            label[~mask_lb] = (output2[~mask_lb]
                               ).detach().max(1)[1] + args.num_labeled_classes

            loss_ce_add = w * criterion1(
                output1[~mask_lb], label[~mask_lb]
            ) / args.rampup_coefficient * args.increment_coefficient
            loss_bce = criterion2(prob1_ulb, prob2_ulb, target_ulb)
            consistency_loss = F.mse_loss(prob1, prob1_bar) + F.mse_loss(
                prob2, prob2_bar)

            loss = loss_ce + loss_bce + loss_ce_add + w * consistency_loss

            loss_record.update(loss.item(), x.size(0))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Advance the LR schedule only after this epoch's optimizer steps;
        # stepping the scheduler first skips the schedule's initial value on
        # PyTorch >= 1.1.
        exp_lr_scheduler.step()
        print('Train Epoch: {} Avg Loss: {:.4f}'.format(
            epoch, loss_record.avg))
        print('test on labeled classes')
        args.head = 'head1'
        test(model, labeled_eval_loader, args)
        print('test on unlabeled classes')
        args.head = 'head2'
        test(model, unlabeled_eval_loader, args)
def predict_select(self, confidence: torch.Tensor, box_predict: torch.Tensor,
                   default_box: torch.Tensor, filter=True):
    """Decode predicted offsets into proposals and keep the best ones.

    Offsets are decoded against ``default_box``, clamped to
    ``[cfg.left_border, cfg.right_border]``, filtered by validity and score,
    limited to the ``cfg.be_topk`` highest scores, run through ``nms`` and
    truncated to ``cfg.af_topk`` entries. Column 1 of the last ``confidence``
    axis is used as the score.

    Args:
        confidence: Scores; a tensor with more than two dimensions is treated
            as a batch and processed item by item.
        box_predict: Predicted offsets aligned with ``confidence``.
        default_box: Default boxes the offsets are decoded against.
        filter: Unused; kept for interface compatibility.

    Returns:
        ``(proposals, scores, offsets)``. For batched input each element is a
        list with one tensor per batch item; otherwise each is a tensor.
    """
    if len(confidence.size()) > 2:
        batch_proposal = []
        batch_conf = []
        batch_offset = []
        for index in range(confidence.size()[0]):
            this_default_box = default_box.clone()
            this_scores = confidence[index, :, 1]
            bbox_deltas = box_predict[index]
            this_proposal = offset_to_box(this_default_box, bbox_deltas)
            this_proposal = torch.clamp(this_proposal,
                                        min=cfg.left_border,
                                        max=cfg.right_border)

            # Drop inverted boxes (left border beyond right border). The mask
            # must hit the scores too, otherwise the score mask below no
            # longer lines up with the proposals.
            keep = this_proposal[:, 0] <= this_proposal[:, 1]
            this_proposal = this_proposal[keep]
            bbox_deltas = bbox_deltas[keep]
            this_scores = this_scores[keep]

            keep = this_scores >= 0.5
            this_proposal = this_proposal[keep]
            bbox_deltas = bbox_deltas[keep]
            this_scores = this_scores[keep]

            if this_proposal.size()[-2] > cfg.be_topk:
                # argsort is ascending: the last be_topk entries score highest.
                order = torch.argsort(this_scores)[-cfg.be_topk:]
                this_proposal = this_proposal[order]
                this_scores = this_scores[order]
                # Keep the offsets aligned with the reordered proposals, since
                # keep2 below indexes all three tensors.
                bbox_deltas = bbox_deltas[order]

            keep2 = nms(this_proposal, this_scores, cfg.nms_threash)
            keep2 = keep2[:cfg.af_topk]
            batch_offset.append(bbox_deltas[keep2])
            batch_proposal.append(this_proposal[keep2])
            batch_conf.append(this_scores[keep2])
        return batch_proposal, batch_conf, batch_offset
    else:
        this_proposal = offset_to_box(default_box, box_predict)
        this_proposal = torch.clamp(this_proposal,
                                    min=cfg.left_border,
                                    max=cfg.right_border)

        keep = this_proposal[:, 0] < this_proposal[:, 1]
        this_proposal = this_proposal[keep]
        box_predict = box_predict[keep]
        confidence = confidence[:, 1]
        confidence = confidence[keep]

        # Apply the score threshold only if something survives it; otherwise
        # all valid proposals are kept.
        keep3 = confidence >= 0.5
        if (keep3.sum().item() > 0):
            this_proposal = this_proposal[keep3]
            box_predict = box_predict[keep3]
            confidence = confidence[keep3]

        if this_proposal.size()[-2] > cfg.be_topk:
            order = torch.argsort(confidence)[-cfg.be_topk:]
            this_proposal = this_proposal[order]
            confidence = confidence[order]
            # Keep the offsets aligned with the reordered proposals.
            box_predict = box_predict[order]

        keep2 = nms(this_proposal, confidence, cfg.nms_threash)
        keep2 = keep2[:cfg.af_topk]
        this_proposal = this_proposal[keep2]
        this_conf = confidence[keep2]
        box_predict = box_predict[keep2]
        return this_proposal, this_conf, box_predict
def attack(self, entry):
    """Try to flip the target model's prediction by substituting phrases.

    Phrases are ranked by an importance score obtained from masked copies of
    the text. In that order, each phrase is masked with one or more mask
    tokens, candidate replacements are generated, and candidates passing the
    stop-word and sentence-similarity checks are scored against the target
    model. The first candidate that changes the predicted label ends the
    attack; otherwise the highest-scoring candidate is committed and the next
    phrase is tried, until ``self.change_threshold`` is exceeded.

    Args:
        entry: Dict with at least ``'text'``, ``'label'``, ``'words'``,
            ``'phrases'``, ``'phrase_offsets'`` and ``'n_words_in_phrases'``.

    Returns:
        The same ``entry`` with the bookkeeping keys filled in:
        ``'pred_success'``, ``'success'``, ``'phrase_changes'``,
        ``'word_changes'``, ``'changes'``, ``'query_num'``, ``'final_adv'``,
        ``'word_num'``, ``'phrase_num'`` and ``'phrase_len'``. The lists
        ``entry['phrases']`` and ``entry['n_words_in_phrases']`` are updated
        in place as substitutions are committed.
    """
    # TODO: a problem with get_important_scores is that
    # it does not care about the number of tokens.
    # since BERT can only accept 512 tokens at max,
    # if [UNK] is inserted at a place after the 512th token,
    # the importance score is essentially invalid.
    # potential solution:
    # 1. modify the tokenize step to pass
    #    each entry into BertTokenizer and convert it back
    #    to keep truncated text with at max 512 tokens.
    # 2. (currently used) simply skip if mask_token_index is empty
    entry['phrase_changes'] = 0
    entry['word_changes'] = 0
    entry['changes'] = []
    entry['pred_success'] = True
    entry['success'] = False
    entry['word_num'] = len(entry['words'])
    phrase_lengths = [n for n in entry['n_words_in_phrases'] if n > 1]
    entry['phrase_num'] = len(phrase_lengths)
    entry['phrase_len'] = sum(phrase_lengths)
    entry['query_num'] = 0
    entry['final_adv'] = None

    # 1. retrieve logits and label from the target model
    encoded = self.tokenizer(entry['text'],
                             padding=True,
                             truncation=True,
                             return_token_type_ids=False,
                             return_tensors="pt")
    input_ids = encoded['input_ids'].to(self.device)
    attention_mask = encoded['attention_mask'].to(self.device)
    orig_logits = self.target_model(input_ids,
                                    attention_mask).logits.squeeze()
    orig_probs = torch.softmax(orig_logits, -1)
    orig_label = torch.argmax(orig_probs)
    max_prob = torch.max(orig_probs)

    # Nothing to attack when the model is already wrong on the clean text.
    if orig_label != entry['label']:
        entry['pred_success'] = False
        return entry

    # filter out stop_words, digits & symbol combination
    filtered_indices = filter_unwanted_phrases(self.stop_words,
                                               entry['phrases'])
    masked_phrases = get_unk_masked(entry['text'], entry['phrase_offsets'],
                                    filtered_indices)
    importance_scores, _ = get_important_scores(masked_phrases,
                                                self.tokenizer,
                                                self.target_model,
                                                orig_label, max_prob,
                                                orig_probs, self.device)
    entry['query_num'] += len(masked_phrases)

    # These positions refer to the filtered list, so they can only be applied
    # to importance_scores and filtered_indices.
    sorted_filtered_indices_np = torch.argsort(
        importance_scores, dim=-1, descending=True).data.cpu().numpy()
    # Map back to indices that can be used to index the entry's lists.
    sorted_indices_np = np.array(
        filtered_indices)[sorted_filtered_indices_np]

    max_change_threshold = len(entry['phrases'])

    # record how many perturbations have been made
    phrase_changes = 0
    word_changes = 0
    changes = []
    text = entry['text']
    phrases = entry['phrases']
    phrase_offsets = entry['phrase_offsets']
    n_words_in_phrases = entry['n_words_in_phrases']

    for idx, i in enumerate(sorted_indices_np):
        # stop once the share of attacked phrases exceeds the threshold
        if (idx + 1) / max_change_threshold > self.change_threshold:
            break

        phrase_masked_list = get_phrase_masked_list(
            text, [phrase_offsets[i]], [n_words_in_phrases[i]])[0]

        attack_results = []
        for j, masked_text in enumerate(phrase_masked_list):
            # 3. get masked token candidates from MLM
            encoded = self.tokenizer(masked_text,
                                     truncation=True,
                                     padding=True,
                                     return_token_type_ids=False,
                                     return_tensors='pt')
            input_ids = encoded['input_ids'].to(self.device)
            attention_mask = encoded['attention_mask'].to(self.device)
            mask_token_index = torch.where(
                input_ids == self.tokenizer.mask_token_id)[-1]

            # skip if part or all of the masks exceed max_length
            if len(mask_token_index) != j + 1:
                continue

            candidates_list = []
            if len(phrase_masked_list) == 1:
                # Single-token case: put the original token back at the mask
                # position before asking for word substitutes.
                input_ids[0, mask_token_index[
                    0]] = self.tokenizer.convert_tokens_to_ids(phrases[i])
                candidates_list = get_word_substitutes(
                    input_ids,
                    attention_mask,
                    mask_token_index,
                    self.tokenizer,
                    self.mlm_model,
                    K=self.k,
                    threshold=self.conf_thres)
                entry['query_num'] += len(input_ids)
            elif len(phrase_masked_list) > 1:
                candidates_list, qn = get_phrase_substitutes(
                    input_ids,
                    attention_mask,
                    mask_token_index,
                    self.stop_words,
                    self.tokenizer,
                    self.mlm_model,
                    self.device,
                    beam_width=self.beam_width,
                    K=self.k)
                entry['query_num'] += qn

            # The run of j + 1 mask tokens the candidate will replace.
            mask_text = f" {' '.join([self.tokenizer.mask_token] * (j+1))} "

            for candidates in candidates_list:
                perturbed_text = masked_text
                candidate = ' '.join(candidates)
                if phrases[i] == candidate:
                    continue
                # '##' marks a word-piece fragment, not a whole word.
                if '##' in candidate:
                    continue
                if not phrase_is_wanted(self.stop_words, candidate):
                    continue
                # replace the mask_text with candidate
                perturbed_text = perturbed_text.replace(
                    mask_text, candidate, 1)

                # semantic check -> reject if the sentence changes too much
                seq_embeddings = self.sent_encoder(
                    [perturbed_text, entry['text']])
                semantic_sim = np.dot(*seq_embeddings)
                if semantic_sim < self.sent_semantic_thres:
                    continue

                importance_score, perturbed_label = get_important_scores(
                    [perturbed_text], self.tokenizer, self.target_model,
                    orig_label, max_prob, orig_probs, self.device)
                importance_score = importance_score.squeeze()
                entry['query_num'] += 1
                perturbed_label = perturbed_label.squeeze()

                if perturbed_label != orig_label:
                    entry['success'] = True
                    # Report the substitutions committed so far plus this one.
                    # The phrase counter must carry the earlier phrase
                    # substitutions even when this final one is a single word.
                    entry['phrase_changes'] = phrase_changes + (
                        1 if n_words_in_phrases[i] > 1 else 0)
                    entry['word_changes'] = (word_changes +
                                             n_words_in_phrases[i])
                    changes.append((phrases[i], candidate))
                    entry['changes'] = changes
                    entry['final_adv'] = perturbed_text
                    return entry

                attack_results.append(
                    (perturbed_label == orig_label, j, candidate,
                     perturbed_text, importance_score))

        # Best candidate first (highest importance score).
        attack_results = sorted(attack_results,
                                key=lambda x: x[-1],
                                reverse=True)
        if len(attack_results) == 0:
            continue

        # A substitution is committed for this phrase from here on.
        if n_words_in_phrases[i] > 1:
            phrase_changes += 1
        word_changes += n_words_in_phrases[i]

        # attack the max confidence one when there's no success
        result = attack_results[0]
        text = result[3]
        n_words_in_phrases[i] = result[1] + 1

        # update perturbed token to phrases and phrase offsets
        length_diff = len(phrases[i]) - len(result[2])
        if length_diff != 0:
            new_offsets = phrase_offsets[:i]
            for change_i in range(i, len(phrases)):
                start = phrase_offsets[change_i][0]
                end = phrase_offsets[change_i][1] - length_diff
                # the replaced phrase keeps its start; later ones shift
                if change_i != i:
                    start -= length_diff
                new_offsets.append([start, end])
            phrase_offsets = new_offsets

        changes.append((phrases[i], result[2]))
        phrases[i] = result[2]

    entry['success'] = False
    entry['phrase_changes'] = phrase_changes
    entry['word_changes'] = word_changes
    entry['changes'] = changes
    entry['final_adv'] = text

    return entry
def run():
    """Evaluate the BERT+LSTM sentiment model and run the genetic attack on it.

    Steps:
      1. Parse CLI arguments and load the pickled auxiliary dataset.
      2. Pre-process the IMDB test split and build the data loaders.
      3. Restore three copies of the classifier from one checkpoint and
         report clean test accuracy.
      4. Attack correctly classified test sequences with
         ``GeneticAttack_pytorch`` and print the attack success rate at
         normalised-distance thresholds 0.25 and 0.2.

    Raises:
        ValueError: if ``--data`` names a dataset other than ``imdb``.
    """
    args = parser.parse_args()
    data = args.data
    nlayer = args.nlayer
    file_path = args.file_path
    save_path = os.path.join(file_path, 'model_params')
    MAX_VOCAB_SIZE = 50000
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # ever point this at a trusted, locally generated dataset pickle.
    with open('aux_files/dataset_%d.pkl' % MAX_VOCAB_SIZE, 'rb') as f:
        dataset = pickle.load(f)

    max_len = 100

    # Only IMDB is wired up; fail with a clear message instead of a NameError
    # on ``data_path`` further down.
    if data.lower() != 'imdb':
        raise ValueError('dataset %s not supported' % data)
    data_path = 'aclImdb'

    bert = BertModel.from_pretrained('bert-base-uncased')
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    data_processed = pre_processing(data_path, MAX_VOCAB_SIZE, max_len)
    tokenizer_selection = args.tokenizer
    if tokenizer_selection.lower() != 'bert':
        data_processed.processing()
        train_sequences, test_sequences = data_processed.bert_indx(tokenizer)
        print('Self preprocessing')
    else:
        data_processed.bert_tokenize(tokenizer)
        train_sequences, test_sequences = data_processed.bert_indx(tokenizer)
        print('BERT tokenizer')
    train_text_init, test_text_init = data_processed.numerical(
        tokenizer, train_sequences, test_sequences)

    test_text = pad_sequences(test_text_init, maxlen=max_len, padding='post')
    test_target = data_processed.all_test_labels
    SAMPLE_SIZE = args.sample_size
    test_data, all_test_data = data_loading(test_text, test_target,
                                            SAMPLE_SIZE)

    # The attack works on one sequence at a time; the accuracy pass is batched.
    batch_size = 1
    test_loader_bert = DataLoader(test_data,
                                  batch_size=batch_size,
                                  shuffle=False)
    all_test_loader_bert = DataLoader(all_test_data,
                                      batch_size=128,
                                      shuffle=True)

    lstm_size = 128
    rnn_state_save = os.path.join(save_path, 'best_bert_0.7_0.001_bert_150')
    # Read the checkpoint once and map it to CPU so that a checkpoint written
    # on a GPU machine still loads when only the CPU is available.
    state_dict = torch.load(rnn_state_save, map_location='cpu')

    def build_model():
        """Create one classifier instance restored from the checkpoint."""
        net = bert_lstm(bert, 2, False, nlayer, lstm_size, True, 0.7)
        net.load_state_dict(state_dict)
        net = net.to(device)
        net.eval()
        return net

    model = build_model()

    # ---- clean test accuracy -------------------------------------------
    pred_chunks = []
    target_chunks = []
    with torch.no_grad():
        for seqs, length, target in all_test_loader_bert:
            seqs = seqs.type(torch.LongTensor)
            # The model expects each batch sorted by decreasing length.
            len_order = torch.argsort(length, descending=True)
            length = length[len_order]
            seqs = seqs[len_order]
            target = target[len_order]
            seqs, target, length = seqs.to(device), target.to(
                device), length.to(device)
            output, pred_out = model.pred(seqs, length, False)
            pred_chunks.append(pred_out.cpu())
            target_chunks.append(target.type(torch.float).cpu())
    # Concatenate once instead of re-allocating on every batch.
    test_pred = torch.cat(pred_chunks, dim=0) if pred_chunks else torch.tensor([])
    test_targets = torch.cat(target_chunks) if target_chunks else torch.tensor([])

    accuracy = model.evaluate_accuracy(test_pred.numpy(), test_targets.numpy())
    print('Test Accuracy:{:.4f}.'.format(accuracy))
    print('\n')

    # ---- genetic attack configuration ----------------------------------
    n1 = 8
    n2 = 4
    pop_size = 60
    max_iters = 20
    n_prefix = 6
    n_suffix = 6

    batch_model = build_model()
    neighbour_model = build_model()
    lm_model = gpt_2_get_words_probs()
    ga_attack = GeneticAttack_pytorch(model,
                                      batch_model,
                                      neighbour_model,
                                      compute_dis,
                                      lm_model,
                                      tokenizer=tokenizer,
                                      max_iters=max_iters,
                                      dataset=dataset,
                                      pop_size=pop_size,
                                      n1=n1,
                                      n2=n2,
                                      n_prefix=n_prefix,
                                      n_suffix=n_suffix,
                                      use_lm=True,
                                      use_suffix=True)

    TEST_SIZE = args.test_size
    order_pre = 0  # index of the first loader item to attack (resume offset)
    n = 0  # number of sequences attacked so far
    orig_list = []
    adv_list = []
    dist_list = []

    for order, (seq, l, target) in enumerate(test_loader_bert):
        if order < order_pre:
            continue

        # Number of non-zero token ids in the sequence.
        seq_len = np.sum(np.sign(seq.numpy()))
        seq = seq.type(torch.LongTensor)
        seq, l = seq.to(device), l.to(device)
        model.eval()
        with torch.no_grad():
            prediction = model.pred(seq, l,
                                    False)[1].cpu().detach().numpy()
        orig_pred = np.argmax(prediction)

        # Only attack sequences the model classifies correctly.
        if orig_pred != target:
            continue
        if seq_len > 100:
            continue

        print('Sequence number:{}'.format(order))
        print('Predicted value:{}'.format(prediction))
        print('Length of sentence: {}, Number of samples:{}'.format(
            l.item(), n + 1))

        # Binary task: the attack target is the opposite label.
        target = int(1 - target)
        x_adv, seq_out = ga_attack.attack(seq, target,
                                          l.type(torch.LongTensor))
        orig_list.append(seq)
        adv_list.append(x_adv)
        if x_adv is None:
            print('%d failed' % (order))
            # Sentinel distance that can never pass a success threshold.
            dist_list.append(100000)
        else:
            changed = np.where(np.array(seq_out) != np.array(x_adv))[0]
            num_changes = np.sum(np.array(seq_out) != np.array(x_adv))
            print('%d - %d changed.' % (order, num_changes))
            dist_list.append(num_changes)
            for i in changed:
                print('{} ----> {}'.format(seq_out[i], x_adv[i]))
        print('--------------------------')

        n += 1
        # NOTE(review): this stops after TEST_SIZE + 1 attacked sequences;
        # confirm whether ``>=`` was intended.
        if n > TEST_SIZE:
            break

    # NOTE(review): ``x.shape[1]`` is the padded width of the input tensor,
    # not the true token count in ``l`` — confirm this is the intended
    # normaliser.
    orig_len = [x.shape[1] for x in orig_list]
    normalized_dist_list = [
        dist / length for dist, length in zip(dist_list, orig_len)
    ]
    for success_threshold in (0.25, 0.2):
        successful_attacks = [
            x <= success_threshold for x in normalized_dist_list
        ]
        print('Attack success rate : {:.2f}%'.format(
            np.mean(successful_attacks) * 100))
def search(
    self,
    query,
    top_n=5,
    use_top_n_sentences=20,
    rank_with_next_sentence_prediction=True,
):
    """Return the charities that best match ``query``.

    Each charity is scored by the mean similarity of its
    ``use_top_n_sentences`` most similar sentence embeddings, and the
    ``top_n`` best charities are kept.  When
    ``rank_with_next_sentence_prediction`` is true those charities are
    re-ranked by ``calculate_next_sentence_probability`` and the reported
    score is that probability; otherwise the score is the mean similarity.

    Args:
        query: Free-text search query.
        top_n: Number of charities to return.
        use_top_n_sentences: Sentences per charity that enter the mean.
        rank_with_next_sentence_prediction: Re-rank with the next-sentence
            model instead of returning the similarity order.

    Returns:
        A list of ``CharitySearchResult``, best match first.
    """
    query_embedding = embed_sentences([query]).cpu()
    similarities = calculate_similarities(
        query_embedding,
        self._embeddings,
    )
    charity_similarities = pd.DataFrame({
        'charity': self._embeddings_charity_index,
        'similarity': similarities,
    })
    best_match_charities = (
        charity_similarities
        .sort_values('similarity', ascending=False)
        .groupby('charity')
        .head(use_top_n_sentences)
        .groupby('charity')
        .mean()
        .sort_values('similarity', ascending=False)
        .head(top_n)
    )
    best_match_indices = best_match_charities.index.tolist()
    matched_charities = [self._charities[i] for i in best_match_indices]

    # Nothing matched: skip the next-sentence model, which has nothing to score.
    if not matched_charities:
        return []

    if rank_with_next_sentence_prediction:
        descriptions = [
            charity.description for charity in matched_charities
        ]
        probabilities = calculate_next_sentence_probability(
            query,
            descriptions,
        )
        scores = probabilities.tolist()
        # Descending order of probability.  The same permutation must be
        # applied to the scores; zipping the re-ordered charities with the
        # unsorted probabilities attached the wrong score to each result.
        rank_indices = torch.argsort(probabilities).tolist()[::-1]
        ranked = [(matched_charities[i], scores[i]) for i in rank_indices]
    else:
        matched_similarities = best_match_charities['similarity'].tolist()
        ranked = list(zip(matched_charities, matched_similarities))

    return [
        CharitySearchResult(
            name=charity.name,
            url=charity.url,
            description=charity.description,
            score=score,
        ) for (charity, score) in ranked
    ]
def replay_buffer_training(self, sample, train_results, n):
    """Run one training step on a replay-buffer sample.

    The twin Q networks are updated on every call.  The policy (and, when
    ``self.entropy_tunning`` is set, the temperature ``alpha``) is updated
    only when ``n`` is a multiple of ``self.rbi_delayed_policy_update``.

    Args:
        sample: Mapping with keys ``'s'``, ``'a'``, ``'r'``, ``'t'`` and
            ``'stag'`` (state, action, reward, terminal flag, next state).
        train_results: Nested dict; scalars are appended under
            ``train_results['scalar'][...]``.
        n: Training step counter.

    Returns:
        The same ``train_results`` object, updated in place.
    """
    s, a, r, t, stag = [sample[k] for k in ['s', 'a', 'r', 't', 'stag']]

    self.train_mode()
    # alpha is reset to zero here, so the entropy term in the target below
    # vanishes for this step.
    self.alpha = 0

    # ---- bootstrapped target from the target critics --------------------
    with torch.no_grad():
        self.pi_net(stag)
        pi_tag_1 = self.pi_net.sample(self.rbi_learner_samples)
        pi_tag_2 = self.pi_net.sample(self.rbi_learner_samples)
        q_target_1 = self.q_target_1(stag, pi_tag_1).mean(dim=0)
        q_target_2 = self.q_target_2(stag, pi_tag_2).mean(dim=0)
        log_pi_tag = self.pi_net.log_prob(torch.cat(
            [pi_tag_1, pi_tag_2])).mean(dim=0).sum(dim=1)
        q_target = torch.min(q_target_1,
                             q_target_2) - self.alpha * log_pi_tag
        g = r + (1 - t) * self.gamma**self.n_steps * q_target

    # ---- delayed policy update -------------------------------------------
    if not n % self.rbi_delayed_policy_update:
        self.pi_net(s)
        pi = self.pi_net.rsample(self.rbi_learner_samples)

        # Detached copy of the sampled actions used as a leaf to
        # differentiate the critics with respect to the action.
        beta = pi.detach().requires_grad_(True)
        qa_1 = self.q_net_1(s, beta)
        qa_2 = self.q_net_2(s, beta)
        qa = torch.min(qa_1, qa_2)
        gradients = autograd.grad(outputs=qa,
                                  inputs=beta,
                                  grad_outputs=torch.ones_like(qa),
                                  create_graph=False,
                                  retain_graph=False,
                                  only_inputs=True)[0]

        # Move the sampled actions one clipped gradient step up the critic.
        lr = .001
        beta = clipped_gd(beta, gradients, lr, 1.).detach()

        log_pi = self.pi_net.log_prob(pi)
        log_beta = self.pi_net.log_prob(beta)

        with torch.no_grad():
            qa_1 = self.q_net_1(s, beta)
            qa_2 = self.q_net_2(s, beta)
            qatag = torch.min(qa_1, qa_2).unsqueeze(-1)

        # Rank the improved actions by critic value along dim 0 (double
        # argsort) and weight the best ones with cmax, the rest with cmin.
        cmin = 0.5
        cmax = 1.5
        rank = torch.argsort(torch.argsort(qatag, dim=0, descending=True),
                             dim=0,
                             descending=False)
        w = cmin * torch.ones_like(beta)
        # NOTE(review): ``n`` here is the training step counter passed to
        # this method; the formula looks like it expects the number of
        # sampled actions (``self.rbi_learner_samples``) — confirm.
        m = int((1 - cmin) * n / (cmax - cmin))
        w += (cmax - cmin) * (rank < m).float()
        w += ((1 - cmin) * n - m * (cmax - cmin)) * (rank == m).float()

        loss_p = -(w * (log_beta - log_pi)).sum(dim=-1).mean(dim=0).sum()

        with torch.no_grad():
            entropy = self.pi_net.entropy().sum(dim=-1).mean()

        self.optimizer_p.zero_grad()
        loss_p.backward()
        if self.clip_p:
            # clip_grad_norm_ is the in-place replacement for the
            # deprecated clip_grad_norm.
            nn.utils.clip_grad_norm_(self.pi_net.parameters(), self.clip_p)
        self.optimizer_p.step()

        # alpha loss
        if self.entropy_tunning:
            alpha_loss = -(self.log_alpha *
                           (log_pi + self.target_entropy).detach()).mean()
            self.optimizer_alpha.zero_grad()
            alpha_loss.backward()
            self.optimizer_alpha.step()
            self.alpha = float(self.log_alpha.exp())

        train_results['scalar']['alpha'].append(float(self.alpha))
        train_results['scalar']['objective'].append(float(-loss_p))
        train_results['scalar']['entropy'].append(float(entropy))

    # ---- critic update (every step) ---------------------------------------
    qa = self.q_net_1(s, a)
    loss_q_1 = F.mse_loss(qa, g, reduction='mean')

    qa = self.q_net_2(s, a)
    loss_q_2 = F.mse_loss(qa, g, reduction='mean')

    self.optimizer_q_1.zero_grad()
    loss_q_1.backward()
    if self.clip_q:
        nn.utils.clip_grad_norm_(self.q_net_1.parameters(), self.clip_q)
    self.optimizer_q_1.step()

    self.optimizer_q_2.zero_grad()
    loss_q_2.backward()
    if self.clip_q:
        nn.utils.clip_grad_norm_(self.q_net_2.parameters(), self.clip_q)
    self.optimizer_q_2.step()

    train_results['scalar']['loss_q_1'].append(float(loss_q_1))
    train_results['scalar']['loss_q_2'].append(float(loss_q_2))

    # Polyak-average the online critics into their target copies.
    soft_update(self.q_net_1, self.q_target_1, self.tau)
    soft_update(self.q_net_2, self.q_target_2, self.tau)

    return train_results
def argsort(input, dim, descending):
    """Return the indices that sort ``input`` along ``dim``.

    Thin wrapper around ``th.argsort``; ``descending`` selects the order.
    """
    sorted_indices = th.argsort(input, dim=dim, descending=descending)
    return sorted_indices
def compute_jaccard_distance(target_features,
                             k1=20,
                             k2=6,
                             cam_features=None,
                             print_flag=True,
                             search_option=3):
    """Compute a k-reciprocal Jaccard distance matrix between feature rows.

    Args:
        target_features: 2-D tensor with one feature row per sample; the
            number of rows is taken as ``N``.
        k1: Neighbourhood size passed to ``k_reciprocal_neigh``; half of it
            (rounded) is used for the expansion candidates.
        k2: Number of nearest neighbours averaged in the expansion step;
            ``k2 == 1`` disables that step.
        cam_features: Optional second feature tensor; its pairwise distance
            is subtracted from the main distance with weight 0.1.
        print_flag: Print progress messages and the elapsed time.
        search_option: Values below 3 move the features to CUDA for the
            distance computation; otherwise they are moved to the CPU.

    Returns:
        An ``(N, N)`` CPU tensor of Jaccard distances with negative entries
        clamped to 0.
    """
    end = time.time()
    N = target_features.size(0)

    if (search_option < 3):
        # accelerate matrix distance computing
        target_features = target_features.cuda()
    else:
        target_features = target_features.cpu()

    if print_flag:
        print('Computing original distance...')

    # Entry (i, j) becomes 2*|x_i|^2 - 2*x_i.x_j.
    # NOTE(review): this equals the squared Euclidean distance only if all
    # rows have the same norm (e.g. L2-normalised features) — confirm.
    original_dist = torch.pow(target_features, 2).sum(dim=1, keepdim=True) * 2
    original_dist = original_dist.expand(
        N, N) - 2 * torch.mm(target_features, target_features.t())

    if (cam_features is not None):
        if (search_option < 3):
            # accelerate matrix distance computing
            cam_features = cam_features.cuda()
        else:
            cam_features = cam_features.cpu()
        cam_dist = torch.pow(cam_features, 2).sum(dim=1, keepdim=True) * 2
        cam_dist = cam_dist.expand(
            N, N) - 2 * torch.mm(cam_features, cam_features.t())
        original_dist -= 0.1 * cam_dist
        del cam_dist

    # Normalise each column by its maximum, then transpose.
    original_dist /= original_dist.max(0)[0]
    original_dist = original_dist.t()
    # Row i of initial_rank lists all samples ordered by distance to i.
    initial_rank = torch.argsort(original_dist, dim=-1)

    # Everything below runs on the CPU.
    original_dist = original_dist.cpu()
    initial_rank = initial_rank.cpu()
    all_num = gallery_num = original_dist.size(0)

    del target_features

    if print_flag:
        print('Computing Jaccard distance...')

    # Neighbour sets for every sample at sizes k1 and round(k1 / 2).
    nn_k1 = []
    nn_k1_half = []
    for i in range(all_num):
        nn_k1.append(k_reciprocal_neigh(initial_rank, i, k1))
        nn_k1_half.append(
            k_reciprocal_neigh(initial_rank, i, int(np.around(k1 / 2))))

    # V[i] holds the normalised exp(-distance) weights of sample i's
    # expanded neighbour set.
    V = torch.zeros(all_num, all_num)
    for i in range(all_num):
        k_reciprocal_index = nn_k1[i]
        k_reciprocal_expansion_index = k_reciprocal_index
        for candidate in k_reciprocal_index:
            candidate_k_reciprocal_index = nn_k1_half[candidate]
            # Add the candidate's own neighbours when more than 2/3 of them
            # are already in sample i's neighbour set.
            if (len(
                    np.intersect1d(candidate_k_reciprocal_index,
                                   k_reciprocal_index)) >
                    2 / 3 * len(candidate_k_reciprocal_index)):
                k_reciprocal_expansion_index = torch.cat(
                    (k_reciprocal_expansion_index,
                     candidate_k_reciprocal_index))

        k_reciprocal_expansion_index = torch.unique(
            k_reciprocal_expansion_index)  ## element-wise unique
        weight = torch.exp(-original_dist[i, k_reciprocal_expansion_index])
        V[i, k_reciprocal_expansion_index] = weight / torch.sum(weight)

    if k2 != 1:
        # Replace each row of V by the mean of the rows of its k2 nearest
        # neighbours.
        k2_rank = initial_rank[:, :k2].clone().view(-1)
        V_qe = V[k2_rank]
        V_qe = V_qe.view(initial_rank.size(0), k2, -1).sum(1)
        V_qe /= k2
        V = V_qe
        del V_qe
    del initial_rank

    # invIndex[j] lists the rows of V that are non-zero in column j.
    invIndex = []
    for i in range(gallery_num):
        invIndex.append(torch.nonzero(V[:, i])[:, 0])  #len(invIndex)=all_num

    jaccard_dist = torch.zeros_like(original_dist)
    for i in range(all_num):
        # temp_min accumulates sum_j min(V[i, j], V[k, j]) for every row k
        # that shares a non-zero column with row i.
        temp_min = torch.zeros(1, gallery_num)
        indNonZero = torch.nonzero(V[i, :])[:, 0]
        indImages = []
        indImages = [invIndex[ind] for ind in indNonZero]
        for j in range(len(indNonZero)):
            temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + torch.min(
                V[i, indNonZero[j]], V[indImages[j], indNonZero[j]])
        jaccard_dist[i] = 1 - temp_min / (2 - temp_min)

    del invIndex
    del V

    # Clamp negative entries to zero.
    pos_bool = (jaccard_dist < 0)
    jaccard_dist[pos_bool] = 0.0
    if print_flag:
        print("Time cost: {}".format(time.time() - end))

    return jaccard_dist
output = Q11 + Q21 + Q12 + Q22 return output if __name__ == '__main__': # Set the device if torch.cuda.is_available(): device = torch.device("cuda:0") else: device = torch.device("cpu") print("WARNING: CPU only, this will be slow!") image = image_loader("umbrella.jpg", 228) print(image.shape) model = torchvision.models.resnet18(pretrained=True) model.to(device) outputs = model(image) print(torch.argsort(outputs)) print(dict([*model.named_modules()]).keys()) fe = FeatureExtractor(model, ["conv1", "layer2.0.conv1", "layer4.1.conv2"]) features = fe.add_features(image) print(features.shape) #print(fe(image)["conv1", "layer4.1.conv2"].shape) plt.imshow(features.squeeze()[675].detach().cpu().numpy()) plt.show()
def test_step(model, test_triples, all_true_triples, args):
    '''
    Evaluate the model on test or valid datasets.

    Every test triple is ranked twice (head and tail corruption) against all
    entities, and the filtered MRR, MR, HITS@1, HITS@3 and HITS@10 are
    averaged over all rankings and returned as a dict.
    '''
    model.eval()

    def make_loader(batch_mode):
        # One evaluation loader per corruption mode.
        return DataLoader(
            TestDataset(test_triples, all_true_triples, args.nentity,
                        args.nrelation, batch_mode),
            batch_size=args.test_batch_size,
            num_workers=max(1, args.cpu_num // 2),
            collate_fn=TestDataset.collate_fn)

    loaders = [make_loader('head-batch'), make_loader('tail-batch')]

    # Column of the positive triple that holds the entity being ranked.
    gold_column = {'head-batch': 0, 'tail-batch': 2}

    logs = []
    step = 0
    total_steps = sum(len(loader) for loader in loaders)

    with torch.no_grad():
        for loader in loaders:
            for positive_sample, negative_sample, filter_bias, mode in loader:
                if args.cuda:
                    positive_sample = positive_sample.cuda()
                    negative_sample = negative_sample.cuda()
                    filter_bias = filter_bias.cuda()

                batch_size = positive_sample.size(0)

                score = model((positive_sample, negative_sample), mode)
                score += filter_bias

                # Explicitly sort all the entities to ensure that there is
                # no test exposure bias
                argsort = torch.argsort(score, dim=1, descending=True)

                if mode not in gold_column:
                    raise ValueError('mode %s not supported' % mode)
                positive_arg = positive_sample[:, gold_column[mode]]

                for row in range(batch_size):
                    # argsort holds entity ids in rank order; the position
                    # of the gold entity is its zero-based rank.
                    position = (argsort[row] == positive_arg[row]).nonzero()
                    assert position.size(0) == 1

                    # position + 1 is the true ranking used in the metrics
                    ranking = 1 + position.item()
                    logs.append({
                        'MRR': 1.0 / ranking,
                        'MR': float(ranking),
                        'HITS@1': 1.0 if ranking <= 1 else 0.0,
                        'HITS@3': 1.0 if ranking <= 3 else 0.0,
                        'HITS@10': 1.0 if ranking <= 10 else 0.0,
                    })

                if step % args.test_log_steps == 0:
                    logging.info('Evaluating the model... (%d/%d)', step,
                                 total_steps)

                step += 1

    return {
        metric: sum(log[metric] for log in logs) / len(logs)
        for metric in logs[0]
    }
def speech_collate(batch, pad_val=0.0):
    r"""Collate a list of utterance dicts into one padded, length-sorted batch.

    Each item provides ``"utt_key"``, ``"inpt_feat"`` (time x feature) and
    ``"speaker_int"``, and optionally ``"target_feat"``.  Features are padded
    along time with ``pad_val`` up to the longest input, and every field is
    reordered by decreasing input length.
    """
    utt_keys = [item["utt_key"] for item in batch]
    inputs = [item["inpt_feat"] for item in batch]
    targets = [item["target_feat"] for item in batch if "target_feat" in item]
    speakers = [item["speaker_int"] for item in batch]

    lengths = [feat.size(0) for feat in inputs]
    max_seq = max(lengths)

    def pad_time(feat):
        # (T, f) -> (f, T), then right-pad the time axis up to max_seq.
        padder = ConstantPad1d((0, int(max_seq - feat.size(0))),
                               value=pad_val)
        return padder(feat.transpose(0, 1))

    def stack_sorted(feats, order):
        # Pad, reorder, stack to (B, f, T) and flip to (B, T, f).
        padded = [pad_time(feat) for feat in feats]
        stacked = torch.stack([padded[i] for i in order], dim=0)
        return stacked.permute(0, 2, 1)

    # Descending length order, as required by torch RNN packing utilities.
    order = torch.argsort(torch.tensor(lengths), descending=True).tolist()

    batch_dict = {
        "utt_keys": [utt_keys[i] for i in order],
        "seq_len": sorted(lengths, reverse=True),
        "input_batch": stack_sorted(inputs, order),
        "speaker_ints": torch.tensor([speakers[i] for i in order]),
    }

    if "target_feat" in batch[0]:
        batch_dict["target_batch"] = stack_sorted(targets, order)

    return batch_dict
def argsort(self, x, dim=-1):
    """Return the indices that sort ``x`` in ascending order along ``dim``."""
    sorted_indices = torch.argsort(x, dim=dim)
    return sorted_indices
def ctc_beam_search_decoder(log_probs_seq,
                            lm_scorer=None,
                            beam_size=100,
                            blank=0,
                            cutoff_prob=1.0,
                            cutoff_top_n=None):
    """
    Performs prefix beam search on the output of a CTC network.

    Args:
        log_probs_seq (tensor): The log probabilities. Should be a 2D array
            (timesteps x alphabet_size)
        lm_scorer (callable): Optional language-model scorer. It is called
            as ``lm_scorer(prefix, state)`` (and with ``eos=True`` after the
            last timestep) and its result is added to the beam's
            ``score_lm``. Its ``is_valid`` attribute is passed to ``Beams``.
        beam_size (int): The beam width. Will keep the `beam_size` most
            likely candidates at each timestep.
        blank (int): Blank label index
        cutoff_prob: Cutoff probability for pruning. Defaults to `1.0`,
            meaning no pruning
        cutoff_top_n: Cutoff number for pruning.

    Returns:
        list: One ``(-score_ctc, prefix, timesteps)`` tuple per surviving
        beam, in the order produced by ``beams.sort()``. The LM score is
        not part of the returned value.
    """
    T, V = log_probs_seq.shape
    log_cutoff_prob = math.log(cutoff_prob)
    # A falsy cutoff_top_n (None or 0) means "consider the whole alphabet".
    cutoff_top_n = min(cutoff_top_n, V) if cutoff_top_n else V

    beams = Beams(is_valid=lm_scorer.is_valid if lm_scorer else None)

    for t in range(T):

        log_probs = log_probs_seq[t]

        curr_beams = list(beams.items())

        # A default dictionary to store the next step candidates.
        num_prefixes = len(curr_beams)

        # Score of the last beam extended by a blank; used below as the
        # early-exit threshold.
        # NOTE(review): assumes ``beams`` is never empty here and that
        # ``beams.items()`` yields beams best-first — confirm against Beams.
        min_cutoff = curr_beams[-1][-1].score_ctc + log_probs[blank]

        # Pruning step: by default every token is a candidate.
        pruned_indexes = torch.arange(len(log_probs)).tolist()
        if log_cutoff_prob < 0.0 or cutoff_top_n < V:
            idxs = torch.argsort(log_probs, descending=True)
            # Keep the most probable tokens whose cumulative probability
            # stays within cutoff_prob, capped at cutoff_top_n.
            n_idxs = min(
                (logcumsumexp(log_probs[idxs], 0) <= log_cutoff_prob).sum(),
                cutoff_top_n, V)
            pruned_indexes = idxs[:n_idxs].tolist()

        for token_index in pruned_indexes:

            p = log_probs[token_index].item()

            # The variables p_b and p_nb are respectively the
            # probabilities for the prefix given that it ends in a
            # blank and does not end in a blank at this time step.
            for prefix, beam in curr_beams:
                p_b, p_nb = beam.p_b, beam.p_nb

                # With a full beam, stop at the first beam whose extension
                # falls below the threshold.
                if (num_prefixes
                        == beam_size) and p + beam.score_ctc < min_cutoff:
                    break

                # If we propose a blank the prefix doesn't change. Only the probability of ending
                # in blank gets updated.
                if token_index == blank:
                    beam.n_p_b = np.logaddexp(beam.n_p_b, beam.score_ctc + p)
                    continue

                # Extend the prefix by the new character s and add it to the beam. Only the
                # probability of not ending in blank gets updated.
                last_token_index = prefix[-1] if prefix else None

                if token_index == last_token_index:
                    # If s is repeated at the end we also update the unchanged prefix. This is the
                    # merging case.
                    beam.n_p_nb = np.logaddexp(beam.n_p_nb, p_nb + p)

                n_prefix = prefix + (token_index, )

                # Must update state for prefix search
                n_beam = beams.getitem(n_prefix, previous_beam=beam)
                # A falsy result means the extended prefix is not tracked;
                # skip it.
                if not n_beam:
                    continue

                n_p_b, n_p_nb = n_beam.n_p_b, n_beam.n_p_nb

                if token_index == last_token_index and p_b > -float('inf'):
                    # We don't include the previous probability of not ending in blank (p_nb)
                    # if s is repeated at the end. The CTC algorithm merges characters not
                    # separated by a blank.
                    n_p_nb = np.logaddexp(n_p_nb, p_b + p)
                elif token_index != last_token_index:
                    n_p_nb = np.logaddexp(n_p_nb, beam.score_ctc + p)

                if lm_scorer:
                    # LM scorer has access and updates the state variable
                    p_lm = lm_scorer(n_prefix, n_beam.state)
                    n_beam.score_lm = beam.score_lm + p_lm

                n_beam.n_p_b = n_p_b
                n_beam.n_p_nb = n_p_nb

        # Update the probabilities
        beams.step()
        # Trim the beam before moving on to the next time-step.
        beams.topk_(beam_size)

    # score the eos
    # TODO improve this step (better readability)
    if lm_scorer:
        for prefix, beam in beams.items():
            if prefix:
                p_lm = lm_scorer(prefix, beam.state, eos=True)
                beam.score_lm += p_lm

    # Return the top beam_size -log probabilities without the lm scoring
    return [(-beam.score_ctc, p, beam.timesteps) for p, beam in beams.sort()]
def argsort(self, dim=None, descending=False):
    r"""See :func:`torch.argsort`.

    Args:
        dim: Dimension to sort along.  ``None`` (the default) selects the
            last dimension, matching ``torch.argsort``'s own default;
            ``torch.argsort`` requires an integer for ``dim``, so ``None``
            must not be forwarded.
        descending: Sort from largest to smallest when true.

    Returns:
        Tensor of indices that sort ``self`` along ``dim``.
    """
    if dim is None:
        dim = -1
    return torch.argsort(self, dim, descending)
import torch

# Random 4x4 matrix to demonstrate argsort.
a = torch.randn(4, 4)
print(a)

# Per-row indices from largest to smallest value (last dim is the default).
sa = a.argsort(descending=True)
print(sa)

# Indices of the two largest entries in every row.
print(sa[..., :2])
def _get_new_words(self, current_text, indices_to_modify):
    """Propose words to insert before each requested index via the masked LM.

    Args:
        current_text (AttackedText): Text to generate insertions for.
        indices_to_modify (list[int]): Word indices before which a mask
            token is inserted and filled in.

    Returns:
        A 2-D list: for every index, the candidate words in decreasing
        model probability (empty when the mask token was truncated away).
    """
    mask_token = self._lm_tokenizer.mask_token
    # Insert the mask before each index, then crop to a window around it.
    masked_texts = [
        current_text.insert_text_before_word_index(
            index, mask_token).text_window_around_index(
                index, self.window_size) for index in indices_to_modify
    ]

    new_words = []
    offset = 0
    while offset < len(masked_texts):
        inputs = self._encode_text(
            masked_texts[offset:offset + self.batch_size])
        ids = inputs["input_ids"].tolist()
        with torch.no_grad():
            preds = self._language_model(**inputs)[0]

        for row, token_ids in enumerate(ids):
            try:
                # Need try-except b/c mask-token located past max_length
                # might be truncated by tokenizer
                masked_index = token_ids.index(
                    self._lm_tokenizer.mask_token_id)
            except ValueError:
                new_words.append([])
                continue

            mask_token_probs = torch.softmax(preds[row, masked_index], dim=0)
            ranked_indices = torch.argsort(mask_token_probs, descending=True)

            candidates = []
            for ranked in ranked_indices:
                token_id = ranked.item()
                word = self._lm_tokenizer.convert_ids_to_tokens(token_id)
                model_type = self._language_model.config.model_type
                if utils.check_if_subword(word, model_type,
                                          (masked_index == 1)):
                    word = utils.strip_BPE_artifacts(word, model_type)

                confident = mask_token_probs[
                    token_id] >= self.min_confidence
                if (confident and utils.is_one_word(word)
                        and not utils.check_if_punctuations(word)):
                    candidates.append(word)

                # Stop once enough candidates are collected or the
                # remaining (lower-ranked) tokens are below the threshold.
                if len(candidates) >= self.max_candidates:
                    break
                if mask_token_probs[token_id] < self.min_confidence:
                    break

            new_words.append(candidates)

        offset += self.batch_size

    return new_words
add_special_tokens=False, return_tensors="pt").input_ids.to(device) predictions_aff_a = model(input_ids, decoder_input_ids=decoder_ids).logits input_ids = torch.tensor( [tokenizer.encode(neg_a[i], add_special_tokens=True)]).to(device) with torch.no_grad(): decoder_ids = tokenizer("<pad> <extra_id_0>", add_special_tokens=False, return_tensors="pt").input_ids.to(device) predictions_neg_a = model(input_ids, decoder_input_ids=decoder_ids).logits aff_a_preds = [] predictions_aff_a = torch.softmax(predictions_aff_a[0, 1], dim=0) # 1 is position of <extra_id_0> top_inds = torch.argsort(predictions_aff_a, descending=True)[:5].cpu().numpy() for top_ind in top_inds: aff_a_preds.append(tokenizer.decode([top_ind])) neg_a_preds = [] predictions_neg_a = torch.softmax(predictions_neg_a[0, 1], dim=0) # 1 is position of <extra_id_0> top_inds = torch.argsort(predictions_neg_a, descending=True)[:5].cpu().numpy() for top_ind in top_inds: neg_a_preds.append(tokenizer.decode([top_ind])) print(aff_a_preds, neg_a_preds)
metrics[f'{stage}_{metric}'] = [] for epoch in range(1, EPOCHS + 1): for data_loader in [data_loader_train, data_loader_test]: metrics_epoch = {key: [] for key in metrics.keys()} stage = 'train' if data_loader == data_loader_test: stage = 'test' for x, y, lengths in data_loader: x = x.float().to(DEVICE) y = y.float().to(DEVICE) idxes = torch.argsort(lengths, descending=True) lengths = lengths[idxes] max_len = int(lengths.max()) # sort sentences by length desc and slice # in x last word is either empty or 'END', and in y it is shifted first word) x = x[idxes, :max_len] y = y[idxes, :max_len] x_packed = pack_padded_sequence(x, lengths, batch_first=True) y_packed = pack_padded_sequence(y, lengths, batch_first=True) y_prim_packed = model.forward(x_packed) weights = torch.from_numpy(dataset_full.weights[torch.argmax( y_packed.data, dim=1).cpu().numpy()]) weights = weights.unsqueeze(dim=1).to(DEVICE) loss = -torch.mean(
def inverse_permutation(self):
    """Return the argsort of ``self.permutation``.

    When ``self.permutation`` holds a permutation of ``0..n-1``, the result
    is its inverse: indexing with one and then the other restores the
    original order.
    """
    forward_order = self.permutation
    return torch.argsort(forward_order)
def test_box_decomposition_list(self):
    """Check BoxDecompositionList against its member partitionings.

    Runs for 2 and 3 objectives and for float/double. The list is built from
    one partitioning with an empty Pareto set and one with two points.
    """

    def assert_same(actual, expected):
        self.assertTrue(torch.equal(actual, expected))

    zero_ref = torch.zeros(3, device=self.device)
    base_Y = torch.tensor(
        [
            [1.0, 2.0, 1.0],
            [2.0, 0.5, 1.0],
        ],
        device=self.device,
    )
    for m in (2, 3):
        for dtype in (torch.float, torch.double):
            ref_point = zero_ref[:m].to(dtype=dtype)
            pareto_Y = base_Y[:, :m].to(dtype=dtype)
            empty_Y, full_Y = pareto_Y[:0, :m], pareto_Y[:, :m]
            parts = [
                FastNondominatedPartitioning(ref_point=ref_point, Y=Y)
                for Y in (empty_Y, full_Y)
            ]
            decomposition = BoxDecompositionList(*parts)

            # test pareto Y
            listed_pareto = decomposition.pareto_Y
            if m == 2:
                expected_full = full_Y[torch.argsort(-full_Y[:, 0])]
            else:
                expected_full = full_Y
            assert_same(listed_pareto[0], empty_Y)
            assert_same(listed_pareto[1], expected_full)

            # test ref_point
            assert_same(
                decomposition.ref_point,
                ref_point.unsqueeze(0).expand(2, -1),
            )

            # test get_hypercell_bounds
            bounds = decomposition.get_hypercell_bounds()
            expected_bounds_full = parts[1].get_hypercell_bounds()
            assert_same(bounds[:, 1], expected_bounds_full)
            # the first pareto set in the list is empty so the cell bounds
            # should contain one cell that spans the entire area (bounded by
            # the ref_point) and then empty cells, bounded from above and
            # below by the ref point.
            expected_bounds_empty = torch.zeros_like(expected_bounds_full)
            # set the upper bound for the first cell to be inf
            expected_bounds_empty[1, 0, :] = float("inf")
            assert_same(bounds[:, 0], expected_bounds_empty)

            # test compute_hypervolume
            expected_hv = torch.stack(
                [part.compute_hypervolume() for part in parts], dim=0)
            assert_same(expected_hv, decomposition.compute_hypervolume())

            # test update with batched tensor
            new_Y = torch.empty(2, 1, m, dtype=dtype, device=self.device)
            new_Y[0] = 1
            new_Y[1] = 3
            decomposition.update(new_Y)
            listed_pareto = decomposition.pareto_Y
            assert_same(listed_pareto[0], new_Y[0])
            assert_same(listed_pareto[1], new_Y[1])

            # test update with list
            decomposition = BoxDecompositionList(*parts)
            decomposition.update([new_Y[0], new_Y[1]])
            listed_pareto = decomposition.pareto_Y
            assert_same(listed_pareto[0], new_Y[0])
            assert_same(listed_pareto[1], new_Y[1])

            # test update with wrong shape
            decomposition = BoxDecompositionList(*parts)
            with self.assertRaises(BotorchTensorDimensionError):
                decomposition.update(new_Y.unsqueeze(0))
noisy_img_list.append(noisy_img) for step in range(num_steps): step_loss = 0 step_fitloss = 0 for i in range(num_image): img1 = image_list[i] noisy_img1 = noisy_img_list[i] x = im2col(img1, (patch_size, patch_size)) # x_noisy = im2col(noisy_img1,(patch_size,patch_size)) x = torch.tensor(x, dtype=torch.float).cuda() # x_noisy = torch.tensor(x_noisy, dtype=torch.float).cuda() for batch_index in range(batchs_size): ref_ind = np.random.randint(x.shape[1])# pick random ref patch x_ref = x[:, ref_ind:ref_ind + 1] norms = torch.norm((x_ref-x), dim=0)# norm matrix x is clean patch_image match_inds = torch.argsort(norms)[1:num_matches + 1] # 5 number match un_match_inds = torch.argsort(norms)[50:50+num_unmatches] # x_ref1 = x_noisy[:, ref_ind:ref_ind + 1] x_matched = x[:, match_inds] x_unmatched = x[:,un_match_inds] # loss_fit = torch.mean(torch.norm(hard_thresh(W @ x_ref, threshold) - hard_thresh(W @ x_matched, threshold), dim=0)) \ # - torch.mean(torch.norm(hard_thresh(W @ x_ref, threshold) - hard_thresh(W @ x_unmatched, threshold), dim=0)) loss_fit = torch.mean(torch.norm(W@x_ref-W@x_matched,dim=0))-torch.mean(torch.norm(W@x_ref-W@x_unmatched,dim=0)) loss_reg = - gamma_0 * W.slogdet()[1] + gamma_1 * torch.sum(torch.abs(W)**2)#torch.norm(W)#torch.sum((W)**2) loss = loss_fit + loss_reg step_loss += loss.item() step_fitloss += loss_fit.item() # W1 = W.detach().numpy() # cond.append(np.linalg.cond(W1)) opti.zero_grad() loss.backward()
def get_seg_single(self, cate_preds, seg_preds, kernel_preds, featmap_size,
                   img_shape, ori_shape, scale_factor, cfg, rescale=False,
                   debug=False):
    """Turn one image's category/kernel/mask predictions into instance masks.

    Steps: threshold category scores, apply the selected kernels to
    ``seg_preds`` as 1x1 convolutions, drop masks whose area does not exceed
    their stride, rescore by mean mask confidence, keep the top
    ``cfg.nms_pre``, run ``matrix_nms``, keep scores >= ``cfg.update_thr``
    and the top ``cfg.max_per_img``, then resize to ``ori_shape``.

    ``scale_factor``, ``rescale`` and ``debug`` are accepted but not used in
    this body.

    Returns:
        ``None`` when no candidate survives any filtering stage, otherwise
        the tuple ``(seg_masks, cate_labels, cate_scores)`` where
        ``seg_masks`` is the result of thresholding with ``cfg.mask_thr``.
    """
    assert len(cate_preds) == len(kernel_preds)

    def ranked(scores, limit):
        # Indices in descending score order, cut to `limit` when longer.
        order = torch.argsort(scores, descending=True)
        if len(order) > limit:
            order = order[:limit]
        return order

    # overall info.
    img_h, img_w, _ = img_shape
    upsampled_size_out = (featmap_size[0] * 4, featmap_size[1] * 4)

    # keep only (position, class) pairs scoring above the threshold.
    above_thr = (cate_preds > cfg.score_thr)
    cate_scores = cate_preds[above_thr]
    if len(cate_scores) == 0:
        return None

    # column 0 of the nonzero result indexes the prediction row, column 1
    # the class.
    hits = above_thr.nonzero()
    row_idx = hits[:, 0]
    cate_labels = hits[:, 1]
    kernel_preds = kernel_preds[row_idx]

    # per-row stride lookup: rows are laid out level after level, each level
    # contributing num_grid ** 2 rows.
    level_ends = cate_labels.new_tensor(self.seg_num_grids).pow(2).cumsum(0)
    strides = kernel_preds.new_ones(level_ends[-1])
    level_start = 0
    for level in range(len(self.seg_num_grids)):
        level_end = level_ends[level]
        strides[level_start:level_end] *= self.strides[level]
        level_start = level_end
    strides = strides[row_idx]

    # mask encoding: every selected kernel acts as a 1x1 conv filter.
    num_kernels, kernel_dim = kernel_preds.shape
    kernel_preds = kernel_preds.view(num_kernels, kernel_dim, 1, 1)
    seg_preds = F.conv2d(seg_preds, kernel_preds,
                         stride=1).squeeze(0).sigmoid()
    # mask.
    seg_masks = seg_preds > cfg.mask_thr
    sum_masks = seg_masks.sum((1, 2)).float()

    # filter: mask area must exceed the stride of its level.
    keep = sum_masks > strides
    if keep.sum() == 0:
        return None

    seg_masks = seg_masks[keep, ...]
    seg_preds = seg_preds[keep, ...]
    sum_masks = sum_masks[keep]
    cate_scores = cate_scores[keep]
    cate_labels = cate_labels[keep]

    # mask scoring: mean soft-mask value inside the binary mask.
    seg_scores = (seg_preds * seg_masks.float()).sum((1, 2)) / sum_masks
    cate_scores *= seg_scores

    # sort and keep top nms_pre
    order = ranked(cate_scores, cfg.nms_pre)
    seg_masks = seg_masks[order, :, :]
    seg_preds = seg_preds[order, :, :]
    sum_masks = sum_masks[order]
    cate_scores = cate_scores[order]
    cate_labels = cate_labels[order]

    # Matrix NMS
    cate_scores = matrix_nms(seg_masks, cate_labels, cate_scores,
                             kernel=cfg.kernel, sigma=cfg.sigma,
                             sum_masks=sum_masks)

    # filter.
    keep = cate_scores >= cfg.update_thr
    if keep.sum() == 0:
        return None
    seg_preds = seg_preds[keep, :, :]
    cate_scores = cate_scores[keep]
    cate_labels = cate_labels[keep]

    # sort and keep top_k
    order = ranked(cate_scores, cfg.max_per_img)
    seg_preds = seg_preds[order, :, :]
    cate_scores = cate_scores[order]
    cate_labels = cate_labels[order]

    # upsample, crop to the input image size, then resize to the original
    # image size before binarizing.
    seg_preds = F.interpolate(seg_preds.unsqueeze(0),
                              size=upsampled_size_out,
                              mode='bilinear')[:, :, :img_h, :img_w]
    seg_masks = F.interpolate(seg_preds,
                              size=ori_shape[:2],
                              mode='bilinear').squeeze(0)
    seg_masks = seg_masks > cfg.mask_thr
    return seg_masks, cate_labels, cate_scores
def forward(self, data, final, start):
    """Run the full-resolution branch and two pooled branches, then mix them.

    The input graph is max-pooled with two cluster assignments, each of the
    three resulting graphs goes through ``self.layers`` convolution blocks,
    the pooled features are interpolated back onto the input nodes with
    inverse-squared-distance kNN weights, and everything is concatenated and
    passed through ``self.conv_mix``.

    Args:
        data: Batch object exposing ``x``, ``edge_index``, ``pos`` and
            ``batch``. Its ``batch`` attribute is overwritten with the
            per-node category ids.
        final: When true, (re)compute and cache the kNN interpolation
            indices and weights on ``self``.
        start: Same effect as ``final``. When neither flag is set, the
            values cached on ``self`` by an earlier call are reused.

    Returns:
        Output of ``self.conv_mix`` on the concatenated features, with
        dropout applied unless ``self.p == 1``.
    """
    x, edge_index, pos, batch = data.x, data.edge_index, data.pos, data.batch
    x_start = x  # raw input features, reused by the optional skip connection

    def pick_bn(layer):
        # Layers 0 and 1 have their own batch norm; later layers share bn3.
        if layer == 0:
            return self.bn
        if layer == 1:
            return self.bn2
        return self.bn3

    def run_branch(feats, edges, convs, skips):
        # Apply the conv blocks of one branch (optional mid skip, BN, ReLU).
        for layer in range(self.layers):
            residual = feats
            feats = convs[layer](feats, edges)
            if self.midSkip:
                feats = torch.cat((feats, residual), 1)
            feats = F.relu(pick_bn(layer)(skips[layer](feats, edges)))
        return feats

    def append_coords(feats, positions, cat_ids):
        # Append max-normalized positions and the category scaled by 1/4.
        norm_pos = positions / torch.max(positions)
        norm_cat = ((cat_ids % 5).float() / 4).view(cat_ids.size(0), 1)
        return torch.cat((feats, norm_pos, norm_cat), 1)

    def knn_weights(coarse_pos, coarse_batch, fine_pos, fine_batch):
        # Inverse squared-distance weights from each node to its neighbours.
        y_idx, x_idx = knn(coarse_pos, fine_pos, self.knn,
                           batch_x=coarse_batch, batch_y=fine_batch)
        diff = coarse_pos[x_idx] - fine_pos[y_idx]
        squared_distance = (diff * diff).sum(dim=-1, keepdim=True)
        # Clamp so that coincident points do not divide by zero.
        weights = 1.0 / torch.clamp(squared_distance, min=1e-16)
        return weights, x_idx, y_idx

    def unpool(feats, x_idx, y_idx, weights, num_nodes):
        # Weighted average of pooled features, restored to input node order.
        summed = scatter_add(feats[x_idx] * weights, y_idx,
                             dim=0, dim_size=num_nodes)
        summed = summed / scatter_add(weights, y_idx,
                                      dim=0, dim_size=num_nodes)
        return summed[self.backSorter]

    # Only street based pooling
    # NOTE(review): no other clustering mode is handled here; for any other
    # value the names assigned in this branch stay unbound.
    if self.clustering == 'Street':
        batch_size = int(torch.max(batch)) + 1

        def tile_with_offset(ids, step):
            # One copy of `ids` per graph in the batch, the i-th copy shifted
            # by i * step so ids of different graphs never collide.
            shifted = [ids + i * step for i in range(1, batch_size)]
            return torch.cat([ids] + shifted)

        batchClusters1 = tile_with_offset(self.clusters1, self.maxCluster1)
        batchCat = tile_with_offset(self.categories, 5).long()
        batchClusters2 = tile_with_offset(self.clusters2, self.maxCluster2)
        data.batch = batchCat
        data2 = data

    # Both pooled branches, max pooling
    data = max_pool(batchClusters1, data)
    x_t, edge_index_t, pos_t, batchCat_t = (
        data.x, data.edge_index, data.pos, data.batch)
    data2 = max_pool(batchClusters2, data2)
    x_t2, edge_index_t2, pos_t2, batchCat_t2 = (
        data2.x, data2.edge_index, data2.pos, data2.batch)
    edge_index_t, _ = add_self_loops(edge_index_t)
    edge_index_t2, _ = add_self_loops(edge_index_t2)

    # Add coordinates and categories to input
    if self.coords:
        x = append_coords(x, pos, batchCat)
        x_t = append_coords(x_t, pos_t, batchCat_t)
        x_t2 = append_coords(x_t2, pos_t2, batchCat_t2)

    # Perform convolution blocks in all 3 branches
    x = run_branch(x, edge_index, self.moduleList1, self.skipList1)
    x_t = run_branch(x_t, edge_index_t, self.moduleList2, self.skipList2)
    x_t2 = run_branch(x_t2, edge_index_t2, self.moduleList3, self.skipList3)

    # Calculate knn weights of both pooled branches for the first batch (and
    # the last, since its size might be different). This is done once even
    # when both flags are set: sorting `pos` a second time would make
    # backSorter undo only the second permutation and leave the unpooled
    # features in the wrong node order.
    if start or final:
        # presumably knn requires batch vectors sorted by graph id -- TODO
        # confirm; backSorter restores the original order afterwards.
        sorter = torch.argsort(batchCat)
        backsorter = torch.argsort(sorter)
        pos = pos[sorter]
        batchCat = batchCat[sorter]

        weights, xIdx, yIdx = knn_weights(pos_t, batchCat_t, pos, batchCat)
        weights2, xIdx2, yIdx2 = knn_weights(
            pos_t2, batchCat_t2, pos, batchCat)

        self.weights = weights
        self.xIdx = xIdx
        self.yIdx = yIdx
        self.weights2 = weights2
        self.xIdx2 = xIdx2
        self.yIdx2 = yIdx2
        self.backSorter = backsorter

    # Unpool pooled branches
    num_nodes = pos.size(0)
    x_t = unpool(x_t, self.xIdx, self.yIdx, self.weights, num_nodes)
    x_t2 = unpool(x_t2, self.xIdx2, self.yIdx2, self.weights2, num_nodes)

    # Input size of final convolution
    if self.skipconv:
        y = torch.cat((x, x_t, x_t2, x_start), 1)
    else:
        y = torch.cat((x, x_t, x_t2), 1)

    # Do final convolution
    y = self.conv_mix(y, edge_index)

    # Add dropout layer
    if self.p != 1:
        y = F.dropout(y, training=self.training, p=self.p)

    return y