Ejemplo n.º 1
0
    a, _ = train_dataset[int(rp[i])]
    t = a[0].item() # index of first token in the sequence
    counts[t] +=1
prob = counts/counts.sum()

%%time

from mingpt.utils import sample

# Seed every sample with a first token drawn from `prob` (the normalised
# first-token counts computed above), then let the model generate the rest.
n_samples = 32
start_pixel = np.random.choice(np.arange(C.size(0)), size=(n_samples, 1), replace=True, p=prob)
start_pixel = torch.from_numpy(start_pixel).to(trainer.device)
pixels = sample(model, start_pixel, 32*32-1, temperature=1.0, sample=True, top_k=100)

# for visualization we have to invert the permutation used to produce the pixels
iperm = torch.argsort(train_dataset.perm)

ncol = 8
nrow = n_samples // ncol
plt.figure(figsize=(16, 8))
for i in range(n_samples):
    pxi = pixels[i][iperm]  # note: undo the encoding permutation

    plt.subplot(nrow, ncol, i+1)
    # Look up each token in C and reshape to a 32x32x3 image; imshow needs
    # an unsigned 8-bit array to interpret the values as 0..255 colours.
    plt.imshow(C[pxi].view(32, 32, 3).numpy().astype(np.uint8))
    plt.axis('off')

# visualize some of the learned positional embeddings, maybe they contain structure
plt.figure(figsize=(5, 5))
nsee = 8*8
ncol = 8
Ejemplo n.º 2
0
def get_objf(batch: Dict,
             model: AcousticModel,
             P: k2.Fsa,
             device: torch.device,
             graph_compiler: MmiTrainingGraphCompiler,
             is_training: bool,
             is_update: bool,
             accum_grad: int = 1,
             den_scale: float = 1.0,
             att_rate: float = 0.0,
             tb_writer: Optional[SummaryWriter] = None,
             global_batch_idx_train: Optional[int] = None,
             optimizer: Optional[torch.optim.Optimizer] = None):
    """Compute the numerator-minus-denominator objective for one batch.

    In training mode the loss is also back-propagated and, when
    ``is_update`` is set, gradients are clipped and the optimizer is stepped.

    Args:
        batch: Dict with ``'features'`` (a 3-D tensor, [N, T, C] on entry)
            and ``'supervisions'`` (providing ``'sequence_idx'``,
            ``'start_frame'``, ``'num_frames'`` and ``'text'``).
        model: Called as ``model(feature, supervisions)``; must return
            ``(nnet_output, encoder_memory, memory_mask)``. Its
            ``decoder_forward`` is used only when ``att_rate != 0``.
        P: Passed through to ``graph_compiler.compile``.
        device: Device the features and graphs are moved to.
        graph_compiler: Produces the numerator and denominator graphs.
        is_training: If True, run with gradients and call ``backward()``;
            otherwise everything runs under ``torch.no_grad()``.
        is_update: If True (and training), clip gradients, step the
            optimizer and zero the gradients.
        accum_grad: The loss is divided by ``len(texts) * accum_grad``.
        den_scale: Weight applied to the denominator total scores.
        att_rate: Interpolation weight of the attention-decoder loss.
        tb_writer: Optional TensorBoard writer for diagnostics.
        global_batch_idx_train: Optional global step used for logging.
        optimizer: Required when ``is_training`` and ``is_update``.

    Returns:
        A tuple ``(-tot_score, tot_frames, all_frames)`` of Python numbers.
    """
    feature = batch['features']
    supervisions = batch['supervisions']
    # Convert start/length from input frames to output frames.
    # NOTE(review): the ((x - 1) // 2 - 1) // 2 form presumably mirrors two
    # stride-2 reductions inside the model -- confirm against the model.
    supervision_segments = torch.stack(
        (supervisions['sequence_idx'],
         (((supervisions['start_frame'] - 1) // 2 - 1) // 2),
         (((supervisions['num_frames'] - 1) // 2 - 1) // 2)),
        1).to(torch.int32)
    supervision_segments = torch.clamp(supervision_segments, min=0)
    # Order segments by decreasing duration and keep the texts aligned.
    indices = torch.argsort(supervision_segments[:, 2], descending=True)
    supervision_segments = supervision_segments[indices]

    texts = supervisions['text']
    texts = [texts[idx] for idx in indices]
    assert feature.ndim == 3

    feature = feature.to(device)
    # at entry, feature is [N, T, C]
    feature = feature.permute(0, 2, 1)  # now feature is [N, C, T]
    if is_training:
        nnet_output, encoder_memory, memory_mask = model(feature, supervisions)
        if att_rate != 0.0:
            att_loss = model.decoder_forward(encoder_memory, memory_mask,
                                             supervisions, graph_compiler)
    else:
        with torch.no_grad():
            nnet_output, encoder_memory, memory_mask = model(
                feature, supervisions)
            if att_rate != 0.0:
                att_loss = model.decoder_forward(encoder_memory, memory_mask,
                                                 supervisions, graph_compiler)

    # nnet_output is [N, C, T]
    nnet_output = nnet_output.permute(0, 2, 1)  # now nnet_output is [N, T, C]

    if is_training:
        num, den = graph_compiler.compile(texts, P)
    else:
        with torch.no_grad():
            num, den = graph_compiler.compile(texts, P)

    assert num.requires_grad == is_training
    assert den.requires_grad is False
    num = num.to(device)
    den = den.to(device)

    dense_fsa_vec = k2.DenseFsaVec(nnet_output, supervision_segments)
    assert nnet_output.device == device

    num = k2.intersect_dense(num, dense_fsa_vec, 10.0)
    den = k2.intersect_dense(den, dense_fsa_vec, 10.0)

    num_tot_scores = num.get_tot_scores(log_semiring=True,
                                        use_double_scores=True)
    den_tot_scores = den.get_tot_scores(log_semiring=True,
                                        use_double_scores=True)
    tot_scores = num_tot_scores - den_scale * den_tot_scores

    (tot_score, tot_frames,
     all_frames) = get_tot_objf_and_num_frames(tot_scores,
                                               supervision_segments[:, 2])

    if is_training:

        def maybe_log_gradients(tag: str):
            if tb_writer is not None and global_batch_idx_train is not None and global_batch_idx_train % 200 == 0:
                tb_writer.add_scalars(tag,
                                      measure_gradient_norms(model, norm='l1'),
                                      global_step=global_batch_idx_train)

        if att_rate != 0.0:
            loss = (-(1.0 - att_rate) * tot_score +
                    att_rate * att_loss) / (len(texts) * accum_grad)
        else:
            loss = (-tot_score) / (len(texts) * accum_grad)
        loss.backward()
        if is_update:
            maybe_log_gradients('train/grad_norms')
            clip_grad_value_(model.parameters(), 5.0)
            maybe_log_gradients('train/clipped_grad_norms')
            # The costly diagnostic needs both a writer and a step counter;
            # both are optional, so without them take the plain step.
            measure_param_change = (
                tb_writer is not None
                and global_batch_idx_train is not None
                and (global_batch_idx_train // accum_grad) % 200 == 0)
            if measure_param_change:
                # Once in a time we will perform a more costly diagnostic
                # to check the relative parameter change per minibatch.
                deltas = optim_step_and_measure_param_change(model, optimizer)
                tb_writer.add_scalars(
                    'train/relative_param_change_per_minibatch',
                    deltas,
                    global_step=global_batch_idx_train)
            else:
                optimizer.step()
            optimizer.zero_grad()

    ans = -tot_score.detach().cpu().item(), tot_frames.cpu().item(
    ), all_frames.cpu().item()
    return ans
Ejemplo n.º 3
0
 def test_compute_non_dominated_hypercell_bounds_2d(self):
     """Check 2-objective non-dominated cell bounds, unbatched and batched.

     The unbatched result is compared against a hand-written set of seven
     cells; the batched result is validated through the hypervolume it
     implies.
     """
     ref_point_raw = torch.zeros(2, device=self.device)
     arange = torch.arange(3, 9, device=self.device)
     pareto_Y_raw = torch.stack([arange, 11 - arange], dim=-1)
     inf = float("inf")
     # Index 0 holds the lower corners, index 1 the upper corners; row k of
     # each describes the same cell.
     expected_cell_bounds_raw = torch.tensor(
         [
             [
                 [8.0, 0.0],
                 [7.0, 3.0],
                 [6.0, 4.0],
                 [5.0, 5.0],
                 [4.0, 6.0],
                 [3.0, 7.0],
                 [0.0, 8.0],
             ],
             [
                 [inf, inf],
                 [8.0, inf],
                 [7.0, inf],
                 [6.0, inf],
                 [5.0, inf],
                 [4.0, inf],
                 [3.0, inf],
             ],
         ],
         device=self.device,
     )
     for dtype in (torch.float, torch.double):
         pareto_Y = pareto_Y_raw.to(dtype=dtype)
         ref_point = ref_point_raw.to(dtype=dtype)
         expected_cell_bounds = expected_cell_bounds_raw.to(dtype=dtype)
         # test non-batch
         cell_bounds = compute_non_dominated_hypercell_bounds_2d(
             pareto_Y_sorted=pareto_Y,
             ref_point=ref_point,
         )
         self.assertEqual(cell_bounds.shape, expected_cell_bounds.shape)
         # Glue lower and upper corners into one row per cell so that a
         # match means the whole cell agrees, regardless of cell order.
         cells = torch.cat([cell_bounds[0], cell_bounds[1]], dim=-1)
         expected_cells = torch.cat(
             [expected_cell_bounds[0], expected_cell_bounds[1]], dim=-1
         )
         num_matches = (
             (cells.unsqueeze(0) == expected_cells.unsqueeze(1))
             .all(dim=-1)
             .any(dim=0)
             .sum()
         )
         # assertTrue(x, 7) would treat 7 as the failure message and pass
         # for any non-zero count, so compare the count explicitly.
         self.assertEqual(num_matches.item(), 7)
         # test batch
         pareto_Y_batch = torch.stack(
             [pareto_Y, pareto_Y + pareto_Y.max(dim=-2).values], dim=0
         )
         # filter out points that are not better than ref_point
         ref_point = pareto_Y.max(dim=-2).values
         pareto_Y_batch = _pad_batch_pareto_frontier(
             Y=pareto_Y_batch, ref_point=ref_point, is_pareto=True
         )
         # sort pareto_Y_batch
         pareto_Y_batch = pareto_Y_batch.gather(
             index=torch.argsort(pareto_Y_batch[..., :1], dim=-2).expand(
                 pareto_Y_batch.shape
             ),
             dim=-2,
         )
         cell_bounds = compute_non_dominated_hypercell_bounds_2d(
             ref_point=ref_point,
             pareto_Y_sorted=pareto_Y_batch,
         )
         # check hypervolume
         max_vals = (pareto_Y + pareto_Y).max(dim=-2).values
         # Clamp the infinite upper bounds so each cell has a finite volume.
         clamped_cell_bounds = torch.min(cell_bounds, max_vals)
         total_hv = (max_vals - ref_point).prod()
         nondom_hv = (
             (clamped_cell_bounds[1] - clamped_cell_bounds[0])
             .prod(dim=-1)
             .sum(dim=-1)
         )
         hv = total_hv - nondom_hv
         self.assertEqual(hv[0].item(), 0.0)
         self.assertEqual(hv[1].item(), 49.0)
Ejemplo n.º 4
0
    def test_sort(self, device):
        """Exercise torch.sort / argsort in both directions on ``device``.

        Covers in-place sorting, ``out=`` result tensors, agreement with
        ``argsort``, tensors with duplicate keys and tensors containing NaNs.
        """
        # on CUDA 2048 vs >2048 have different code path for the dim being sorted
        for SIZE in (4, 2049):
            x = torch.rand(4, SIZE, device=device)
            res1val, res1ind = torch.sort(x)

            # Test inplace
            y = x.clone()
            y_inds = torch.tensor((), dtype=torch.int64, device=device)
            torch.sort(y, out=(y, y_inds))
            x_vals, x_inds = torch.sort(x)
            self.assertEqual(x_vals, y)
            self.assertEqual(x_inds, y_inds)

            # Test use of result tensor
            res2val = torch.tensor((), device=device)
            res2ind = torch.tensor((), device=device, dtype=torch.long)
            torch.sort(x, out=(res2val, res2ind))
            self.assertEqual(res1val, res2val, atol=0, rtol=0)
            self.assertEqual(res1ind, res2ind, atol=0, rtol=0)
            self.assertEqual(torch.argsort(x), res1ind)
            self.assertEqual(x.argsort(), res1ind)

            # Test sorting of random numbers
            self.assertIsOrdered('ascending', x, res2val, res2ind, 'random')

            # Test simple sort
            self.assertEqual(torch.sort(
                torch.tensor((50, 40, 30, 20, 10), device=device))[0],
                             torch.tensor((10, 20, 30, 40, 50), device=device),
                             atol=0,
                             rtol=0)

            # Test that we still have proper sorting with duplicate keys
            x = torch.floor(torch.rand(4, SIZE, device=device) * 10)
            torch.sort(x, out=(res2val, res2ind))
            self.assertIsOrdered('ascending', x, res2val, res2ind,
                                 'random with duplicate keys')

            # DESCENDING SORT
            x = torch.rand(4, SIZE, device=device)
            res1val, res1ind = torch.sort(x, x.dim() - 1, True)

            # Test use of result tensor
            res2val = torch.tensor((), device=device)
            res2ind = torch.tensor((), device=device, dtype=torch.long)
            torch.sort(x, x.dim() - 1, True, out=(res2val, res2ind))
            self.assertEqual(res1val, res2val, atol=0, rtol=0)
            self.assertEqual(res1ind, res2ind, atol=0, rtol=0)
            self.assertEqual(torch.argsort(x, x.dim() - 1, True), res1ind)
            self.assertEqual(x.argsort(x.dim() - 1, True), res1ind)

            # Test sorting of random numbers
            self.assertIsOrdered('descending', x, res2val, res2ind, 'random')

            # Test simple sort task
            self.assertEqual(torch.sort(
                torch.tensor((10, 20, 30, 40, 50), device=device), 0, True)[0],
                             torch.tensor((50, 40, 30, 20, 10), device=device),
                             atol=0,
                             rtol=0)

            # Test that we still have proper sorting with duplicate keys.
            # Build a tensor that really contains duplicates and sort it in
            # descending order, mirroring the ascending check above.
            x = torch.floor(torch.rand(4, SIZE, device=device) * 10)
            torch.sort(x, x.dim() - 1, True, out=(res2val, res2ind))
            self.assertIsOrdered('descending', x, res2val, res2ind,
                                 'random with duplicate keys')

            # Test sorting with NaNs
            x = torch.rand(4, SIZE, device=device)
            x[1][2] = float('NaN')
            x[3][0] = float('NaN')
            torch.sort(x, out=(res2val, res2ind))
            self.assertIsOrdered('ascending', x, res2val, res2ind,
                                 'random with NaNs')
            torch.sort(x, out=(res2val, res2ind), descending=True)
            self.assertIsOrdered('descending', x, res2val, res2ind,
                                 'random with NaNs')
Ejemplo n.º 5
0
    # Counters for flagged sessions: FP is incremented while scanning
    # test_normal_loader, TP while scanning test_abnormal_loader.
    TP = 0
    FP = 0
    # Test the model
    start_time = time.time()
    with torch.no_grad():
        for line in test_normal_loader:
            # A session shorter than one window is flagged outright; the
            # loop below then runs zero times because the range is empty.
            if len(line) < window_size:
                FP += 1
            # Slide a window over the session: each window of window_size
            # entries is the input, the entry right after it is the label.
            for i in range(len(line) - window_size):
                seq = line[i:i + window_size]
                label = line[i + window_size]
                seq = torch.tensor(seq, dtype=torch.float).view(-1, window_size).to(device)
                # One-hot encode every entry of the window over num_classes.
                x_onehot = torch.nn.functional.one_hot(seq.long(), num_classes).float()
                label = torch.tensor(label).view(-1).to(device)
                output = model(x_onehot)
                # argsort is ascending, so the last num_candidates indices of
                # the first row are the highest-scoring classes.
                predicted = torch.argsort(output, 1)[0][-num_candidates:]
                # Flag the session on the first label outside the candidate
                # set; the break ensures each session is counted at most once.
                if label not in predicted:
                    FP += 1
                    break

    # Same sliding-window procedure over the abnormal sessions
    # (the remainder of this loop lies outside the visible excerpt).
    with torch.no_grad():
        for line in test_abnormal_loader:
            if len(line) < window_size:
                TP += 1
            for i in range(len(line) - window_size):
                seq = line[i:i + window_size]
                label = line[i + window_size]
                seq = torch.tensor(seq, dtype=torch.float).view(-1, window_size).to(device)
                x_onehot = torch.nn.functional.one_hot(seq.long(), num_classes).float()
                label = torch.tensor(label).view(-1).to(device)
                output = model(x_onehot)
Ejemplo n.º 6
0
def gen_objectpairs(proposals,filter_scores):
    """Build scored, NMS-filtered (subject, object) box pairs.

    Every ordered pair of distinct boxes in ``proposals`` is scored as
    ``score[subject] * score[object] * filter_score``, sorted by that score
    in descending order and passed through ``nms``. The surviving pairs are
    returned as one box list whose boxes are the union of each pair.

    Args:
        proposals: Detection container exposing ``get_field('labels')``,
            ``get_field('scores')``, ``bbox``, ``size`` and ``len()``.
        filter_scores: One weight per ordered pair, in row-major
            (subject-major) order, including the self pairs; it is reshaped
            to a column before use.

    Returns:
        A list holding a single ``BoxList`` with the fields
        ``subject_boundingboxes``, ``object_boundingboxes``,
        ``subject_category``, ``object_category``, ``subject_scores``,
        ``object_scores`` and ``objectpairs_scores``. The box list is empty
        when there are no boxes or no pairs remain.
    """
    labels = proposals.get_field('labels')
    object_scores = proposals.get_field('scores')
    bounding_box = proposals.bbox
    img_size = proposals.size
    num_boxes = len(proposals)

    if num_boxes != 0:
        # Enumerate the num_boxes x num_boxes grid in row-major order:
        # the subject index varies slowest, the object index fastest.
        box_ids = torch.arange(num_boxes)
        subject_ids = box_ids.view(-1, 1).repeat(1, num_boxes).view(-1)
        object_ids = box_ids.repeat(num_boxes)
        objectpairs_idx = torch.stack((subject_ids, object_ids), dim=1)

        detection_scores = (object_scores[subject_ids]
                            * object_scores[object_ids]).view(-1, 1)
        objectpairs_scores = detection_scores * filter_scores.view(-1, 1)

        # Drop the diagonal: a box is never paired with itself.
        remain_idx = subject_ids != object_ids
        objectpairs_idx = objectpairs_idx[remain_idx]
        objectpairs_scores = objectpairs_scores[remain_idx]

        # Rank the remaining pairs by score, best first.
        idx = torch.argsort(objectpairs_scores, dim=0, descending=True).view(-1)
        objectpairs_idx = objectpairs_idx[idx]
        objectpairs_scores = objectpairs_scores[idx].view(-1)

        if len(objectpairs_scores) != 0:
            subject_ids = objectpairs_idx[:, 0]
            object_ids = objectpairs_idx[:, 1]

            subject_boundingboxes = bounding_box[subject_ids, :]
            object_boundingboxes = bounding_box[object_ids, :]
            subject_category = labels[subject_ids]
            object_category = labels[object_ids]
            subject_scores = object_scores[subject_ids]
            # Separate name so the per-box scores are not shadowed.
            pair_object_scores = object_scores[object_ids]

            # Union box of subject and object: min of the first two
            # coordinates, max of the last two.
            xs = torch.min(subject_boundingboxes[:, 0], object_boundingboxes[:, 0]).view(-1, 1)
            ys = torch.min(subject_boundingboxes[:, 1], object_boundingboxes[:, 1]).view(-1, 1)
            xm = torch.max(subject_boundingboxes[:, 2], object_boundingboxes[:, 2]).view(-1, 1)
            ym = torch.max(subject_boundingboxes[:, 3], object_boundingboxes[:, 3]).view(-1, 1)
            boxes = torch.cat((xs, ys, xm, ym), 1)

            keep = nms(subject_boundingboxes,object_boundingboxes,objectpairs_scores,subject_category,object_category,thresh=0.25)

            objectpairs = BoxList(boxes[keep], img_size, mode="xyxy")
            objectpairs.add_field("subject_boundingboxes", subject_boundingboxes[keep])
            objectpairs.add_field("object_boundingboxes", object_boundingboxes[keep])
            objectpairs.add_field("subject_category", subject_category[keep])
            objectpairs.add_field("object_category", object_category[keep])
            objectpairs.add_field("subject_scores", subject_scores[keep])
            objectpairs.add_field("object_scores", pair_object_scores[keep])
            objectpairs.add_field("objectpairs_scores", objectpairs_scores[keep])

            return [objectpairs]

    # No boxes, or nothing left after removing self pairs: return an empty
    # box list whose fields keep the dtypes of the non-empty case
    # (labels' dtype for categories, scores' dtype for all score fields).
    objectpairs = BoxList(torch.tensor([],device=bounding_box.device).view(-1,4), img_size, mode="xyxy")
    objectpairs.add_field("subject_boundingboxes", torch.tensor([],device=bounding_box.device).view(-1,4))
    objectpairs.add_field("object_boundingboxes", torch.tensor([],device=bounding_box.device).view(-1,4))
    objectpairs.add_field("subject_category", labels.new_empty((0)))
    objectpairs.add_field("object_category", labels.new_empty((0)))
    objectpairs.add_field("subject_scores", object_scores.new_empty((0)))
    objectpairs.add_field("object_scores", object_scores.new_empty((0)))
    objectpairs.add_field("objectpairs_scores", object_scores.new_empty((0)))

    return [objectpairs]
    def test_broadcast_benchmark(self):
        """Time one clustered_broadcast pass against one broadcast pass.

        Both variants are warmed up first, then a single call of each is
        timed with CUDA events and the two timings are printed.
        """
        # Tensor sizes and the arguments forwarded to cluster_queries.
        N, H, L, S = 12, 8, 1000, 1000
        E, D = 64, 64
        C, I, B = 200, 5, 63

        Q = torch.randn(N, H, L, E).cuda()
        lengths = torch.full((N,), L, dtype=torch.int32).cuda()
        groups, counts = cluster_queries(Q, lengths, C, I, B)
        sorted_g, sorted_gi = torch.sort(groups.view(N*H, -1), dim=-1)
        # Inverse of the sorting permutation, used to restore query order.
        sorted_rev_gi = torch.argsort(sorted_gi, dim=-1)

        q_offset = torch.arange(N*H, device=Q.device).unsqueeze(-1) * L
        q_flat = (sorted_gi + q_offset).reshape(-1)

        Q_grouped = aggregate(Q, groups, 1/counts.float())
        K = torch.randn(N, H, S, E).cuda()
        QK = torch.einsum("nhle,nhse->nhls", Q_grouped, K)

        V = torch.randn(N, H, S, E).cuda()
        A = F.softmax(QK, dim=-1)
        V_new = torch.einsum("nhls,nhse->nhle", A, V)

        def clustered_pass(out_buffer):
            # Broadcast in sorted order, then undo the sort via index_select.
            unit_factors = torch.ones_like(counts, dtype=torch.float32)
            sorted_out = clustered_broadcast(
                V_new, sorted_g.view(N, H, L), counts, unit_factors, out_buffer
            )
            restore = (sorted_rev_gi + q_offset).reshape(-1)
            flat = sorted_out.reshape(-1, D)
            return flat.index_select(0, restore).view(N, H, L, D)

        def reference_pass():
            return broadcast(
                V_new,
                groups,
                torch.ones_like(counts, dtype=torch.float32),
                torch.zeros((N, H, L, E), device=Q.device)
            )

        def timed(step):
            # Bracket a single call with CUDA events and wait for the GPU.
            start = torch.cuda.Event(enable_timing=True)
            stop = torch.cuda.Event(enable_timing=True)
            start.record()
            result = step()
            stop.record()
            torch.cuda.synchronize()
            return start.elapsed_time(stop), result

        # Each pass feeds the previous result back in as the output buffer.
        V_broadcast = torch.zeros((N, H, L, E), dtype=V_new.dtype).cuda()
        V_broadcast = clustered_pass(V_broadcast)
        for _ in range(2000):
            V_broadcast = clustered_pass(V_broadcast)
        t_broadcast, V_broadcast = timed(lambda: clustered_pass(V_broadcast))

        for _ in range(200):
            V_broadcast_2 = reference_pass()
        t_broadcast_2, V_broadcast_2 = timed(reference_pass)

        print("B1: {}, B2: {}".format(t_broadcast, t_broadcast_2))
def train_IL(model, train_loader, labeled_eval_loader, unlabeled_eval_loader,
             args):
    """Train ``model`` on mixed labeled/unlabeled batches, evaluating each epoch.

    Every batch combines: cross-entropy on the labeled samples, a pairwise
    loss on the unlabeled samples using targets derived from their top-k
    feature rankings, cross-entropy on the unlabeled samples against labels
    predicted by the second head, and a consistency loss between the two
    views ``x`` and ``x_bar``.

    Args:
        model: Called as ``model(x)``; must return ``(output1, output2, feat)``.
        train_loader: Yields ``((x, x_bar), label, idx)`` batches.
        labeled_eval_loader: Evaluated after each epoch with ``args.head = 'head1'``.
        unlabeled_eval_loader: Evaluated after each epoch with ``args.head = 'head2'``.
        args: Namespace providing ``lr``, ``momentum``, ``weight_decay``,
            ``step_size``, ``gamma``, ``epochs``, ``rampup_coefficient``,
            ``rampup_length``, ``num_labeled_classes``, ``topk`` and
            ``increment_coefficient``. ``args.head`` is overwritten.
    """
    optimizer = SGD(model.parameters(),
                    lr=args.lr,
                    momentum=args.momentum,
                    weight_decay=args.weight_decay)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer,
                                           step_size=args.step_size,
                                           gamma=args.gamma)
    criterion1 = nn.CrossEntropyLoss()
    criterion2 = BCE()
    for epoch in range(args.epochs):
        loss_record = AverageMeter()
        model.train()
        # Weight of the ramped-up loss terms for this epoch.
        w = args.rampup_coefficient * ramps.sigmoid_rampup(
            epoch, args.rampup_length)
        for (x, x_bar), label, _ in tqdm(train_loader):
            x, x_bar, label = x.to(device), x_bar.to(device), label.to(device)
            output1, output2, feat = model(x)
            output1_bar, output2_bar, _ = model(x_bar)
            prob1 = F.softmax(output1, dim=1)
            prob1_bar = F.softmax(output1_bar, dim=1)
            prob2 = F.softmax(output2, dim=1)
            prob2_bar = F.softmax(output2_bar, dim=1)

            # Samples whose label is below num_labeled_classes are labeled.
            mask_lb = label < args.num_labeled_classes

            rank_feat = (feat[~mask_lb]).detach()

            # Two unlabeled samples form a positive pair (+1) when their
            # top-k feature dimensions are the same set, else negative (-1).
            rank_idx = torch.argsort(rank_feat, dim=1, descending=True)
            rank_idx1, rank_idx2 = PairEnum(rank_idx)
            rank_idx1 = rank_idx1[:, :args.topk]
            rank_idx2 = rank_idx2[:, :args.topk]

            # Sort so the comparison ignores the order within the top-k.
            rank_idx1, _ = torch.sort(rank_idx1, dim=1)
            rank_idx2, _ = torch.sort(rank_idx2, dim=1)

            rank_diff = rank_idx1 - rank_idx2
            rank_diff = torch.sum(torch.abs(rank_diff), dim=1)
            target_ulb = torch.ones_like(rank_diff).float().to(device)
            target_ulb[rank_diff > 0] = -1

            prob1_ulb, _ = PairEnum(prob2[~mask_lb])
            _, prob2_ulb = PairEnum(prob2_bar[~mask_lb])

            loss_ce = criterion1(output1[mask_lb], label[mask_lb])

            # Replace the unlabeled targets with the second head's
            # predictions, offset past the labeled classes.
            label[~mask_lb] = (output2[~mask_lb]
                               ).detach().max(1)[1] + args.num_labeled_classes

            loss_ce_add = w * criterion1(
                output1[~mask_lb], label[~mask_lb]
            ) / args.rampup_coefficient * args.increment_coefficient
            loss_bce = criterion2(prob1_ulb, prob2_ulb, target_ulb)
            consistency_loss = F.mse_loss(prob1, prob1_bar) + F.mse_loss(
                prob2, prob2_bar)

            loss = loss_ce + loss_bce + loss_ce_add + w * consistency_loss

            loss_record.update(loss.item(), x.size(0))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Advance the schedule only after this epoch's optimizer steps;
        # stepping the scheduler first skips the first value of the schedule
        # on PyTorch >= 1.1.
        exp_lr_scheduler.step()
        print('Train Epoch: {} Avg Loss: {:.4f}'.format(
            epoch, loss_record.avg))
        print('test on labeled classes')
        args.head = 'head1'
        test(model, labeled_eval_loader, args)
        print('test on unlabeled classes')
        args.head = 'head2'
        test(model, unlabeled_eval_loader, args)
Ejemplo n.º 9
0
    def predict_select(self,
                       confidence: torch.Tensor,
                       box_predict: torch.Tensor,
                       default_box: torch.Tensor,
                       filter=True):
        """Decode box offsets and keep the best proposals after NMS.

        Offsets are decoded against ``default_box``, clamped to
        ``[cfg.left_border, cfg.right_border]``, filtered by geometry and by
        score, limited to the ``cfg.be_topk`` best scores, passed through
        ``nms`` and finally limited to ``cfg.af_topk`` entries. Proposals,
        scores and offsets are filtered with the same masks and orderings so
        they stay aligned row by row.

        Args:
            confidence: Scores; column 1 of the last dimension is used. A
                tensor with more than two dimensions selects the batched path.
            box_predict: Predicted offsets, indexed like ``confidence``.
            default_box: Default boxes the offsets are decoded against.
            filter: Unused; kept for interface compatibility.

        Returns:
            Batched input: three lists ``(proposals, scores, offsets)`` with
            one tensor per batch element. Otherwise a single
            ``(proposals, scores, offsets)`` tuple of tensors.
        """
        if len(confidence.size()) > 2:
            batch_proposal = []
            batch_conf = []
            batch_offset = []
            for index in range(confidence.size()[0]):
                this_default_box = default_box.clone()
                this_scores = confidence[index, :, 1]
                bbox_deltas = box_predict[index]

                this_proposal = offset_to_box(this_default_box, bbox_deltas)
                this_proposal = torch.clamp(this_proposal,
                                            min=cfg.left_border,
                                            max=cfg.right_border)
                # Drop boxes whose start lies after their end. The scores
                # must be filtered too, otherwise the score mask below has
                # a different length than the proposals it is applied to.
                keep = this_proposal[:, 0] <= this_proposal[:, 1]
                this_proposal = this_proposal[keep]
                bbox_deltas = bbox_deltas[keep]
                this_scores = this_scores[keep]

                keep = this_scores >= 0.5
                this_proposal = this_proposal[keep]
                bbox_deltas = bbox_deltas[keep]
                this_scores = this_scores[keep]

                if this_proposal.size()[-2] > cfg.be_topk:
                    # argsort is ascending: the tail holds the top scores.
                    # Reorder the offsets as well so rows stay aligned.
                    order = torch.argsort(this_scores)[-cfg.be_topk:]
                    this_proposal = this_proposal[order]
                    bbox_deltas = bbox_deltas[order]
                    this_scores = this_scores[order]

                keep2 = nms(this_proposal, this_scores, cfg.nms_threash)
                keep2 = keep2[:cfg.af_topk]

                batch_offset.append(bbox_deltas[keep2])
                batch_proposal.append(this_proposal[keep2])
                batch_conf.append(this_scores[keep2])
            return batch_proposal, batch_conf, batch_offset
        else:
            this_proposal = offset_to_box(default_box, box_predict)
            this_proposal = torch.clamp(this_proposal,
                                        min=cfg.left_border,
                                        max=cfg.right_border)
            # NOTE(review): this path uses a strict "<" while the batched
            # path uses "<=" -- confirm which comparison is intended.
            keep = this_proposal[:, 0] < this_proposal[:, 1]

            this_proposal = this_proposal[keep]
            box_predict = box_predict[keep]
            confidence = confidence[:, 1]
            confidence = confidence[keep]

            # Apply the score threshold only if something survives it;
            # otherwise keep every candidate.
            keep3 = confidence >= 0.5
            if (keep3.sum().item() > 0):
                this_proposal = this_proposal[keep3]
                box_predict = box_predict[keep3]
                confidence = confidence[keep3]
            if this_proposal.size()[-2] > cfg.be_topk:
                # Reorder the offsets together with proposals and scores so
                # the indices returned by nms select matching rows.
                order = torch.argsort(confidence)[-cfg.be_topk:]
                this_proposal = this_proposal[order]
                box_predict = box_predict[order]
                confidence = confidence[order]

            keep2 = nms(this_proposal, confidence, cfg.nms_threash)
            keep2 = keep2[:cfg.af_topk]
            this_proposal = this_proposal[keep2]
            this_conf = confidence[keep2]
            box_predict = box_predict[keep2]
            return this_proposal, this_conf, box_predict
Ejemplo n.º 10
0
    def attack(self, entry):
        # TODO: a problem with get_important_scores is that
        # it does not take the number of tokens into account.
        # Since BERT accepts at most 512 tokens,
        # if [UNK] is inserted after the 512th token,
        # the importance score is essentially invalid.

        # potential solution:
        # 1. modify the tokenize to pass
        #    each entry into BertTokenizer and convert it back
        #    to keep truncated text with at max 512 tokens.
        # 2. (currently used) simply skips if mask_token_index is empty

        entry['phrase_changes'] = 0
        entry['word_changes'] = 0
        entry['changes'] = []
        entry['pred_success'] = True
        entry['success'] = False
        entry['word_num'] = len(entry['words'])

        phrase_lengths = [n for n in entry['n_words_in_phrases'] if n > 1]
        entry['phrase_num'] = len(phrase_lengths)
        entry['phrase_len'] = sum(phrase_lengths)
        entry['query_num'] = 0
        entry['final_adv'] = None

        # 1. retrieve logits and label from the target model
        encoded = self.tokenizer(entry['text'],
                                 padding=True,
                                 truncation=True,
                                 return_token_type_ids=False,
                                 return_tensors="pt")
        input_ids = encoded['input_ids'].to(self.device)
        attention_mask = encoded['attention_mask'].to(self.device)
        orig_logits = self.target_model(input_ids,
                                        attention_mask).logits.squeeze()
        orig_probs = torch.softmax(orig_logits, -1)
        orig_label = torch.argmax(orig_probs)
        max_prob = torch.max(orig_probs)

        if orig_label != entry['label']:
            entry['pred_success'] = False
            return entry

        # filter out stop_words, digits & symbol combination
        filtered_indices = filter_unwanted_phrases(self.stop_words,
                                                   entry['phrases'])

        masked_phrases = get_unk_masked(entry['text'], entry['phrase_offsets'],
                                        filtered_indices)
        importance_scores, _ = get_important_scores(masked_phrases,
                                                    self.tokenizer,
                                                    self.target_model,
                                                    orig_label, max_prob,
                                                    orig_probs, self.device)
        entry['query_num'] += len(masked_phrases)

        # these indices refer to positions after filtering, so they
        # can only be applied to importance_scores and filtered_indices
        sorted_filtered_indices_np = torch.argsort(
            importance_scores, dim=-1, descending=True).data.cpu().numpy()
        importance_scores_np = importance_scores.data.cpu().numpy()
        # obtain correct indices that can be used to index the entry dict
        sorted_indices_np = np.array(
            filtered_indices)[sorted_filtered_indices_np]
        sorted_importance = importance_scores_np[sorted_filtered_indices_np]
        sorted_phrases = np.array(entry['phrases'])[sorted_indices_np]
        sorted_phrase_offsets = np.array(
            entry['phrase_offsets'])[sorted_indices_np]
        sorted_n_words_in_phrase = np.array(
            entry['n_words_in_phrases'])[sorted_indices_np]

        # up to this point,
        # sorted_phrases is a NumPy array containing the filtered phrases ranked by importance
        # sorted_n_words_in_phrase is a NumPy array containing the number of words in each filtered phrase, ranked by importance
        # sorted_importance is a NumPy array containing the importance scores in descending order

        max_change_threshold = len(entry['phrases'])

        # record how many perturbations have been made
        phrase_changes = 0
        word_changes = 0
        changes = []

        text = entry['text']
        phrases = entry['phrases']
        phrase_offsets = entry['phrase_offsets']
        n_words_in_phrases = entry['n_words_in_phrases']

        for idx, i in enumerate(sorted_indices_np):
            # break when attack is successful or changes exceed threshold
            if (idx + 1) / max_change_threshold > self.change_threshold:
                break

            phrase_masked_list = get_phrase_masked_list(
                text, [phrase_offsets[i]], [n_words_in_phrases[i]])[0]

            attack_results = []
            for j, masked_text in enumerate(phrase_masked_list):
                # 3. get masked token candidates from MLM

                encoded = self.tokenizer(masked_text,
                                         truncation=True,
                                         padding=True,
                                         return_token_type_ids=False,
                                         return_tensors='pt')

                input_ids = encoded['input_ids'].to(self.device)
                attention_mask = encoded['attention_mask'].to(self.device)
                mask_token_index = torch.where(
                    input_ids == self.tokenizer.mask_token_id)[-1]
                # skip if part or all of masks exceed max_length
                if len(mask_token_index) != j + 1:
                    continue

                candidates_list = []
                if len(phrase_masked_list) == 1:
                    input_ids[0, mask_token_index[
                        0]] = self.tokenizer.convert_tokens_to_ids(phrases[i])
                    #encoded = self.tokenizer(text,
                    #                       truncation=True,
                    #                       padding=True,
                    #                       return_token_type_ids=False,
                    #                       return_tensors='pt')
                    #input_ids = encoded['input_ids'].to(self.device)
                    #attention_mask = encoded['attention_mask'].to(self.device)
                    candidates_list = get_word_substitutes(
                        input_ids,
                        attention_mask,
                        mask_token_index,
                        self.tokenizer,
                        self.mlm_model,
                        K=self.k,
                        threshold=self.conf_thres)
                    entry['query_num'] += len(input_ids)
                elif len(phrase_masked_list) > 1:
                    candidates_list, qn = get_phrase_substitutes(
                        input_ids,
                        attention_mask,
                        mask_token_index,
                        self.stop_words,
                        self.tokenizer,
                        self.mlm_model,
                        self.device,
                        beam_width=self.beam_width,
                        K=self.k)
                    entry['query_num'] += qn

                mask_text = f" {' '.join([self.tokenizer.mask_token] * (j+1))} "
                for candidates in candidates_list:
                    perturbed_text = masked_text
                    candidate = ' '.join(candidates)

                    if phrases[i] == candidate:
                        continue

                    if '##' in candidate:
                        continue

                    if not phrase_is_wanted(self.stop_words, candidate):
                        continue

                    # replace the mask_text with candidate
                    perturbed_text = perturbed_text.replace(
                        mask_text, candidate, 1)

                    # semantic check -> if the phrase changes too much
                    #if len(candidates) > 1:
                    #seq_embeddings = self.sent_encoder([candidate, phrases[i]])
                    seq_embeddings = self.sent_encoder(
                        [perturbed_text, entry['text']])
                    semantic_sim = np.dot(*seq_embeddings)

                    if semantic_sim < self.sent_semantic_thres:
                        continue

                    importance_score, perturbed_label = get_important_scores(
                        [perturbed_text], self.tokenizer, self.target_model,
                        orig_label, max_prob, orig_probs, self.device)
                    importance_score = importance_score.squeeze()
                    entry['query_num'] += 1

                    perturbed_label = perturbed_label.squeeze()

                    if perturbed_label != orig_label:
                        attack_results = [
                            (perturbed_label == orig_label, j, candidate,
                             perturbed_text, importance_score)
                        ]
                        entry['success'] = True
                        if n_words_in_phrases[i] > 1:
                            entry['phrase_changes'] = phrase_changes + 1
                        entry[
                            'word_changes'] = word_changes + n_words_in_phrases[
                                i]
                        changes.append((phrases[i], candidate))
                        entry['changes'] = changes
                        entry['final_adv'] = perturbed_text
                        return entry

                    attack_results.append(
                        (perturbed_label == orig_label, j, candidate,
                         perturbed_text, importance_score))

            attack_results = sorted(attack_results,
                                    key=lambda x: x[-1],
                                    reverse=True)

            if len(attack_results) == 0:
                #print('no candidates for: ', phrases[i])
                continue

            # no matter what, changes plus 1
            if n_words_in_phrases[i] > 1:
                phrase_changes += 1
            word_changes += n_words_in_phrases[i]

            # attack the max confidence one when there's no success
            result = attack_results[0]

            text = result[3]

            n_words_in_phrases[i] = result[1] + 1

            # update perturbed token to phrases and phrase offsets
            length_diff = len(phrases[i]) - len(result[2])
            if length_diff != 0:
                new_offsets = phrase_offsets[:i]
                for change_i in range(i, len(phrases)):
                    start = phrase_offsets[change_i][0]
                    end = phrase_offsets[change_i][1] - length_diff
                    # start not change for index position
                    if change_i != i:
                        start -= length_diff
                    new_offsets.append([start, end])

                phrase_offsets = new_offsets

            changes.append((phrases[i], result[2]))
            phrases[i] = result[2]

            text = result[3]

        entry['success'] = False
        entry['phrase_changes'] = phrase_changes
        entry['word_changes'] = word_changes
        entry['changes'] = changes
        entry['final_adv'] = text

        return entry
Ejemplo n.º 11
0
def run():
    """Evaluate a BERT-LSTM classifier and run a genetic attack against it.

    The function reads its configuration from the module-level ``parser``
    (``data``, ``nlayer``, ``file_path``, ``tokenizer``, ``sample_size`` and
    ``test_size``), then:

    1. loads the pickled dataset and the GloVe embedding matrix from
       ``aux_files/``;
    2. preprocesses the test split and wraps it in two ``DataLoader``s (a
       batch-size-1 loader for attacking, a batch-size-128 loader for
       measuring accuracy);
    3. restores three copies of the ``bert_lstm`` classifier from the
       checkpoint under ``<file_path>/model_params``;
    4. prints the clean test accuracy;
    5. attacks correctly classified test samples with
       ``GeneticAttack_pytorch`` and prints the running attack success rate
       at the 0.25 and 0.2 normalized-distance thresholds.

    Raises:
        ValueError: if ``args.data`` is not a supported dataset (only
            ``'imdb'`` is supported).
    """
    args = parser.parse_args()
    data = args.data
    nlayer = args.nlayer
    file_path = args.file_path
    save_path = os.path.join(file_path, 'model_params')
    MAX_VOCAB_SIZE = 50000

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # NOTE: pickle.load can execute arbitrary code; this file must come from
    # a trusted source.
    with open('aux_files/dataset_%d.pkl' % MAX_VOCAB_SIZE, 'rb') as f:
        dataset = pickle.load(f)

    # NOTE(review): embedding_matrix is not read again in this function; the
    # load is kept so the set of files the script requires stays the same.
    embedding_matrix = np.load('aux_files/embeddings_glove_%d.npy' %
                               (MAX_VOCAB_SIZE))
    embedding_matrix = torch.tensor(embedding_matrix.T).to(device)

    max_len = 100
    if data.lower() == 'imdb':
        data_path = 'aclImdb'
    else:
        # Previously data_path was simply left unbound here, which surfaced
        # later as an UnboundLocalError with no hint about the cause.
        raise ValueError(
            "Unsupported dataset %r: only 'imdb' is supported." % (data, ))

    bert = BertModel.from_pretrained('bert-base-uncased')
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    data_processed = pre_processing(data_path, MAX_VOCAB_SIZE, max_len)
    tokenizer_selection = args.tokenizer
    if tokenizer_selection.lower() != 'bert':
        data_processed.processing()
        train_sequences, test_sequences = data_processed.bert_indx(tokenizer)
        print('Self preprocessing')
    else:
        data_processed.bert_tokenize(tokenizer)
        train_sequences, test_sequences = data_processed.bert_indx(tokenizer)
        print('BERT tokenizer')
    train_text_init, test_text_init = data_processed.numerical(
        tokenizer, train_sequences, test_sequences)

    test_text = pad_sequences(test_text_init, maxlen=max_len, padding='post')
    test_target = data_processed.all_test_labels
    SAMPLE_SIZE = args.sample_size
    test_data, all_test_data = data_loading(test_text, test_target,
                                            SAMPLE_SIZE)

    # The attack loader yields one sample at a time; the accuracy loader
    # uses larger shuffled batches.
    batch_size = 1
    test_loader_bert = DataLoader(test_data,
                                  batch_size=batch_size,
                                  shuffle=False)
    all_test_loader_bert = DataLoader(all_test_data,
                                      batch_size=128,
                                      shuffle=True)

    lstm_size = 128
    rnn_state_save = os.path.join(save_path, 'best_bert_0.7_0.001_bert_150')
    # Read the checkpoint once and reuse it for all three model copies.
    state_dict = torch.load(rnn_state_save)

    def load_classifier():
        """Build a bert_lstm in eval mode with the checkpoint weights."""
        classifier = bert_lstm(bert, 2, False, nlayer, lstm_size, True, 0.7)
        classifier.eval()
        classifier.load_state_dict(state_dict)
        return classifier.to(device)

    # ---- clean test accuracy -------------------------------------------
    model = load_classifier()
    test_pred = torch.tensor([])
    test_targets = torch.tensor([])

    with torch.no_grad():
        for seqs, length, target in all_test_loader_bert:
            seqs = seqs.type(torch.LongTensor)
            # Reorder the batch by descending length before calling the
            # model.
            len_order = torch.argsort(length, descending=True)
            length = length[len_order]
            seqs = seqs[len_order]
            target = target[len_order]
            seqs, target, length = seqs.to(device), target.to(
                device), length.to(device)

            output, pred_out = model.pred(seqs, length, False)
            test_pred = torch.cat((test_pred, pred_out.cpu()), dim=0)
            test_targets = torch.cat(
                (test_targets, target.type(torch.float).cpu()))

        accuracy = model.evaluate_accuracy(test_pred.numpy(),
                                           test_targets.numpy())
    print('Test Accuracy:{:.4f}.'.format(accuracy))
    print('\n')

    # ---- genetic attack setup ------------------------------------------
    n1 = 8
    n2 = 4
    pop_size = 60
    max_iters = 20
    n_prefix = 6
    n_suffix = 6

    batch_model = load_classifier()
    neighbour_model = load_classifier()
    lm_model = gpt_2_get_words_probs()
    ga_attack = GeneticAttack_pytorch(model,
                                      batch_model,
                                      neighbour_model,
                                      compute_dis,
                                      lm_model,
                                      tokenizer=tokenizer,
                                      max_iters=max_iters,
                                      dataset=dataset,
                                      pop_size=pop_size,
                                      n1=n1,
                                      n2=n2,
                                      n_prefix=n_prefix,
                                      n_suffix=n_suffix,
                                      use_lm=True,
                                      use_suffix=True)

    # ---- attack loop ---------------------------------------------------
    TEST_SIZE = args.test_size
    order_pre = 0  # index of the first sample to attack
    n = 0  # number of samples attacked so far
    orig_list = []
    dist_list = []

    for order, (seq, length, target) in enumerate(test_loader_bert):
        if order < order_pre:
            continue

        # Count the positive entries of the sequence.
        seq_len = np.sum(np.sign(seq.numpy()))
        seq = seq.type(torch.LongTensor)
        seq, length = seq.to(device), length.to(device)
        model.eval()
        with torch.no_grad():
            prediction = model.pred(seq, length,
                                    False)[1].cpu().detach().numpy()
            orig_pred = np.argmax(prediction)
        # Only attack samples the classifier gets right in the first place.
        if orig_pred != target:
            continue
        if seq_len > 100:
            continue

        print('Sequence number:{}'.format(order))
        print('Predicted value:{}'.format(prediction))
        print('Length of sentence: {}, Number of samples:{}'.format(
            length.item(), n + 1))

        # Binary task: the attack target is the opposite label.
        target = int(1 - target)

        x_adv, seq_out = ga_attack.attack(seq, target,
                                          length.type(torch.LongTensor))
        orig_list.append(seq)
        if x_adv is None:
            print('%d failed' % (order))
            # Sentinel distance that can never pass a success threshold.
            dist_list.append(100000)
        else:
            changed_mask = np.array(seq_out) != np.array(x_adv)
            num_changes = np.sum(changed_mask)
            print('%d - %d changed.' % (order, num_changes))
            dist_list.append(num_changes)
            for i in np.where(changed_mask)[0]:
                print('{} ----> {}'.format(seq_out[i], x_adv[i]))
        print('--------------------------')

        n += 1
        # NOTE(review): `n > TEST_SIZE` stops only after TEST_SIZE + 1
        # attacked samples, and the break skips the statistics below for
        # that last sample. Left as-is; confirm whether this is intended.
        if n > TEST_SIZE:
            break

        # Running success rate: an attack counts as successful when the
        # fraction of changed positions is within the threshold.
        normalized_dist_list = [
            dist / orig.shape[1] for dist, orig in zip(dist_list, orig_list)
        ]
        for success_threshold in (0.25, 0.2):
            successful_attacks = [
                x <= success_threshold for x in normalized_dist_list
            ]
            print('Attack success rate : {:.2f}%'.format(
                np.mean(successful_attacks) * 100))
Ejemplo n.º 12
0
    def search(
        self,
        query,
        top_n=5,
        use_top_n_sentences=20,
        rank_with_next_sentence_prediction=True,
    ):
        """Return the charities that best match ``query``.

        Every stored sentence embedding is scored against the query
        embedding. For each charity the ``use_top_n_sentences`` highest
        sentence similarities are averaged, and the ``top_n`` charities with
        the highest average are kept.

        Args:
            query: Query text to search for.
            top_n: Number of charities to return.
            use_top_n_sentences: Number of best-matching sentences per
                charity that contribute to its mean similarity.
            rank_with_next_sentence_prediction: When true, the selected
                charities are re-ordered by the value returned by
                ``calculate_next_sentence_probability`` for their
                description (highest first) and that value is reported as
                the score. Otherwise the mean similarity is the score.

        Returns:
            A list of ``CharitySearchResult`` ordered from best to worst
            match.
        """
        query_embedding = embed_sentences([query]).cpu()
        similarities = calculate_similarities(
            query_embedding,
            self._embeddings,
        )

        charity_similarities = pd.DataFrame({
            'charity': self._embeddings_charity_index,
            'similarity': similarities,
        })

        # Keep each charity's best sentences, average them, then keep the
        # top_n charities by that average.
        best_match_charities = (
            charity_similarities
            .sort_values('similarity', ascending=False)
            .groupby('charity').head(use_top_n_sentences)
            .groupby('charity').mean()
            .sort_values('similarity', ascending=False)
            .head(top_n)
        )
        best_match_indices = best_match_charities.index.tolist()

        matched_charities = [self._charities[i] for i in best_match_indices]
        if not matched_charities:
            return []

        if rank_with_next_sentence_prediction:
            descriptions = [
                charity.description for charity in matched_charities
            ]

            probabilities = calculate_next_sentence_probability(
                query,
                descriptions,
            )

            # Ascending argsort reversed gives the highest probability first.
            rank_indices = torch.argsort(probabilities).tolist()[::-1]
            scores = probabilities.tolist()

            # Pair every charity with its own probability. Previously the
            # reordered charities were zipped with the unsorted probability
            # list, so scores were attached to the wrong results.
            ranked = [(matched_charities[i], scores[i]) for i in rank_indices]
        else:
            matched_similarities = best_match_charities['similarity'].tolist()
            ranked = list(zip(matched_charities, matched_similarities))

        return [
            CharitySearchResult(
                name=charity.name,
                url=charity.url,
                description=charity.description,
                score=score,
            ) for (charity, score) in ranked
        ]
Ejemplo n.º 13
0
    def replay_buffer_training(self, sample, train_results, n):
        """Run one training step on a replay-buffer sample.

        The two Q-networks are always updated by regressing ``Q(s, a)`` onto
        a bootstrapped target built from the target Q-networks. The policy
        network is updated only when ``n`` is a multiple of
        ``self.rbi_delayed_policy_update``. Finally both target Q-networks
        are soft-updated with ``self.tau``.

        Args:
            sample: Mapping with the keys ``'s'``, ``'a'``, ``'r'``, ``'t'``
                and ``'stag'`` (presumably state, action, reward, terminal
                flag and next state -- TODO confirm against the caller).
            train_results: Nested mapping; scalar statistics are appended to
                the lists under ``train_results['scalar']``.
            n: Training step counter. It gates the delayed policy update and
                is also used in the rank threshold ``m`` below.

        Returns:
            The ``train_results`` mapping that was passed in, with the new
            statistics appended.
        """

        s, a, r, t, stag = [sample[k] for k in ['s', 'a', 'r', 't', 'stag']]

        self.train_mode()
        # NOTE(review): alpha is reset on every call, so a value learned by
        # the entropy-tuning branch below does not reach the next call's
        # target computation -- confirm this is intended.
        self.alpha = 0

        # Bootstrapped target g, built from policy samples at stag.
        with torch.no_grad():
            self.pi_net(stag)
            pi_tag_1 = self.pi_net.sample(self.rbi_learner_samples)
            pi_tag_2 = self.pi_net.sample(self.rbi_learner_samples)
            q_target_1 = self.q_target_1(stag, pi_tag_1).mean(dim=0)
            q_target_2 = self.q_target_2(stag, pi_tag_2).mean(dim=0)

            log_pi_tag = self.pi_net.log_prob(torch.cat(
                [pi_tag_1, pi_tag_2])).mean(dim=0).sum(dim=1)

            # Element-wise minimum of the two target estimates.
            q_target = torch.min(q_target_1,
                                 q_target_2) - self.alpha * log_pi_tag
            g = r + (1 - t) * self.gamma**self.n_steps * q_target

        # Delayed policy update.
        if not n % self.rbi_delayed_policy_update:

            self.pi_net(s)
            pi = self.pi_net.rsample(self.rbi_learner_samples)

            # KL distance with update step

            # Detached copy of the sampled actions that tracks gradients on
            # its own (replaces the deprecated autograd.Variable wrapper).
            beta = pi.detach().requires_grad_(True)

            qa_1 = self.q_net_1(s, beta)
            qa_2 = self.q_net_2(s, beta)
            qa = torch.min(qa_1, qa_2)

            # Gradient of the minimum Q-value with respect to the actions.
            gradients = autograd.grad(outputs=qa,
                                      inputs=beta,
                                      grad_outputs=torch.ones_like(qa),
                                      create_graph=False,
                                      retain_graph=False,
                                      only_inputs=True)[0]

            # calculate an alternative for the gradient
            lr = .001
            # beta = (beta + lr * gradients / torch.norm(gradients, dim=-1, keepdim=True)).detach()

            # Update the actions from the Q gradient via clipped_gd; the
            # result is detached and used as a fixed target action.
            beta = clipped_gd(beta, gradients, lr, 1.).detach()

            log_pi = self.pi_net.log_prob(pi)
            log_beta = self.pi_net.log_prob(beta)

            with torch.no_grad():
                qa_1 = self.q_net_1(s, beta)
                qa_2 = self.q_net_2(s, beta)
                qatag = torch.min(qa_1, qa_2).unsqueeze(-1)

            cmin = 0.5
            cmax = 1.5

            # argsort of argsort turns values into ranks along dim 0
            # (rank 0 is the largest Q-value).
            rank = torch.argsort(torch.argsort(qatag, dim=0, descending=True),
                                 dim=0,
                                 descending=False)
            # Weights: cmin everywhere, cmax for ranks below m, and a
            # remainder term at rank m.
            w = cmin * torch.ones_like(beta)
            # NOTE(review): n here is the training step counter, not the
            # number of ranked samples -- confirm this is the intended
            # quantity for the threshold m.
            m = int((1 - cmin) * n / (cmax - cmin))

            w += (cmax - cmin) * (rank < m).float()
            w += ((1 - cmin) * n - m * (cmax - cmin)) * (rank == m).float()

            # loss_p = (self.alpha * log_pi - log_beta).mean()
            loss_p = -(w * (log_beta - log_pi)).sum(dim=-1).mean(dim=0).sum()

            with torch.no_grad():
                entropy = self.pi_net.entropy().sum(dim=-1).mean()

            # numerical gradient (different score)

            # beta = autograd.Variable(pi.data, requires_grad=True)
            #
            # qa_1 = self.q_net_1(s, beta)
            # qa_2 = self.q_net_2(s, beta)
            # qa = torch.min(qa_1, qa_2)
            #
            # gradients = autograd.grad(outputs=qa, inputs=beta, grad_outputs=torch.ones_like(qa),
            #                           create_graph=False, retain_graph=False, only_inputs=True)[0]
            #
            # # calculate an alternative for the gradient
            # lr = 0.01
            # beta = (beta + lr * gradients).detach()
            #
            # with torch.no_grad():
            #     qa_1 = self.q_net_1(s, beta)
            #     qa_2 = self.q_net_2(s, beta)
            #     # qatag = torch.min(qa_1, qa_2)
            #     qatag = (qa_1 + qa_2) / 2
            #
            # dq = (qatag - qa.detach()) / torch.norm(lr * gradients, dim=-1, keepdim=True)
            # ngrad = gradients / torch.norm(gradients, dim=-1, keepdim=True)
            # gradients = dq.unsqueeze(-1) * ngrad
            #
            #
            # log_pi = self.pi_net.log_prob(pi).sum(dim=-1).mean(dim=0)
            # dq = (pi * gradients.detach()).sum(dim=-1).mean(dim=0)
            #
            # loss_p = (self.alpha * log_pi - dq).mean()
            #
            # with torch.no_grad():
            #     entropy = self.pi_net.entropy().sum(dim=-1).mean()

            # alternative gradient (same score)

            # beta = autograd.Variable(pi.data, requires_grad=True)
            #
            # qa_1 = self.q_net_1(s, beta)
            # qa_2 = self.q_net_2(s, beta)
            # qa = torch.min(qa_1, qa_2)
            #
            # gradients = autograd.grad(outputs=qa, inputs=beta, grad_outputs=torch.ones_like(qa),
            #                           create_graph=False, retain_graph=False, only_inputs=True)[0]
            #
            # log_pi = self.pi_net.log_prob(pi).sum(dim=-1).mean(dim=0)
            # dq = (pi * gradients.detach()).sum(dim=-1).mean(dim=0)
            #
            # loss_p = (self.alpha * log_pi - dq).mean()
            #
            # with torch.no_grad():
            #     entropy = self.pi_net.entropy().sum(dim=-1).mean()

            # ORIGINAL FORMULATION

            # qa_1 = self.q_net_1(s, pi).mean(dim=0)
            # qa_2 = self.q_net_2(s, pi).mean(dim=0)
            # qa = torch.min(qa_1, qa_2)
            #
            # log_pi = self.pi_net.log_prob(pi).mean(dim=0).sum(dim=1)
            #
            # loss_p = (self.alpha * log_pi - qa).mean()
            #
            # with torch.no_grad():
            #     entropy = self.pi_net.entropy().sum(dim=-1).mean()

            # entropy = self.pi_net.entropy().sum(dim=-1).mean()
            # loss_p -= 0 * entropy

            self.optimizer_p.zero_grad()
            loss_p.backward()
            if self.clip_p:
                # clip_grad_norm_ is the in-place successor of the
                # deprecated clip_grad_norm.
                nn.utils.clip_grad_norm_(self.pi_net.parameters(),
                                         self.clip_p)
            self.optimizer_p.step()

            # alpha loss
            if self.entropy_tunning:
                alpha_loss = -(self.log_alpha *
                               (log_pi + self.target_entropy).detach()).mean()
                # alpha_loss = -(self.log_alpha * (-self.pi_net.entropy().sum(dim=1) + self.target_entropy).detach()).mean()

                self.optimizer_alpha.zero_grad()
                alpha_loss.backward()
                self.optimizer_alpha.step()

                self.alpha = float(self.log_alpha.exp())

            train_results['scalar']['alpha'].append(float(self.alpha))
            train_results['scalar']['objective'].append(float(-loss_p))
            train_results['scalar']['entropy'].append(float(entropy))
            # soft_update(self.pi_net, self.pi_target, self.tau)

        # Critic update: regress both Q-networks onto the shared target g.
        qa = self.q_net_1(s, a)
        loss_q_1 = F.mse_loss(qa, g, reduction='mean')

        qa = self.q_net_2(s, a)
        loss_q_2 = F.mse_loss(qa, g, reduction='mean')

        self.optimizer_q_1.zero_grad()
        loss_q_1.backward()
        if self.clip_q:
            nn.utils.clip_grad_norm_(self.q_net_1.parameters(), self.clip_q)
        self.optimizer_q_1.step()

        self.optimizer_q_2.zero_grad()
        loss_q_2.backward()

        if self.clip_q:
            nn.utils.clip_grad_norm_(self.q_net_2.parameters(), self.clip_q)
        self.optimizer_q_2.step()

        train_results['scalar']['loss_q_1'].append(float(loss_q_1))
        train_results['scalar']['loss_q_2'].append(float(loss_q_2))

        # Soft-update the target critics from the online critics.
        soft_update(self.q_net_1, self.q_target_1, self.tau)
        soft_update(self.q_net_2, self.q_target_2, self.tau)

        return train_results
Ejemplo n.º 14
0
def argsort(input, dim, descending):
    """Return the indices that sort ``input`` along dimension ``dim``.

    Thin wrapper around ``torch.argsort``; ``descending`` selects
    largest-first ordering when true and smallest-first otherwise.
    """
    sort_options = {'dim': dim, 'descending': descending}
    sorted_indices = th.argsort(input, **sort_options)
    return sorted_indices
Ejemplo n.º 15
0
def compute_jaccard_distance(target_features,
                             k1=20,
                             k2=6,
                             cam_features=None,
                             print_flag=True,
                             search_option=3):
    """Compute a pairwise Jaccard distance matrix from k-reciprocal neighbours.

    Args:
        target_features: 2-D feature tensor with one row per sample
            (``N = target_features.size(0)``).
        k1: Neighbourhood size passed to ``k_reciprocal_neigh``; half of it
            (rounded) is used for the expansion candidates.
        k2: Number of top-ranked neighbours whose weight rows are averaged
            (skipped when ``k2 == 1``).
        cam_features: Optional second feature tensor; when given, 0.1 times
            its pairwise distance is subtracted from the feature distance.
        print_flag: When true, progress messages and the elapsed time are
            printed.
        search_option: Values below 3 run the distance computation on CUDA,
            otherwise on CPU.

    Returns:
        An ``N x N`` CPU tensor of Jaccard distances, with negative entries
        clamped to 0.
    """
    end = time.time()
    N = target_features.size(0)
    if (search_option < 3):
        # accelerate matrix distance computing
        target_features = target_features.cuda()
    else:
        target_features = target_features.cpu()

    if print_flag:
        print('Computing original distance...')

    # Entry (i, j) is 2*||x_i||^2 - 2*<x_i, x_j>. This equals the squared
    # Euclidean distance only when all rows have the same norm.
    # NOTE(review): presumably the features are L2-normalised -- confirm.
    original_dist = torch.pow(target_features, 2).sum(dim=1, keepdim=True) * 2
    original_dist = original_dist.expand(
        N, N) - 2 * torch.mm(target_features, target_features.t())

    if (cam_features is not None):
        if (search_option < 3):
            # accelerate matrix distance computing
            cam_features = cam_features.cuda()
        else:
            cam_features = cam_features.cpu()
        # Same distance construction on the second feature set, subtracted
        # with a fixed 0.1 weight.
        cam_dist = torch.pow(cam_features, 2).sum(dim=1, keepdim=True) * 2
        cam_dist = cam_dist.expand(
            N, N) - 2 * torch.mm(cam_features, cam_features.t())
        original_dist -= 0.1 * cam_dist
        del cam_dist

    # Scale every column by its maximum, then transpose so that each row
    # holds the scaled distances used for ranking.
    original_dist /= original_dist.max(0)[0]
    original_dist = original_dist.t()
    # Ascending argsort: for each row, sample indices from nearest to farthest.
    initial_rank = torch.argsort(original_dist, dim=-1)

    # Everything below runs on CPU regardless of search_option.
    original_dist = original_dist.cpu()
    initial_rank = initial_rank.cpu()
    all_num = gallery_num = original_dist.size(0)

    del target_features

    if print_flag:
        print('Computing Jaccard distance...')

    # Neighbour sets for k1 and round(k1 / 2), one entry per sample.
    # NOTE(review): k_reciprocal_neigh is defined elsewhere; it is assumed to
    # return a 1-D index tensor (its results are concatenated with torch.cat
    # and used as indices below) -- confirm.
    nn_k1 = []
    nn_k1_half = []
    for i in range(all_num):
        nn_k1.append(k_reciprocal_neigh(initial_rank, i, k1))
        nn_k1_half.append(
            k_reciprocal_neigh(initial_rank, i, int(np.around(k1 / 2))))

    # V[i] holds the normalised neighbour weights of sample i.
    V = torch.zeros(all_num, all_num)
    for i in range(all_num):
        k_reciprocal_index = nn_k1[i]
        k_reciprocal_expansion_index = k_reciprocal_index
        for candidate in k_reciprocal_index:
            candidate_k_reciprocal_index = nn_k1_half[candidate]
            # Absorb the candidate's half-size neighbour set when more than
            # two thirds of it already lies inside i's neighbour set.
            if (len(
                    np.intersect1d(candidate_k_reciprocal_index,
                                   k_reciprocal_index)) >
                    2 / 3 * len(candidate_k_reciprocal_index)):
                k_reciprocal_expansion_index = torch.cat(
                    (k_reciprocal_expansion_index,
                     candidate_k_reciprocal_index))

        k_reciprocal_expansion_index = torch.unique(
            k_reciprocal_expansion_index)  ## element-wise unique
        # Weights exp(-distance), normalised to sum to 1 over the expanded set.
        weight = torch.exp(-original_dist[i, k_reciprocal_expansion_index])
        V[i, k_reciprocal_expansion_index] = weight / torch.sum(weight)

    if k2 != 1:
        # Replace each row of V by the mean of the rows of its first k2
        # ranked neighbours.
        k2_rank = initial_rank[:, :k2].clone().view(-1)
        V_qe = V[k2_rank]
        V_qe = V_qe.view(initial_rank.size(0), k2, -1).sum(1)
        V_qe /= k2
        V = V_qe
        del V_qe
    del initial_rank

    # invIndex[i]: row indices whose weight in column i is non-zero.
    invIndex = []
    for i in range(gallery_num):
        invIndex.append(torch.nonzero(V[:, i])[:, 0])  #len(invIndex)=all_num

    jaccard_dist = torch.zeros_like(original_dist)
    for i in range(all_num):
        # temp_min accumulates, per other sample, the sum of element-wise
        # minima between row i of V and that sample's row.
        temp_min = torch.zeros(1, gallery_num)
        indNonZero = torch.nonzero(V[i, :])[:, 0]
        indImages = []
        indImages = [invIndex[ind] for ind in indNonZero]
        for j in range(len(indNonZero)):
            temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + torch.min(
                V[i, indNonZero[j]], V[indImages[j], indNonZero[j]])
        jaccard_dist[i] = 1 - temp_min / (2 - temp_min)
    del invIndex

    del V

    # Clamp negative distances to zero.
    pos_bool = (jaccard_dist < 0)
    jaccard_dist[pos_bool] = 0.0
    if print_flag:
        print("Time cost: {}".format(time.time() - end))
    return jaccard_dist
    output = Q11 + Q21 + Q12 + Q22

    return output

if __name__ == '__main__':
    # Choose the compute device, warning when no GPU is available.
    has_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if has_cuda else "cpu")
    if not has_cuda:
        print("WARNING: CPU only, this will be slow!")

    image = image_loader("umbrella.jpg", 228)
    print(image.shape)

    # NOTE(review): the model is moved to `device` but `image` is not moved
    # here -- presumably image_loader takes care of placement; confirm.
    model = torchvision.models.resnet18(pretrained=True)
    model.to(device)

    # Class indices ordered by ascending score.
    outputs = model(image)
    print(torch.argsort(outputs))

    # Names of all sub-modules, useful for picking layers to hook.
    module_names = dict(model.named_modules()).keys()
    print(module_names)

    hooked_layers = ["conv1", "layer2.0.conv1", "layer4.1.conv2"]
    fe = FeatureExtractor(model, hooked_layers)
    features = fe.add_features(image)
    print(features.shape)

    # Show a single slice (index 675) of the extracted features.
    feature_map = features.squeeze()[675].detach().cpu().numpy()
    plt.imshow(feature_map)
    plt.show()

Ejemplo n.º 17
0
    def test_step(model, test_triples, all_true_triples, args):
        '''
        Evaluate the model on test or valid datasets.

        Every triple is scored in 'head-batch' and 'tail-batch' mode, the
        candidates are sorted by score, and the position of the true entity
        gives its rank. Returns a dict with the mean MRR, MR, HITS@1, HITS@3
        and HITS@10 over all ranked samples.
        '''

        def build_loader(mode):
            # One evaluation dataloader per corruption mode.
            return DataLoader(
                TestDataset(test_triples, all_true_triples, args.nentity,
                            args.nrelation, mode),
                batch_size=args.test_batch_size,
                num_workers=max(1, args.cpu_num // 2),
                collate_fn=TestDataset.collate_fn)

        model.eval()

        # Standard (filtered) MRR, MR, HITS@1, HITS@3 and HITS@10 metrics.
        loaders = [build_loader('head-batch'), build_loader('tail-batch')]
        total_steps = sum(len(loader) for loader in loaders)

        logs = []
        step = 0

        with torch.no_grad():
            for loader in loaders:
                for positive_sample, negative_sample, filter_bias, mode in loader:
                    if args.cuda:
                        positive_sample = positive_sample.cuda()
                        negative_sample = negative_sample.cuda()
                        filter_bias = filter_bias.cuda()

                    batch_size = positive_sample.size(0)

                    score = model((positive_sample, negative_sample), mode)
                    score += filter_bias

                    # Sort every candidate entity explicitly so that there is
                    # no test exposure bias.
                    ranked_entities = torch.argsort(score,
                                                    dim=1,
                                                    descending=True)

                    # Column of the positive triple that holds the entity
                    # being predicted in this mode.
                    if mode == 'head-batch':
                        entity_column = 0
                    elif mode == 'tail-batch':
                        entity_column = 2
                    else:
                        raise ValueError('mode %s not supported' % mode)
                    positive_arg = positive_sample[:, entity_column]

                    for i in range(batch_size):
                        # The sorted order is not the rank itself: locate the
                        # true entity in it (it must appear exactly once).
                        position = (ranked_entities[i, :] ==
                                    positive_arg[i]).nonzero()
                        assert position.size(0) == 1

                        # Ranks are 1-based in the evaluation metrics.
                        rank = 1 + position.item()
                        logs.append({
                            'MRR': 1.0 / rank,
                            'MR': float(rank),
                            'HITS@1': float(rank <= 1),
                            'HITS@3': float(rank <= 3),
                            'HITS@10': float(rank <= 10),
                        })

                    if step % args.test_log_steps == 0:
                        logging.info('Evaluating the model... (%d/%d)' %
                                     (step, total_steps))

                    step += 1

        # Average every metric over all ranked samples.
        return {
            metric: sum(log[metric] for log in logs) / len(logs)
            for metric in logs[0].keys()
        }
Ejemplo n.º 18
0
def speech_collate(batch, pad_val=0.0):
    r"""Collate utterance dicts into a padded batch sorted by length.

    Each item provides ``"utt_key"``, ``"inpt_feat"`` and ``"speaker_int"``,
    and optionally ``"target_feat"``. Features are right-padded along their
    first (time) dimension with ``pad_val`` up to the longest input, and all
    fields are reordered by descending input length.

    Returns a dict with ``"utt_keys"``, ``"seq_len"``, ``"input_batch"``
    (B, T, f) and ``"speaker_ints"``, plus ``"target_batch"`` when the first
    item carries a target.
    """
    keys, features, targets, speakers = [], [], [], []
    for item in batch:
        keys.append(item["utt_key"])
        features.append(item["inpt_feat"])
        if "target_feat" in item:
            targets.append(item["target_feat"])
        speakers.append(item["speaker_int"])

    lengths = [feat.size(0) for feat in features]
    longest = max(lengths)

    # Descending-length order, as expected by torch RNN packing utilities.
    order = torch.argsort(torch.tensor(lengths), descending=True).tolist()

    def pad_sort_stack(tensors):
        # (T, f) -> (f, T), right-pad time up to `longest`, reorder, stack to
        # (B, f, T) and finally permute to (B, T, f).
        padded = []
        for tensor in tensors:
            pad = ConstantPad1d((0, int(longest - tensor.size(0))),
                                value=pad_val)
            padded.append(pad(tensor.transpose(0, 1)))
        stacked = torch.stack([padded[i] for i in order], dim=0)
        return stacked.permute(0, 2, 1)

    batch_dict = {
        "utt_keys": [keys[i] for i in order],
        "seq_len": sorted(lengths, reverse=True),
        "input_batch": pad_sort_stack(features),
        "speaker_ints": torch.tensor([speakers[i] for i in order]),
    }

    if "target_feat" in batch[0]:
        batch_dict["target_batch"] = pad_sort_stack(targets)

    return batch_dict
Ejemplo n.º 19
0
    def argsort(self, x, dim=-1):
        """Return the indices that sort ``x`` along ``dim`` (ascending)."""
        order = torch.argsort(x, dim=dim)
        return order
Ejemplo n.º 20
0
def ctc_beam_search_decoder(log_probs_seq,
                            lm_scorer=None,
                            beam_size=100,
                            blank=0,
                            cutoff_prob=1.0,
                            cutoff_top_n=None):
    """
    Performs prefix beam search on the output of a CTC network.

    Args:
        log_probs_seq (tensor): The log probabilities. Should be a 2D array (timesteps x alphabet_size)
        lm_scorer (callable, optional): Language model scorer. It is called as
            ``lm_scorer(prefix, state)`` (and with ``eos=True`` once decoding has
            finished) and its result is added to the beam's ``score_lm``. Its
            ``is_valid`` attribute is handed to ``Beams``.
        beam_size (int): The beam width. Will keep the `beam_size` most likely candidates at each
            timestep.
        blank (int): Blank label index
        cutoff_prob: Cutoff probability for pruning. Defaults to `1.0`, meaning no pruning
        cutoff_top_n: Cutoff number for pruning. A falsy value (``None`` or 0)
            means the full alphabet size is used.

    Returns:
        list: One ``(-score_ctc, prefix, timesteps)`` tuple per beam, in the
        order produced by ``beams.sort()``. The language model score is not
        part of the returned score.
    """
    T, V = log_probs_seq.shape
    log_cutoff_prob = math.log(cutoff_prob)
    cutoff_top_n = min(cutoff_top_n, V) if cutoff_top_n else V

    beams = Beams(is_valid=lm_scorer.is_valid if lm_scorer else None)

    for t in range(T):

        log_probs = log_probs_seq[t]

        # Snapshot of the current beams as (prefix, beam) pairs.
        curr_beams = list(beams.items())

        num_prefixes = len(curr_beams)

        # Score of the last beam in the snapshot extended by a blank; used
        # below to stop early once the beam is full.
        # NOTE(review): presumably items() is ordered best-first so that the
        # last entry is the weakest beam -- confirm against Beams.
        min_cutoff = curr_beams[-1][-1].score_ctc + log_probs[blank]

        # Pruning step: by default every token index is considered.
        pruned_indexes = torch.arange(len(log_probs)).tolist()
        if log_cutoff_prob < 0.0 or cutoff_top_n < V:
            # Keep the most probable tokens, bounded by the cumulative
            # probability cutoff and by cutoff_top_n.
            idxs = torch.argsort(log_probs, descending=True)
            n_idxs = min(
                (logcumsumexp(log_probs[idxs], 0) <= log_cutoff_prob).sum(),
                cutoff_top_n, V)
            pruned_indexes = idxs[:n_idxs].tolist()

        for token_index in pruned_indexes:

            p = log_probs[token_index].item()

            # The variables p_b and p_nb are respectively the
            # probabilities for the prefix given that it ends in a
            # blank and does not end in a blank at this time step.
            for prefix, beam in curr_beams:
                p_b, p_nb = beam.p_b, beam.p_nb

                # With a full beam, stop as soon as this extension scores
                # below the cutoff computed above.
                if (num_prefixes
                        == beam_size) and p + beam.score_ctc < min_cutoff:
                    break

                # If we propose a blank the prefix doesn't change. Only the probability of ending
                # in blank gets updated.
                if token_index == blank:
                    beam.n_p_b = np.logaddexp(beam.n_p_b, beam.score_ctc + p)
                    continue

                # Extend the prefix by the new character s and add it to the beam. Only the
                # probability of not ending in blank gets updated.
                last_token_index = prefix[-1] if prefix else None

                if token_index == last_token_index:
                    # If s is repeated at the end we also update the unchanged prefix. This is the
                    # merging case.
                    beam.n_p_nb = np.logaddexp(beam.n_p_nb, p_nb + p)

                n_prefix = prefix + (token_index, )

                # Must update state for prefix search
                n_beam = beams.getitem(n_prefix, previous_beam=beam)
                if not n_beam:
                    # No usable beam was returned for the extended prefix.
                    continue

                n_p_b, n_p_nb = n_beam.n_p_b, n_beam.n_p_nb

                if token_index == last_token_index and p_b > -float('inf'):
                    # We don't include the previous probability of not ending in blank (p_nb)
                    # if s is repeated at the end. The CTC algorithm merges characters not
                    # separated by a blank.
                    n_p_nb = np.logaddexp(n_p_nb, p_b + p)
                elif token_index != last_token_index:
                    n_p_nb = np.logaddexp(n_p_nb, beam.score_ctc + p)

                if lm_scorer:
                    # LM scorer has access and updates the state variable
                    p_lm = lm_scorer(n_prefix, n_beam.state)
                    n_beam.score_lm = beam.score_lm + p_lm

                n_beam.n_p_b = n_p_b
                n_beam.n_p_nb = n_p_nb

        # Update the probabilities
        beams.step()
        # Trim the beam before moving on to the next time-step.
        beams.topk_(beam_size)

    # score the eos
    # TODO improve this step (better readability)
    if lm_scorer:
        for prefix, beam in beams.items():
            if prefix:
                p_lm = lm_scorer(prefix, beam.state, eos=True)
                beam.score_lm += p_lm

    # Return the negated CTC log score, prefix and timesteps of every
    # remaining beam, without the lm scoring.
    return [(-beam.score_ctc, p, beam.timesteps) for p, beam in beams.sort()]
Ejemplo n.º 21
0
 def argsort(self, dim=None, descending=False):
     r"""See :func:`torch.argsort`.

     Args:
         dim: Dimension to sort along. ``None`` (the default) sorts along
             the last dimension.
         descending: Sort from largest to smallest when true.

     Returns:
         Tensor of indices that sort ``self`` along ``dim``.
     """
     if dim is None:
         # torch.argsort expects an integer dimension (its own default is
         # -1), so the "unspecified" default is mapped to the last
         # dimension instead of being forwarded as None.
         dim = -1
     return torch.argsort(self, dim, descending)
Ejemplo n.º 22
0
import torch

# Draw a random 4x4 matrix and show it.
values = torch.randn(4, 4)
print(values)

# Indices that order each row from largest to smallest
# (torch.argsort works along the last dimension by default).
ranking = torch.argsort(values, descending=True)
print(ranking)

# First two columns: positions of the two largest entries of every row.
top_two = ranking[:, :2]
print(top_two)
Ejemplo n.º 23
0
    def _get_new_words(self, current_text, indices_to_modify):
        """Propose candidate words for every requested word index using the
        masked language model.

        A mask token is inserted before each index, the text is cut to a
        window around it, and the model's predictions for the mask are
        filtered into a candidate list.

        Args:
            current_text (AttackedText): Text we want to get candidates for.
            indices_to_modify (list[int]): list of word indices where we want to insert

        Returns:
            A 2-D list: one list of candidate words per entry of
            ``indices_to_modify`` (empty when the mask token was truncated).
        """
        masked_texts = [
            current_text.insert_text_before_word_index(
                index, self._lm_tokenizer.mask_token).text_window_around_index(
                    index, self.window_size)
            for index in indices_to_modify
        ]

        new_words = []
        batch_start = 0
        while batch_start < len(masked_texts):
            batch_end = batch_start + self.batch_size
            inputs = self._encode_text(masked_texts[batch_start:batch_end])
            ids = inputs["input_ids"].tolist()
            with torch.no_grad():
                preds = self._language_model(**inputs)[0]

            for row, token_ids in enumerate(ids):
                # A mask token located past max_length might have been
                # truncated by the tokenizer; such inputs get no candidates.
                try:
                    masked_index = token_ids.index(
                        self._lm_tokenizer.mask_token_id)
                except ValueError:
                    new_words.append([])
                    continue

                mask_token_probs = torch.softmax(preds[row, masked_index],
                                                 dim=0)
                candidates = []
                # Walk the vocabulary from most to least probable token.
                for token_id in torch.argsort(mask_token_probs,
                                              descending=True):
                    token_id = token_id.item()
                    prob = mask_token_probs[token_id]
                    word = self._lm_tokenizer.convert_ids_to_tokens(token_id)
                    is_subword = utils.check_if_subword(
                        word,
                        self._language_model.config.model_type,
                        (masked_index == 1),
                    )
                    if is_subword:
                        word = utils.strip_BPE_artifacts(
                            word, self._language_model.config.model_type)
                    if (prob >= self.min_confidence
                            and utils.is_one_word(word)
                            and not utils.check_if_punctuations(word)):
                        candidates.append(word)

                    # Stop once enough candidates were collected or the
                    # remaining tokens fall below the confidence threshold.
                    if (len(candidates) >= self.max_candidates
                            or prob < self.min_confidence):
                        break

                new_words.append(candidates)

            batch_start = batch_end

        return new_words
Ejemplo n.º 24
0
                                add_special_tokens=False,
                                return_tensors="pt").input_ids.to(device)
        predictions_aff_a = model(input_ids,
                                  decoder_input_ids=decoder_ids).logits
    input_ids = torch.tensor(
        [tokenizer.encode(neg_a[i], add_special_tokens=True)]).to(device)
    with torch.no_grad():
        decoder_ids = tokenizer("<pad> <extra_id_0>",
                                add_special_tokens=False,
                                return_tensors="pt").input_ids.to(device)
        predictions_neg_a = model(input_ids,
                                  decoder_input_ids=decoder_ids).logits

    aff_a_preds = []
    predictions_aff_a = torch.softmax(predictions_aff_a[0, 1],
                                      dim=0)  # 1 is position of <extra_id_0>
    top_inds = torch.argsort(predictions_aff_a,
                             descending=True)[:5].cpu().numpy()
    for top_ind in top_inds:
        aff_a_preds.append(tokenizer.decode([top_ind]))

    neg_a_preds = []
    predictions_neg_a = torch.softmax(predictions_neg_a[0, 1],
                                      dim=0)  # 1 is position of <extra_id_0>
    top_inds = torch.argsort(predictions_neg_a,
                             descending=True)[:5].cpu().numpy()
    for top_ind in top_inds:
        neg_a_preds.append(tokenizer.decode([top_ind]))

    print(aff_a_preds, neg_a_preds)
Ejemplo n.º 25
0
        metrics[f'{stage}_{metric}'] = []

for epoch in range(1, EPOCHS + 1):

    for data_loader in [data_loader_train, data_loader_test]:
        metrics_epoch = {key: [] for key in metrics.keys()}

        stage = 'train'
        if data_loader == data_loader_test:
            stage = 'test'

        for x, y, lengths in data_loader:

            x = x.float().to(DEVICE)
            y = y.float().to(DEVICE)
            idxes = torch.argsort(lengths, descending=True)
            lengths = lengths[idxes]
            max_len = int(lengths.max())
            # sort sentences by length desc and slice
            # in x last word is either empty or 'END', and in y it is shifted first word)
            x = x[idxes, :max_len]
            y = y[idxes, :max_len]
            x_packed = pack_padded_sequence(x, lengths, batch_first=True)
            y_packed = pack_padded_sequence(y, lengths, batch_first=True)

            y_prim_packed = model.forward(x_packed)

            weights = torch.from_numpy(dataset_full.weights[torch.argmax(
                y_packed.data, dim=1).cpu().numpy()])
            weights = weights.unsqueeze(dim=1).to(DEVICE)
            loss = -torch.mean(
Ejemplo n.º 26
0
 def inverse_permutation(self):
     """Return the argsort of ``self.permutation``.

     When ``self.permutation`` holds a permutation of ``0..n-1`` this is
     its inverse permutation.
     """
     forward = self.permutation
     return torch.argsort(forward)
Ejemplo n.º 27
0
    def test_box_decomposition_list(self):
        """Check BoxDecompositionList built from an empty and a non-empty
        Pareto set: pareto_Y, ref_point, hypercell bounds, hypervolume and
        update() with a batched tensor, a list and a wrongly shaped tensor.
        """

        def check_equal(first, second):
            # Exact element-wise tensor equality.
            self.assertTrue(torch.equal(first, second))

        ref_point_raw = torch.zeros(3, device=self.device)
        pareto_Y_raw = torch.tensor(
            [
                [1.0, 2.0, 1.0],
                [2.0, 0.5, 1.0],
            ],
            device=self.device,
        )
        for m, dtype in product((2, 3), (torch.float, torch.double)):
            ref_point = ref_point_raw[:m].to(dtype=dtype)
            pareto_Y = pareto_Y_raw[:, :m].to(dtype=dtype)
            # First entry is an empty Pareto set, second the full one.
            pareto_Y_list = [pareto_Y[:0, :m], pareto_Y[:, :m]]
            bds = [
                FastNondominatedPartitioning(ref_point=ref_point, Y=Y)
                for Y in pareto_Y_list
            ]
            bd = BoxDecompositionList(*bds)

            # test pareto Y
            bd_pareto_Y_list = bd.pareto_Y
            pareto_Y1 = pareto_Y_list[1]
            if m == 2:
                # With two outcomes the expected points are ordered by
                # decreasing first objective.
                expected_pareto_Y1 = pareto_Y1[torch.argsort(-pareto_Y1[:, 0])]
            else:
                expected_pareto_Y1 = pareto_Y1
            check_equal(bd_pareto_Y_list[0], pareto_Y_list[0])
            check_equal(bd_pareto_Y_list[1], expected_pareto_Y1)

            # test ref_point
            check_equal(bd.ref_point, ref_point.unsqueeze(0).expand(2, -1))

            # test get_hypercell_bounds
            cell_bounds = bd.get_hypercell_bounds()
            expected_cell_bounds1 = bds[1].get_hypercell_bounds()
            check_equal(cell_bounds[:, 1], expected_cell_bounds1)
            # the first pareto set in the list is empty so the cell bounds
            # should contain one cell that spans the entire area (bounded by the
            # ref_point) and then empty cells, bounded from above and below by the
            # ref point.
            expected_cell_bounds0 = torch.zeros_like(expected_cell_bounds1)
            # set the upper bound for the first cell to be inf
            expected_cell_bounds0[1, 0, :] = float("inf")
            check_equal(cell_bounds[:, 0], expected_cell_bounds0)

            # test compute_hypervolume
            expected_hv = torch.stack([b.compute_hypervolume() for b in bds],
                                      dim=0)
            hv = bd.compute_hypervolume()
            check_equal(expected_hv, hv)

            # test update with batched tensor
            new_Y = torch.empty(2, 1, m, dtype=dtype, device=self.device)
            new_Y[0] = 1
            new_Y[1] = 3
            bd.update(new_Y)
            bd_pareto_Y_list = bd.pareto_Y
            check_equal(bd_pareto_Y_list[0], new_Y[0])
            check_equal(bd_pareto_Y_list[1], new_Y[1])

            # test update with list
            bd = BoxDecompositionList(*bds)
            bd.update([new_Y[0], new_Y[1]])
            bd_pareto_Y_list = bd.pareto_Y
            check_equal(bd_pareto_Y_list[0], new_Y[0])
            check_equal(bd_pareto_Y_list[1], new_Y[1])

            # test update with wrong shape
            bd = BoxDecompositionList(*bds)
            with self.assertRaises(BotorchTensorDimensionError):
                bd.update(new_Y.unsqueeze(0))
0
    noisy_img_list.append(noisy_img)
# Training loop: for every image, repeatedly pick a random reference patch and
# fit W so that the closest patches stay close to the reference after the
# transform while farther patches are pushed away.
# NOTE(review): no optimizer step is visible after loss.backward() in this
# excerpt -- confirm it follows in the full script.
for step in range(num_steps):
    step_loss = 0
    step_fitloss = 0
    for i in range(num_image):
        img1 = image_list[i]
        noisy_img1 = noisy_img_list[i]
        x = im2col(img1, (patch_size, patch_size))
        # x_noisy = im2col(noisy_img1,(patch_size,patch_size))
        x = torch.tensor(x, dtype=torch.float).cuda()
        # x_noisy = torch.tensor(x_noisy, dtype=torch.float).cuda()
        for batch_index in range(batchs_size):
            # Pick a random reference patch (a column of x).
            ref_ind = np.random.randint(x.shape[1])
            x_ref = x[:, ref_ind:ref_ind + 1]
            # Distance from the reference to every (clean) patch.
            norms = torch.norm((x_ref - x), dim=0)
            # Sort the distances once and slice both index sets from the same
            # ordering instead of sorting the same tensor twice.
            order = torch.argsort(norms)
            # Closest patches, skipping position 0 of the ordering.
            match_inds = order[1:num_matches + 1]
            # Patches taken from rank 50 onwards serve as non-matches.
            un_match_inds = order[50:50 + num_unmatches]
            # x_ref1 = x_noisy[:, ref_ind:ref_ind + 1]
            x_matched = x[:, match_inds]
            x_unmatched = x[:, un_match_inds]
            # Alternative fit loss using hard thresholding:
            # loss_fit = torch.mean(torch.norm(hard_thresh(W @ x_ref, threshold) - hard_thresh(W @ x_matched, threshold), dim=0)) \
            #        - torch.mean(torch.norm(hard_thresh(W @ x_ref, threshold) - hard_thresh(W @ x_unmatched, threshold), dim=0))
            loss_fit = torch.mean(torch.norm(W@x_ref-W@x_matched,dim=0))-torch.mean(torch.norm(W@x_ref-W@x_unmatched,dim=0))
            # Regulariser: negative log|det W| plus the squared magnitude of W.
            loss_reg = - gamma_0 * W.slogdet()[1] + gamma_1 * torch.sum(torch.abs(W)**2)#torch.norm(W)#torch.sum((W)**2)
            loss = loss_fit + loss_reg
            step_loss += loss.item()
            step_fitloss += loss_fit.item()
            # W1 = W.detach().numpy()
            # cond.append(np.linalg.cond(W1))
            opti.zero_grad()
            loss.backward()
Ejemplo n.º 29
0
    def get_seg_single(self,
                       cate_preds,
                       seg_preds,
                       kernel_preds,
                       featmap_size,
                       img_shape,
                       ori_shape,
                       scale_factor,
                       cfg,
                       rescale=False,
                       debug=False):
        """Turn the raw predictions for one image into instance masks.

        Candidates above ``cfg.score_thr`` are converted to masks with their
        predicted kernels, filtered by mask area, rescored, passed through
        Matrix NMS and finally resized to ``ori_shape``.

        Returns:
            ``(seg_masks, cate_labels, cate_scores)``, or ``None`` when no
            candidate survives one of the filtering stages.
        """
        assert len(cate_preds) == len(kernel_preds)

        # overall info.
        img_h, img_w, _ = img_shape
        upsampled_size_out = (featmap_size[0] * 4, featmap_size[1] * 4)

        # Candidates whose category score passes the threshold.
        score_mask = (cate_preds > cfg.score_thr)
        cate_scores = cate_preds[score_mask]
        if len(cate_scores) == 0:
            return None

        # Row index selects the kernel, column index is the category label.
        hit_positions = score_mask.nonzero()
        kernel_ids = hit_positions[:, 0]
        cate_labels = hit_positions[:, 1]
        kernel_preds = kernel_preds[kernel_ids]

        # Per-position stride: each stage owns a contiguous range of
        # num_grid ** 2 positions and uses its own stride value.
        size_trans = cate_labels.new_tensor(
            self.seg_num_grids).pow(2).cumsum(0)
        strides = kernel_preds.new_ones(size_trans[-1])
        stage_begin = 0
        for stage in range(len(self.seg_num_grids)):
            stage_end = size_trans[stage]
            strides[stage_begin:stage_end] *= self.strides[stage]
            stage_begin = stage_end
        strides = strides[kernel_ids]

        # mask encoding: every kernel acts as a 1x1 convolution filter.
        num_kernels, kernel_dim = kernel_preds.shape
        kernel_preds = kernel_preds.view(num_kernels, kernel_dim, 1, 1)
        seg_preds = F.conv2d(seg_preds, kernel_preds, stride=1)
        seg_preds = seg_preds.squeeze(0).sigmoid()

        # Binary masks and their areas.
        seg_masks = seg_preds > cfg.mask_thr
        sum_masks = seg_masks.sum((1, 2)).float()

        # Drop masks whose area does not exceed their stride.
        keep = sum_masks > strides
        if keep.sum() == 0:
            return None

        seg_masks, seg_preds = seg_masks[keep, ...], seg_preds[keep, ...]
        sum_masks = sum_masks[keep]
        cate_scores, cate_labels = cate_scores[keep], cate_labels[keep]

        # mask scoring: weight the category score by the mean mask confidence.
        seg_scores = (seg_preds * seg_masks.float()).sum((1, 2)) / sum_masks
        cate_scores *= seg_scores

        # sort and keep top nms_pre
        sort_inds = torch.argsort(cate_scores, descending=True)
        if len(sort_inds) > cfg.nms_pre:
            sort_inds = sort_inds[:cfg.nms_pre]
        seg_masks = seg_masks[sort_inds, :, :]
        seg_preds = seg_preds[sort_inds, :, :]
        sum_masks = sum_masks[sort_inds]
        cate_scores, cate_labels = cate_scores[sort_inds], cate_labels[sort_inds]

        # Matrix NMS
        cate_scores = matrix_nms(seg_masks,
                                 cate_labels,
                                 cate_scores,
                                 kernel=cfg.kernel,
                                 sigma=cfg.sigma,
                                 sum_masks=sum_masks)

        # Drop candidates whose updated score falls below the threshold.
        keep = cate_scores >= cfg.update_thr
        if keep.sum() == 0:
            return None
        seg_preds = seg_preds[keep, :, :]
        cate_scores, cate_labels = cate_scores[keep], cate_labels[keep]

        # sort and keep top_k
        sort_inds = torch.argsort(cate_scores, descending=True)
        if len(sort_inds) > cfg.max_per_img:
            sort_inds = sort_inds[:cfg.max_per_img]
        seg_preds = seg_preds[sort_inds, :, :]
        cate_scores, cate_labels = cate_scores[sort_inds], cate_labels[sort_inds]

        # Upsample, crop to the image size, resize to the original shape and
        # binarise.
        upsampled = F.interpolate(seg_preds.unsqueeze(0),
                                  size=upsampled_size_out,
                                  mode='bilinear')
        seg_preds = upsampled[:, :, :img_h, :img_w]
        resized = F.interpolate(seg_preds,
                                size=ori_shape[:2],
                                mode='bilinear').squeeze(0)
        seg_masks = resized > cfg.mask_thr
        return seg_masks, cate_labels, cate_scores
Ejemplo n.º 30
0
    def forward(self, data, final, start):
        """Run the full-resolution branch and two street-pooled branches, then mix.

        The input graph is processed at three resolutions: the original nodes
        and two max-pooled versions built from ``self.clusters1`` and
        ``self.clusters2``. Each branch runs ``self.layers`` convolution
        blocks. The pooled branches are then interpolated back onto the
        original nodes with inverse-squared-distance kNN weights, and all
        branches are concatenated and passed through ``self.conv_mix``.

        Args:
            data: Batched graph object exposing ``x``, ``edge_index``, ``pos``
                and ``batch``. NOTE: ``data.batch`` is overwritten in place
                with the per-node category ids offset by ``5 * graph_index``.
            final: If true, recompute and cache the kNN unpooling indices and
                weights (the original comment notes the last batch may have a
                different size).
            start: If true, recompute and cache the kNN unpooling indices and
                weights (intended for the first batch).

        Returns:
            The output of ``self.conv_mix``, with dropout applied unless
            ``self.p == 1``.

        Raises:
            NotImplementedError: If ``self.clustering`` is not ``'Street'``.
            AttributeError: If neither ``start`` nor ``final`` has ever been
                true, because the cached ``self.weights`` / ``self.xIdx`` /
                ``self.backSorter`` attributes are then missing.
        """
        # Only street based pooling is implemented; fail with a clear message
        # instead of an UnboundLocalError on the final ``return``.
        if self.clustering != 'Street':
            raise NotImplementedError(
                "forward() only supports clustering='Street', got {!r}".format(
                    self.clustering))

        x, edge_index, pos, batch = data.x, data.edge_index, data.pos, data.batch
        x_start = x

        def with_coords(feats, coords, cat_ids):
            # Append coordinates scaled by their global max and the category
            # id (taken modulo 5 and scaled by 1/4) as extra input columns.
            norm_pos = coords / torch.max(coords)
            norm_cat = ((cat_ids % 5).float() / 4).view(cat_ids.size(0), 1)
            return torch.cat((feats, norm_pos, norm_cat), 1)

        def run_branch(feats, edges, convs, skips):
            # Apply the convolution blocks of one branch; with midSkip the
            # block input is concatenated back and reduced by a skip conv.
            for i in range(self.layers):
                block_in = feats
                feats = convs[i](feats, edges)
                if self.midSkip:
                    feats = torch.cat((feats, block_in), 1)
                    # The batch-norm layers are shared by all three branches
                    # and are looked up lazily, so only the needed ones must
                    # exist on the module.
                    if i == 0:
                        bn = self.bn
                    elif i == 1:
                        bn = self.bn2
                    else:
                        bn = self.bn3
                    feats = F.relu(bn(skips[i](feats, edges)))
            return feats

        def knn_weights(coarse_pos, coarse_batch, fine_pos, fine_batch):
            # For every fine node find its self.knn nearest coarse nodes in
            # the same group and weight them by inverse squared distance.
            y_idx, x_idx = knn(coarse_pos,
                               fine_pos,
                               self.knn,
                               batch_x=coarse_batch,
                               batch_y=fine_batch)
            diff = coarse_pos[x_idx] - fine_pos[y_idx]
            squared_distance = (diff * diff).sum(dim=-1, keepdim=True)
            # Clamp so coincident points do not divide by zero.
            return x_idx, y_idx, 1.0 / torch.clamp(squared_distance, min=1e-16)

        # Divide clusters and categories from different batches: graph i gets
        # its ids shifted by i * <ids per graph>. Build all pieces first and
        # concatenate once instead of growing the tensors inside a loop.
        num_graphs = int(torch.max(batch)) + 1
        shifts = range(1, num_graphs)
        batchClusters1 = torch.cat(
            [self.clusters1]
            + [self.clusters1 + i * self.maxCluster1 for i in shifts])
        batchCat = torch.cat(
            [self.categories]
            + [self.categories + i * 5 for i in shifts]).long()
        batchClusters2 = torch.cat(
            [self.clusters2]
            + [self.clusters2 + i * self.maxCluster2 for i in shifts])

        # The pooling below reads data.batch, so the category ids act as the
        # batch vector.
        data.batch = batchCat

        # Both pooled branches, max pooling (each starts from the same input)
        pooled1 = max_pool(batchClusters1, data)
        x_t, edge_index_t = pooled1.x, pooled1.edge_index
        pos_t, batchCat_t = pooled1.pos, pooled1.batch

        pooled2 = max_pool(batchClusters2, data)
        x_t2, edge_index_t2 = pooled2.x, pooled2.edge_index
        pos_t2, batchCat_t2 = pooled2.pos, pooled2.batch

        edge_index_t, _ = add_self_loops(edge_index_t)
        edge_index_t2, _ = add_self_loops(edge_index_t2)

        # Add coordinates and categories to input
        if self.coords:
            x = with_coords(x, pos, batchCat)
            x_t = with_coords(x_t, pos_t, batchCat_t)
            x_t2 = with_coords(x_t2, pos_t2, batchCat_t2)

        # Perform convolution blocks in all 3 branches (same order as before,
        # which matters for the shared batch-norm running statistics).
        x = run_branch(x, edge_index, self.moduleList1, self.skipList1)
        x_t = run_branch(x_t, edge_index_t, self.moduleList2, self.skipList2)
        x_t2 = run_branch(x_t2, edge_index_t2, self.moduleList3,
                          self.skipList3)

        # Calculate knn weights of both pooled branches for the first batch
        # (and the last, since its size might be different). This is done once
        # even when both flags are set: repeating it on already-sorted data
        # would store a backSorter that no longer maps to the original order.
        if start or final:
            sorter = torch.argsort(batchCat)
            sorted_pos = pos[sorter]
            sorted_cat = batchCat[sorter]

            first = knn_weights(pos_t, batchCat_t, sorted_pos, sorted_cat)
            second = knn_weights(pos_t2, batchCat_t2, sorted_pos, sorted_cat)

            # Assign only after both computations succeeded.
            self.xIdx, self.yIdx, self.weights = first
            self.xIdx2, self.yIdx2, self.weights2 = second
            # Inverse permutation: restores the original node order.
            self.backSorter = torch.argsort(sorter)

        num_nodes = pos.size(0)

        def unpool(feats, x_idx, y_idx, weights):
            # Weighted average of the neighbouring coarse features per fine
            # node, then undo the sort applied when the weights were built.
            weighted_sum = scatter_add(feats[x_idx] * weights,
                                       y_idx,
                                       dim=0,
                                       dim_size=num_nodes)
            weight_total = scatter_add(weights,
                                       y_idx,
                                       dim=0,
                                       dim_size=num_nodes)
            return (weighted_sum / weight_total)[self.backSorter]

        # Unpool pooled branches
        x_t = unpool(x_t, self.xIdx, self.yIdx, self.weights)
        x_t2 = unpool(x_t2, self.xIdx2, self.yIdx2, self.weights2)

        # Input size of final convolution
        if self.skipconv:
            y = torch.cat((x, x_t, x_t2, x_start), 1)
        else:
            y = torch.cat((x, x_t, x_t2), 1)

        # Do final convolution
        y = self.conv_mix(y, edge_index)

        # Add dropout layer
        if self.p != 1:
            y = F.dropout(y, training=self.training, p=self.p)

        return y