Code example #1
File: loss.py  Project: codealphago/weakalign
 def forward(self, theta, matches, return_outliers=False):
     if isinstance(theta,Variable): # handle normal batch transformations
         batch_size=theta.size()[0]
         theta=theta.clone()
         mask = self.geometricTnf(expand_dim(self.mask_id,0,batch_size),theta)
         if return_outliers:
             mask_outliers = self.geometricTnf(expand_dim(1.0-self.mask_id,0,batch_size),theta)
         if self.normalize:
             epsilon=1e-5
             mask = torch.div(mask,
                              torch.sum(torch.sum(torch.sum(mask+epsilon,3),2),1).unsqueeze(1).unsqueeze(2).unsqueeze(3).expand_as(mask))
             if return_outliers:
                 mask_outliers = torch.div(mask_outliers,
                                           torch.sum(torch.sum(torch.sum(mask_outliers+epsilon,3),2),1).unsqueeze(1).unsqueeze(2).unsqueeze(3).expand_as(mask_outliers))
         score = torch.sum(torch.sum(torch.sum(torch.mul(mask,matches),3),2),1)
         if return_outliers:
             score_outliers = torch.sum(torch.sum(torch.sum(torch.mul(mask_outliers,matches),3),2),1)
             return (score,score_outliers)
     elif isinstance(theta,list): # handle multiple transformations per batch item, batch is in list format (used for RANSAC)
         batch_size = len(theta)
         score = []
         for b in range(batch_size):
             sample_size=theta[b].size(0)
             s=self.forward(theta[b],expand_dim(matches[b,:,:,:].unsqueeze(0),0,sample_size))
             score.append(s)
     return score
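The pattern above in isolation: the mask is divided by its epsilon-padded spatial sum so it integrates to roughly 1, and the score is the mask-weighted sum of the match tensor. A minimal standalone sketch (the shapes are assumptions):

import torch

mask = torch.rand(2, 1, 4, 4)      # assumed (batch, channel, H, W)
matches = torch.rand(2, 1, 4, 4)
epsilon = 1e-5
norm = torch.sum(torch.sum(torch.sum(mask + epsilon, 3), 2), 1)
norm = norm.unsqueeze(1).unsqueeze(2).unsqueeze(3).expand_as(mask)
mask = torch.div(mask, norm)       # mask now sums to ~1 per batch item
score = torch.sum(torch.sum(torch.sum(torch.mul(mask, matches), 3), 2), 1)  # one score per item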
Code example #2
 def forward(self, x, targets):
     batchSize = x.size(0)
     K = x.size(1)-1
     Pnt = 1 / float(self.nLem)
     Pns = 1 / float(self.nLem)
     
     # eq 5.1 : P(origin=model) = Pmt / (Pmt + k*Pnt) 
     Pmt = x.select(1,0)
     Pmt_div = Pmt.add(K * Pnt + eps)  # eps: small module-level constant in the original file
     lnPmt = torch.div(Pmt, Pmt_div)
     
     # eq 5.2 : P(origin=noise) = k*Pns / (Pms + k*Pns)
     Pon_div = x.narrow(1,1,K).add(K * Pns + eps)
     Pon = Pon_div.clone().fill_(K * Pns)
     lnPon = torch.div(Pon, Pon_div)
  
     # equation 6 in ref. A
     lnPmt.log_()
     lnPon.log_()
     
     lnPmtsum = lnPmt.sum(0)
     lnPonsum = lnPon.view(-1, 1).sum(0)
     
     loss = - (lnPmtsum + lnPonsum) / batchSize
     
     return loss
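A tiny numeric illustration of eq 5.1/5.2 as used above, with made-up values (nLem and eps are module-level values in the original file):

import torch

K, Pnt = 4, 1.0 / 1000                 # K noise samples, uniform noise probability
Pmt = torch.tensor([0.9])              # model probability of the data sample
p_model = Pmt / (Pmt + K * Pnt)        # eq 5.1: P(origin=model) ≈ 0.9956
p_noise = (K * Pnt) / (Pmt + K * Pnt)  # eq 5.2 for the same score: ≈ 0.0044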
Code example #3
File: search.py  Project: fyabc/fairseq
    def step(self, step, lprobs, scores):
        super()._init_buffers(lprobs)
        bsz, beam_size, vocab_size = lprobs.size()

        if step == 0:
            # at the first step all hypotheses are equally likely, so use
            # only the first beam
            lprobs = lprobs[:, ::beam_size, :].contiguous()
        else:
            # make probs contain cumulative scores for each hypothesis
            lprobs.add_(scores[:, :, step - 1].unsqueeze(-1))

        torch.topk(
            lprobs.view(bsz, -1),
            k=min(
                # Take the best 2 x beam_size predictions. We'll choose the first
                # beam_size of these which don't predict eos to continue with.
                beam_size * 2,
                lprobs.view(bsz, -1).size(1) - 1,  # -1 so we never select pad
            ),
            out=(self.scores_buf, self.indices_buf),
        )
        torch.div(self.indices_buf, vocab_size, out=self.beams_buf)
        self.indices_buf.fmod_(vocab_size)
        return self.scores_buf, self.indices_buf, self.beams_buf
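The torch.div / fmod pair at the end decodes flattened top-k indices over a (beam_size x vocab_size) score matrix back into a beam index and a token index. A standalone sketch; note that on PyTorch >= 1.8 integer torch.div needs rounding_mode='floor', while the snippet above relies on the older truncating behaviour:

import torch

vocab_size, beam_size = 10, 2
lprobs = torch.randn(1, beam_size * vocab_size)                # bsz=1, flattened scores
scores, indices = torch.topk(lprobs, k=4)
beams = torch.div(indices, vocab_size, rounding_mode='floor')  # which beam each hit came from
tokens = indices.fmod(vocab_size)                              # which vocab entry it is
assert bool((beams * vocab_size + tokens == indices).all())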
Code example #4
    def updateOutput(self, input, y):
        input1, input2 = input[0], input[1]

        # keep backward compatibility
        if self.buffer is None:
            self.buffer = input1.new()
            self.w1 = input1.new()
            self.w22 = input1.new()
            self.w = input1.new()
            self.w32 = input1.new()
            self._outputs = input1.new()

            # comparison operators behave differently from cuda/c implementations
            # TODO: verify name
            if input1.type() == 'torch.cuda.FloatTensor':
                self._idx = torch.cuda.ByteTensor()
            else:
                self._idx = torch.ByteTensor()

        torch.mul(input1, input2, out=self.buffer)
        torch.sum(self.buffer, 1, out=self.w1, keepdim=True)

        epsilon = 1e-12
        torch.mul(input1, input1, out=self.buffer)
        torch.sum(self.buffer, 1, out=self.w22, keepdim=True).add_(epsilon)
        # self._outputs is also used as a temporary buffer
        self._outputs.resize_as_(self.w22).fill_(1)
        torch.div(self._outputs, self.w22, out=self.w22)
        self.w.resize_as_(self.w22).copy_(self.w22)

        torch.mul(input2, input2, out=self.buffer)
        torch.sum(self.buffer, 1, out=self.w32, keepdim=True).add_(epsilon)
        torch.div(self._outputs, self.w32, out=self.w32)
        self.w.mul_(self.w32)
        self.w.sqrt_()

        torch.mul(self.w1, self.w, out=self._outputs)
        self._outputs = self._outputs.select(1, 0)

        torch.eq(y, -1, out=self._idx)
        self._outputs[self._idx] = self._outputs[self._idx].add_(-self.margin).clamp_(min=0)
        torch.eq(y, 1, out=self._idx)
        self._outputs[self._idx] = self._outputs[self._idx].mul_(-1).add_(1)

        self.output = self._outputs.sum().item()

        if self.sizeAverage:
            self.output = self.output / y.size(0)

        return self.output
Code example #5
def normalize_batch(batch):
    # normalize using imagenet mean and std
    mean = batch.data.new(batch.data.size())
    std = batch.data.new(batch.data.size())
    mean[:, 0, :, :] = 0.485
    mean[:, 1, :, :] = 0.456
    mean[:, 2, :, :] = 0.406
    std[:, 0, :, :] = 0.229
    std[:, 1, :, :] = 0.224
    std[:, 2, :, :] = 0.225
    batch = torch.div(batch, 255.0)
    batch -= Variable(mean)
    # batch /= Variable(std)
    batch = torch.div(batch,Variable(std))
    return batch
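A quick sanity check for normalize_batch above, assuming a float batch in [0, 255] (Variable is a no-op wrapper on recent PyTorch):

import torch
from torch.autograd import Variable

batch = Variable(torch.rand(2, 3, 8, 8) * 255.0)
out = normalize_batch(batch)
# per channel: roughly (x/255 - mean) / std, so values land around zero
print(out.mean().item())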
Code example #6
File: mol_decoder.py  Project: nair-p/sdvae
    def forward(self, true_binary, rule_masks, raw_logits):
        if cmd_args.loss_type == 'binary':
            exp_pred = torch.exp(raw_logits) * rule_masks

            norm = F.torch.sum(exp_pred, 2, keepdim=True)
            prob = F.torch.div(exp_pred, norm)

            return F.binary_cross_entropy(prob, true_binary) * cmd_args.max_decode_steps

        if cmd_args.loss_type == 'perplexity':
            return my_perp_loss(true_binary, rule_masks, raw_logits)

        if cmd_args.loss_type == 'vanilla':
            exp_pred = torch.exp(raw_logits) * rule_masks + 1e-30
            norm = torch.sum(exp_pred, 2, keepdim=True)
            prob = torch.div(exp_pred, norm)

            ll = F.torch.abs(F.torch.sum( true_binary * prob, 2))
            mask = 1 - rule_masks[:, :, -1]
            logll = mask * F.torch.log(ll)

            loss = -torch.sum(logll) / true_binary.size()[1]
            
            return loss
        print('unknown loss type %s' % cmd_args.loss_type)
        raise NotImplementedError
Code example #7
File: models.py  Project: wangwang110/ARAE
    def encode(self, indices, lengths, noise):
        embeddings = self.embedding(indices)
        packed_embeddings = pack_padded_sequence(input=embeddings,
                                                 lengths=lengths,
                                                 batch_first=True)

        # Encode
        packed_output, state = self.encoder(packed_embeddings)

        hidden, cell = state
        # batch_size x nhidden
        hidden = hidden[-1]  # get hidden state of last layer of encoder

        # normalize to unit ball (l2 norm of 1) - p=2, dim=1
        norms = torch.norm(hidden, 2, 1)
        
        # For older versions of PyTorch use:
        hidden = torch.div(hidden, norms.expand_as(hidden))
        # For newest version of PyTorch (as of 8/25) use this:
        # hidden = torch.div(hidden, norms.unsqueeze(1).expand_as(hidden))

        if noise and self.noise_radius > 0:
            gauss_noise = torch.normal(means=torch.zeros(hidden.size()),
                                       std=self.noise_radius)
            hidden = hidden + to_gpu(self.gpu, Variable(gauss_noise))

        return hidden
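The unit-ball step in isolation (a sketch, not ARAE code): on recent PyTorch, keepdim=True makes the commented unsqueeze variant unnecessary.

import torch

hidden = torch.randn(4, 16)
norms = torch.norm(hidden, 2, 1, keepdim=True)
hidden = torch.div(hidden, norms)
print(hidden.norm(2, 1))   # ~1.0 for every row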
Code example #8
File: models.py  Project: mircean/ML
 def forward(self, X, X_mask):
     #X: [m, Tx] m = batch size, Tx = word count
     #print(X.size(), type(X))
     m = X.size()[0]
     Tx = X.size()[1]
     
     X = self.embedding(X)
     #X: [m, Tx, embedding_dim] m = batch size, Tx = word count
     #print(X.size(), type(X.data))
     assert X.size() == torch.Size([m, Tx, self.embedding_dim])
             
     #average words in doc. use mask so we average only words not padding
     X = torch.sum(X, 1)
     X = Variable(torch.div(X.data, X_mask))
     #X: [m, emb_dim]
     #print(X.size(), type(X.data))
     assert X.size() == torch.Size([m, self.embedding_dim])
     
     X = self.linear(X)
     #X: [m, 1]
     #print(X.size(), type(X))
     if self.num_classes == 2:
         assert X.size() == torch.Size([m, 1])
     else:
         assert X.size() == torch.Size([m, self.num_classes])
         
     if self.num_classes == 2:
         X = torch.squeeze(X)
         X = self.sigmoid(X)
         #X: [m]
         #print(X.size(), type(X))
         assert X.size() == torch.Size([m])
         return X
     else:
         return F.softmax(X)
Code example #9
File: custom_loss.py  Project: nair-p/sdvae
    def backward(ctx, grad_output):
        """
        In the backward pass we receive a Tensor containing the gradient of the loss
        with respect to the output, and we need to compute the gradient of the loss
        with respect to the input.
        """
        true_binary, rule_masks, input_logits  = ctx.saved_tensors

        b = F.torch.max(input_logits, 2, keepdim=True)[0]
        raw_logits = input_logits - b
        exp_pred = torch.exp(raw_logits) * rule_masks + cmd_args.prob_fix

        norm = torch.sum(exp_pred, 2, keepdim=True)
        prob = torch.div(exp_pred, norm)

        grad_matrix1 = grad_matrix2 = None
        
        grad_matrix3 = prob - true_binary
        
        rescale = 1.0
        if not cmd_args.old_loss:
            rescale = 1.0 / true_binary.size()[1]
        grad_matrix3 = grad_matrix3 * rule_masks * grad_output.data * rescale

        return grad_matrix1, grad_matrix2, Variable(grad_matrix3)
Code example #10
File: custom_loss.py  Project: nair-p/sdvae
    def forward(ctx, true_binary, rule_masks, input_logits):
        ctx.save_for_backward(true_binary, rule_masks, input_logits)

        b = F.torch.max(input_logits, 2, keepdim=True)[0]
        raw_logits = input_logits - b
        exp_pred = torch.exp(raw_logits) * rule_masks + cmd_args.prob_fix

        norm = torch.sum(exp_pred, 2, keepdim=True)
        prob = torch.div(exp_pred, norm)

        ll = F.torch.abs(F.torch.sum( true_binary * prob, 2))
        
        mask = 1 - rule_masks[:, :, -1]

        logll = mask * F.torch.log(ll)

        if cmd_args.old_loss:
            nnz = torch.sum(mask)
            loss = -torch.sum(logll) / nnz
        else:
            loss = -torch.sum(logll) / true_binary.size()[1]
        
        if input_logits.is_cuda:
            return torch.Tensor([loss]).cuda()
        else:
            return torch.Tensor([loss])
Code example #11
File: cabasc.py  Project: coder352/shellscript
    def forward(self, inputs):

        # inputs
        text_raw_indices, aspect_indices, x_l, x_r = inputs[0], inputs[1], inputs[2], inputs[3]
        memory_len = torch.sum(text_raw_indices != 0, dim = -1)
        aspect_len = torch.sum(aspect_indices != 0, dim = -1)

        # aspect representation
        nonzeros_aspect = torch.tensor(aspect_len, dtype=torch.float).to(self.opt.device)
        aspect = self.embed(aspect_indices)
        aspect = torch.sum(aspect, dim=1)
        aspect = torch.div(aspect, nonzeros_aspect.view(nonzeros_aspect.size(0), 1))
        x = aspect.unsqueeze(dim=1)

        # memory module
        memory = self.embed(text_raw_indices)  # n x d
        memory = self.squeeze_embedding(memory, memory_len)  # batch_first by default

        # sentence representation
        nonzeros_memory = torch.tensor(memory_len, dtype=torch.float).to(self.opt.device)
        v_s = torch.sum(memory, dim = 1)
        v_s = torch.div(v_s, nonzeros_memory.view(nonzeros_memory.size(0),1))
        v_s = v_s.unsqueeze(dim=1)

        # position attention module
        # NOTE: 'type' below shadows the Python builtin, and 'left_len' is not
        # defined in this snippet; both presumably come from the surrounding code.
        if type == 'c': memory = self.locationed_memory(memory, memory_len, left_len, aspect_len)
        elif type == 'cabasc':
            # context attention
            memory = self.context_attention(x_l, x_r, memory, memory_len, aspect_len)
            # recalculate sentence rep with new memory
            v_s = torch.sum(memory, dim = 1)
            v_s = torch.div(v_s, nonzeros_memory.view(nonzeros_memory.size(0),1))
            v_s = v_s.unsqueeze(dim=1)

        # content attention module
        for _ in range(self.opt.hops):
            # x = self.x_linear(x)
            v_ts = self.attention(memory, x)

        # classifier
        v_ns = v_ts + v_s                                 # embedd the sentence
        v_ns = v_ns.view(v_ns.size(0), -1)
        v_ms = F.tanh(self.mlp(v_ns))
        out = self.dense(v_ms)
        out = F.softmax(out, dim=-1)

        return out
Code example #12
File: adamhd.py  Project: chrinide/py_ml_utils
    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead')

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p.data)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(p.data)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1

                if group['weight_decay'] != 0:
                    grad = grad.add(group['weight_decay'], p.data)

                if state['step'] > 1:
                    prev_bias_correction1 = 1 - beta1 ** (state['step'] - 1)
                    prev_bias_correction2 = 1 - beta2 ** (state['step'] - 1)
                    # Hypergradient for Adam:
                    h = torch.dot(grad.view(-1), torch.div(exp_avg, exp_avg_sq.sqrt().add_(group['eps'])).view(-1)) * math.sqrt(prev_bias_correction2) / prev_bias_correction1
                    # Hypergradient descent of the learning rate:
                    tmp = group['hypergrad_lr'] * h
                    group['lr'] += tmp.double().cpu()

                # Decay the first and second moment running average coefficient
                exp_avg.mul_(beta1).add_(1 - beta1, grad)
                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
                denom = exp_avg_sq.sqrt().add_(group['eps'])

                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']
                step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1

                p.data.addcdiv_(-step_size, exp_avg, denom)

        return loss
Code example #13
File: Euclidean.py  Project: Jsmilemsj/pytorch
    def updateGradInput(self, input, gradOutput):
        if self.gradInput is None:
            return

        if self._div is None:
            self._div = input.new()
        if self._output is None:
            self._output = self.output.new()
        if self._gradOutput is None:
            self._gradOutput = input.new()
        if self._expand3 is None:
            self._expand3 = input.new()

        if not self.fastBackward:
            self.updateOutput(input)

        inputSize, outputSize = self.weight.size(0), self.weight.size(1)

        """
        dy_j   -2 * (w_j - x)     x - w_j
        ---- = ---------------- = -------
         dx    2 || w_j - x ||      y_j
        """

        # to prevent div by zero (NaN) bugs
        self._output.resize_as_(self.output).copy_(self.output).add_(0.0000001)
        self._view(self._gradOutput, gradOutput, gradOutput.size())
        torch.div(gradOutput, self._output, out=self._div)
        assert input.dim() == 2
        batchSize = input.size(0)

        self._div.resize_(batchSize, 1, outputSize)
        self._expand3 = self._div.expand(batchSize, inputSize, outputSize)

        if torch.typename(input) == 'torch.cuda.FloatTensor':
            self._repeat2.resize_as_(self._expand3).copy_(self._expand3)
            self._repeat2.mul_(self._repeat)
        else:
            torch.mul(self._repeat, self._expand3, out=self._repeat2)

        torch.sum(self._repeat2, 2, True, out=self.gradInput)
        self.gradInput.resize_as_(input)

        return self.gradInput
Code example #14
    def sample(self, fc_feats, att_feats, opt={}):
        sample_max = opt.get('sample_max', 1)
        beam_size = opt.get('beam_size', 1)
        temperature = opt.get('temperature', 1.0)
        if beam_size > 1:
            return self.sample_beam(fc_feats, att_feats, opt)

        batch_size = fc_feats.size(0)
        state = self.init_hidden(batch_size)

        # embed fc and att feats
        fc_feats = self.fc_embed(fc_feats)
        _att_feats = self.att_embed(att_feats.view(-1, self.att_feat_size))
        att_feats = _att_feats.view(*(att_feats.size()[:-1] + (self.rnn_size,)))

        # Project the attention feats first to reduce memory and computation consumption.
        p_att_feats = self.ctx2att(att_feats.view(-1, self.rnn_size))
        p_att_feats = p_att_feats.view(*(att_feats.size()[:-1] + (self.att_hid_size,)))

        seq = []
        seqLogprobs = []
        for t in range(self.seq_length + 1):
            if t == 0: # input <bos>
                it = fc_feats.data.new(batch_size).long().zero_()
            elif sample_max:
                sampleLogprobs, it = torch.max(logprobs.data, 1)
                it = it.view(-1).long()
            else:
                if temperature == 1.0:
                    prob_prev = torch.exp(logprobs.data).cpu() # fetch prev distribution: shape Nx(M+1)
                else:
                    # scale logprobs by temperature
                    prob_prev = torch.exp(torch.div(logprobs.data, temperature)).cpu()
                it = torch.multinomial(prob_prev, 1).cuda()
                sampleLogprobs = logprobs.gather(1, Variable(it, requires_grad=False)) # gather the logprobs at sampled positions
                it = it.view(-1).long() # and flatten indices for downstream processing

            xt = self.embed(Variable(it, requires_grad=False))

            if t >= 1:
                # stop when all finished
                if t == 1:
                    unfinished = it > 0
                else:
                    unfinished = unfinished * (it > 0)
                if unfinished.sum() == 0:
                    break
                it = it * unfinished.type_as(it)
                seq.append(it) #seq[t] the input of t+2 time step

                seqLogprobs.append(sampleLogprobs.view(-1))

            output, state = self.core(xt, fc_feats, att_feats, p_att_feats, state)
            logprobs = F.log_softmax(self.logit(output))

        return torch.cat([_.unsqueeze(1) for _ in seq], 1), torch.cat([_.unsqueeze(1) for _ in seqLogprobs], 1)
Code example #15
File: loss.py  Project: Northrend/pytorch
    def forward(ctx, input1, input2, y, margin, size_average):
        ctx.margin = margin
        ctx.size_average = size_average
        ctx.w1 = input1.new()
        ctx.w22 = input1.new()
        ctx.w = input1.new()
        ctx.w32 = input1.new()
        ctx._outputs = input1.new()

        _idx = input1.new().byte()

        buffer = torch.mul(input1, input2)
        torch.sum(buffer, 1, out=ctx.w1, keepdim=True)

        epsilon = 1e-12
        torch.mul(input1, input1, out=buffer)
        torch.sum(buffer, 1, out=ctx.w22, keepdim=True).add_(epsilon)

        ctx._outputs.resize_as_(ctx.w22).fill_(1)
        torch.div(ctx._outputs, ctx.w22, out=ctx.w22)
        ctx.w.resize_as_(ctx.w22).copy_(ctx.w22)

        torch.mul(input2, input2, out=buffer)
        torch.sum(buffer, 1, out=ctx.w32, keepdim=True).add_(epsilon)
        torch.div(ctx._outputs, ctx.w32, out=ctx.w32)
        ctx.w.mul_(ctx.w32)
        ctx.w.sqrt_()

        torch.mul(ctx.w1, ctx.w, out=ctx._outputs)
        ctx._outputs = ctx._outputs.select(1, 0)

        torch.eq(y, -1, out=_idx)
        ctx._outputs[_idx] = ctx._outputs[_idx].add_(-ctx.margin).clamp_(min=0)
        torch.eq(y, 1, out=_idx)
        ctx._outputs[_idx] = ctx._outputs[_idx].mul_(-1).add_(1)

        output = ctx._outputs.sum()

        if ctx.size_average:
            output = output / y.size(0)

        ctx.save_for_backward(input1, input2, y)
        return input1.new((output,))
Code example #16
File: eval.py  Project: minizhao/GAN-for-text
def mask_probabilities(probs, bin_,bins,bins_num):
    mask_words = bins[bin_]
    mask_words = list(set(mask_words))

    divided_probs = torch.div(probs, bins_num)
    numpy_divided_probs = divided_probs.cpu().data.numpy()
    numpy_probs = probs.cpu().data.numpy()
    numpy_probs[:,mask_words] = numpy_divided_probs[:,mask_words]
    probs.data = torch.FloatTensor(numpy_probs).cuda()
    return probs
Code example #17
File: eval.py  Project: minizhao/GAN-for-text
def log_softmax(unnormalized_probs, bin_,bins,bins_num):

    # col softmax
    denom = torch.sum(unnormalized_probs.exp(), 1) # denom is a 200 * 1 tensor
    denom = (denom.expand(unnormalized_probs.size(1),denom.size(0))).permute(1,0).contiguous()
    probs = torch.div(unnormalized_probs.exp(), denom)

    if bins_num >= 2:
        probs=mask_probabilities(probs, bin_,bins,bins_num)
    log_probs = torch.log(probs)
    return log_probs # output is a n * vocab tensor
Code example #18
File: loss.py  Project: athiwatp/pytorch
    def forward(self, input1, input2, y):
        self.w1 = input1.new()
        self.w22 = input1.new()
        self.w = input1.new()
        self.w32 = input1.new()
        self._outputs = input1.new()

        _idx = input1.new().byte()

        buffer = torch.mul(input1, input2)
        torch.sum(buffer, 1, out=self.w1, keepdim=True)

        epsilon = 1e-12
        torch.mul(input1, input1, out=buffer)
        torch.sum(buffer, 1, out=self.w22, keepdim=True).add_(epsilon)

        self._outputs.resize_as_(self.w22).fill_(1)
        torch.div(self._outputs, self.w22, out=self.w22)
        self.w.resize_as_(self.w22).copy_(self.w22)

        torch.mul(input2, input2, out=buffer)
        torch.sum(buffer, 1, out=self.w32, keepdim=True).add_(epsilon)
        torch.div(self._outputs, self.w32, out=self.w32)
        self.w.mul_(self.w32)
        self.w.sqrt_()

        torch.mul(self.w1, self.w, out=self._outputs)
        self._outputs = self._outputs.select(1, 0)

        torch.eq(y, -1, out=_idx)
        self._outputs[_idx] = self._outputs[_idx].add_(-self.margin).clamp_(min=0)
        torch.eq(y, 1, out=_idx)
        self._outputs[_idx] = self._outputs[_idx].mul_(-1).add_(1)

        output = self._outputs.sum()

        if self.size_average:
            output = output / y.size(0)

        self.save_for_backward(input1, input2, y)
        return input1.new((output,))
Code example #19
File: eval_util.py  Project: codealphago/weakalign
def mean_dist(source_points,warped_points,L_pck):
    # compute percentage of correct keypoints
    batch_size=source_points.size(0)
    dist=torch.zeros((batch_size))
    for i in range(batch_size):
        p_src = source_points[i,:]
        p_wrp = warped_points[i,:]
        N_pts = torch.sum(torch.ne(p_src[0,:],-1)*torch.ne(p_src[1,:],-1))
        point_distance = torch.pow(torch.sum(torch.pow(p_src[:,:N_pts]-p_wrp[:,:N_pts],2),0),0.5)
        L_pck_mat = L_pck[i].expand_as(point_distance)
        dist[i]=torch.mean(torch.div(point_distance,L_pck_mat))
    return dist
Code example #20
File: copy_generator.py  Project: Unbabel/OpenNMT-py
    def _compute_loss(self, batch, output, target, copy_attn, align):
        """Compute the loss.

        The args must match :func:`self._make_shard_state()`.

        Args:
            batch: the current batch.
            output: the predict output from the model.
            target: the validate target to compare output with.
            copy_attn: the copy attention value.
            align: the align info.
        """

        target = target.view(-1)
        align = align.view(-1)
        scores = self.generator(
            self._bottle(output), self._bottle(copy_attn), batch.src_map
        )
        loss = self.criterion(scores, align, target)

        # this block does not depend on the loss value computed above
        # and is used only for stats
        scores_data = collapse_copy_scores(
            self._unbottle(scores.clone(), batch.batch_size),
            batch, self.tgt_vocab, batch.dataset.src_vocabs)
        scores_data = self._bottle(scores_data)

        # this block does not depend on the loss value computed above
        # and is used only for stats
        # Correct target copy token instead of <unk>
        # tgt[i] = align[i] + len(tgt_vocab)
        # for i such that tgt[i] == 0 and align[i] != 0
        target_data = target.clone()
        unk = self.criterion.unk_index
        correct_mask = (target_data == unk) & (align != unk)
        offset_align = align[correct_mask] + len(self.tgt_vocab)
        target_data[correct_mask] += offset_align

        # Compute sum of perplexities for stats
        stats = self._stats(loss.sum().clone(), scores_data, target_data)

        # this part looks like it belongs in CopyGeneratorLoss
        if self.normalize_by_length:
            # Compute Loss as NLL divided by seq length
            tgt_lens = batch.tgt[:, :, 0].ne(self.padding_idx).sum(0).float()
            # Compute Total Loss per sequence in batch
            loss = loss.view(-1, batch.batch_size).sum(0)
            # Divide by length of each sequence and sum
            loss = torch.div(loss, tgt_lens).sum()
        else:
            loss = loss.sum()

        return loss, stats
Code example #21
File: trainer.py  Project: Fresh-Z/mtcnn_pytorch
    def compute_accuracy(self, prob_cls, gt_cls):
        #we only need the detection which >= 0
        prob_cls = torch.squeeze(prob_cls)
        mask = torch.ge(gt_cls, 0)
        #get valid element
        valid_gt_cls = torch.masked_select(gt_cls, mask)
        valid_prob_cls = torch.masked_select(prob_cls, mask)
        size = min(valid_gt_cls.size()[0], valid_prob_cls.size()[0])
        prob_ones = torch.ge(valid_prob_cls, 0.6).float()
        right_ones = torch.eq(prob_ones, valid_gt_cls.float()).float()

        return torch.div(torch.mul(torch.sum(right_ones), float(1.0)), float(size))
Code example #22
File: loss.py  Project: codealphago/weakalign
    def forward(self, theta_aff, theta_aff_tps, matches,return_outliers=False):
        batch_size=theta_aff.size()[0]
        mask = self.compGeometricTnf(image_batch=expand_dim(self.mask_id,0,batch_size),
                                     theta_aff=theta_aff,
                                     theta_aff_tps=theta_aff_tps)
        if return_outliers:
            mask_outliers = self.compGeometricTnf(image_batch=expand_dim(1.0-self.mask_id,0,batch_size),
                                                  theta_aff=theta_aff,
                                                  theta_aff_tps=theta_aff_tps)
        if self.normalize:
            epsilon=1e-5
            mask = torch.div(mask,
                             torch.sum(torch.sum(torch.sum(mask+epsilon,3),2),1).unsqueeze(1).unsqueeze(2).unsqueeze(3).expand_as(mask))
            if return_outliers:
                # note: the original divides 'mask' here, which looks like a
                # copy-paste slip; the outlier mask is what should be normalized
                mask_outliers = torch.div(mask_outliers,
                             torch.sum(torch.sum(torch.sum(mask_outliers+epsilon,3),2),1).unsqueeze(1).unsqueeze(2).unsqueeze(3).expand_as(mask_outliers))
        score = torch.sum(torch.sum(torch.sum(torch.mul(mask,matches),3),2),1)

        if return_outliers:
            score_outliers = torch.sum(torch.sum(torch.sum(torch.mul(mask_outliers,matches),3),2),1)
            return (score,score_outliers)
        return score
Code example #23
File: custom_loss.py  Project: nair-p/sdvae
    def forward(ctx, true_binary, rule_masks, input_logits):
        ctx.save_for_backward(true_binary, rule_masks, input_logits)

        b = F.torch.max(input_logits, 2, keepdim=True)[0]
        raw_logits = input_logits - b
        exp_pred = torch.exp(raw_logits) * rule_masks

        norm = torch.sum(exp_pred, 2, keepdim=True)
        prob = torch.div(exp_pred, norm)
                
        loss = F.binary_cross_entropy(prob, true_binary)
        
        return loss
Code example #24
    def forward(self, text, video, ind, conf=True):

        text_embd = {}

        for i, l in enumerate(self.video_GU):
            video[self.m[i]] = l(video[self.m[i]])

        for i, l in enumerate(self.text_GU):
            text_embd[self.m[i]] = l(text)


        #MOE weights computation + normalization ------------
        moe_weights = self.moe_fc(text)
        moe_weights = F.softmax(moe_weights, dim=1)

        available_m = np.zeros(moe_weights.size())

        i = 0
        for m in video:
            available_m[:,i] = ind[m]
            i += 1

        available_m = th.from_numpy(available_m).float()
        available_m = Variable(available_m.cuda())

        moe_weights = available_m*moe_weights

        norm_weights = th.sum(moe_weights, dim=1)
        norm_weights = norm_weights.unsqueeze(1)
        moe_weights = th.div(moe_weights, norm_weights)

        #MOE weights computation + normalization ------ DONE

        if conf:
            conf_matrix = Variable(th.zeros(len(text),len(text)).cuda())
            i = 0
            for m in video:
                video[m] = video[m].transpose(0,1)
                conf_matrix += moe_weights[:,i:i+1]*th.matmul(text_embd[m], video[m])
                i += 1

            return conf_matrix
        else:
            i = 0
            scores = Variable(th.zeros(len(text)).cuda())
            for m in video:
                text_embd[m] = moe_weights[:,i:i+1]*text_embd[m]*video[m]
                scores += th.sum(text_embd[m], dim=-1)
                i += 1
             
            return scores
Code example #25
File: random_sampling.py  Project: Unbabel/OpenNMT-py
def sample_with_temperature(logits, sampling_temp, keep_topk):
    """Select next tokens randomly from the top k possible next tokens.

    Samples from a categorical distribution over the ``keep_topk`` words using
    the category probabilities ``logits / sampling_temp``.

    Args:
        logits (FloatTensor): Shaped ``(batch_size, vocab_size)``.
            These can be logits (``(-inf, inf)``) or log-probs (``(-inf, 0]``).
            (The distribution actually uses the log-probabilities
            ``logits - logits.logsumexp(-1)``, which equals the logits if
            they are log-probabilities summing to 1.)
        sampling_temp (float): Used to scale down logits. The higher the
            value, the more likely it is that a non-max word will be
            sampled.
        keep_topk (int): This many words could potentially be chosen. The
            other logits are set to have probability 0.

    Returns:
        (LongTensor, FloatTensor):

        * topk_ids: Shaped ``(batch_size, 1)``. These are
          the sampled word indices in the output vocab.
        * topk_scores: Shaped ``(batch_size, 1)``. These
          are essentially ``(logits / sampling_temp)[topk_ids]``.
    """

    if sampling_temp == 0.0 or keep_topk == 1:
        # For temp=0.0, take the argmax to avoid divide-by-zero errors.
        # keep_topk=1 is also equivalent to argmax.
        topk_scores, topk_ids = logits.topk(1, dim=-1)
        if sampling_temp > 0:
            topk_scores /= sampling_temp
    else:
        logits = torch.div(logits, sampling_temp)

        if keep_topk > 0:
            top_values, top_indices = torch.topk(logits, keep_topk, dim=1)
            kth_best = top_values[:, -1].view([-1, 1])
            kth_best = kth_best.repeat([1, logits.shape[1]]).float()

            # Set all logits that are not in the top-k to -10000.
            # This puts the probabilities close to 0.
            ignore = torch.lt(logits, kth_best)
            logits = logits.masked_fill(ignore, -10000)

        dist = torch.distributions.Multinomial(
            logits=logits, total_count=1)
        topk_ids = torch.argmax(dist.sample(), dim=1, keepdim=True)
        topk_scores = logits.gather(dim=1, index=topk_ids)
    return topk_ids, topk_scores
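A hypothetical call to sample_with_temperature, assuming the function above is in scope (input values are made up):

import torch

logits = torch.log_softmax(torch.randn(3, 100), dim=-1)   # (batch_size, vocab_size)
topk_ids, topk_scores = sample_with_temperature(logits, sampling_temp=0.7, keep_topk=10)
print(topk_ids.shape, topk_scores.shape)                  # both (3, 1)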
Code example #26
File: utils.py  Project: AashishV/visDial.pytorch
def l2_norm(input):
    """
    input: feature that need to normalize.
    output: normalized feature.
    """
    input_size = input.size()
    buffer = torch.pow(input, 2)

    normp = torch.sum(buffer, 1).add_(1e-10)
    norm = torch.sqrt(normp)
    _output = torch.div(input, norm.view(-1, 1).expand_as(input))
    output = _output.view(input_size)

    return output
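Verifying that l2_norm above row-normalizes a 2-D feature tensor (a sketch; the input shape is an assumption):

import torch

x = torch.randn(5, 8)
y = l2_norm(x)
print(y.pow(2).sum(1))   # ~1.0 per row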
Code example #27
File: process.py  Project: gaobb/SparseConvNet
def rescaleCharacter(c):
    cc = torch.cat(c, 0)
    m = cc.min(0)[0]
    s = (cc.max(0)[0] - m).float()
    for i in range(len(c)):
        c[i] = (
            torch.div(
                (c[i] -
                 m.expand_as(
                    c[i])).float(),
                s.expand_as(
                    c[i])) *
            255.99).byte()
    return c
Code example #28
File: Normalize.py  Project: Jsmilemsj/pytorch
    def updateOutput(self, input):
        assert input.dim() == 2
        input_size = input.size()

        if self._output is None:
            self._output = input.new()
        if self.norm is None:
            self.norm = input.new()
        if self.buffer is None:
            self.buffer = input.new()

        self._output.resize_as_(input)

        # specialization for the infinity norm
        if self.p == float('inf'):
            if not self._indices:
                self._indices = torch.cuda.FloatTensor() if torch.typename(self.output) == 'torch.cuda.FloatTensor' \
                    else torch.LongTensor()

            torch.abs(input, out=self.buffer)
            torch.max(self._indices, self.buffer, 1, out=self.norm, keepdim=True)
            self.norm.add_(self.eps)
        else:
            if self.normp is None:
                self.normp = input.new()
            if self.p % 2 != 0:
                torch.abs(input, out=self.buffer).pow_(self.p)
            else:
                torch.pow(input, self.p, out=self.buffer)

            torch.sum(self.buffer, 1, out=self.normp, keepdim=True).add_(self.eps)
            torch.pow(self.normp, 1. / self.p, out=self.norm)

        torch.div(input, self.norm.view(-1, 1).expand_as(input), out=self._output)

        self.output = self._output.view(input_size)
        return self.output
Code example #29
File: Split.py  Project: ParsonsZeng/DiCoNet
 def forward(self, input_n, hidden, phi, nh):
     self.batch_size = input_n.size()[0]
     hidden = torch.cat((hidden, input_n), 2)
     # Aggregate representations
     h_conv = torch.div(torch.bmm(phi, hidden), nh)
     hidden = hidden.view(-1, self.hidden_size + self.input_size)
     h_conv = h_conv.view(-1, self.hidden_size + self.input_size)
     # h_conv has shape (batch_size, n, hidden_size + input_size)
     m1 = (torch.mm(hidden, self.W1)
           .view(self.batch_size, -1, self.hidden_size))
     m2 = (torch.mm(h_conv, self.W2)
           .view(self.batch_size, -1, self.hidden_size))
     m3 = self.b.unsqueeze(0).unsqueeze(1).expand_as(m2)
     hidden = torch.sigmoid(m1 + m2 + m3)
     return hidden
Code example #30
File: ian.py  Project: coder352/shellscript
    def forward(self, inputs):
        text_raw_indices, aspect_indices = inputs[0], inputs[1]
        text_raw_len = torch.sum(text_raw_indices != 0, dim=-1)
        aspect_len = torch.sum(aspect_indices != 0, dim=-1)

        context = self.embed(text_raw_indices)
        aspect = self.embed(aspect_indices)
        context, (_, _) = self.lstm_context(context, text_raw_len)
        aspect, (_, _) = self.lstm_aspect(aspect, aspect_len)

        aspect_len = torch.tensor(aspect_len, dtype=torch.float).to(self.opt.device)
        aspect = torch.sum(aspect, dim=1)
        aspect = torch.div(aspect, aspect_len.view(aspect_len.size(0), 1))

        text_raw_len = torch.tensor(text_raw_len, dtype=torch.float).to(self.opt.device)
        context = torch.sum(context, dim=1)
        context = torch.div(context, text_raw_len.view(text_raw_len.size(0), 1))

        aspect_final = self.attention_aspect(aspect, context).squeeze(dim=1)
        context_final = self.attention_context(context, aspect).squeeze(dim=1)

        x = torch.cat((aspect_final, context_final), dim=-1)
        out = self.dense(x)
        return out
Code example #31
 def sum_normalize(cs, axis=TensorAxis.C):
     reduce_sum = torch.sum(cs, dim=axis, keepdim=True)
     cs_normalize = torch.div(cs, reduce_sum)
     return cs_normalize
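The same sum-to-one normalization without the TensorAxis helper, which comes from the snippet's surrounding module:

import torch

cs = torch.rand(2, 3, 4, 4)
out = torch.div(cs, torch.sum(cs, dim=1, keepdim=True))   # normalize over the channel axis
print(out.sum(1))   # ~1.0 everywhere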
Code example #32
def l2norm(X, dim, eps=1e-8):
    """L2-normalize columns of X
    """
    norm = torch.pow(X, 2).sum(dim=dim, keepdim=True).sqrt() + eps
    X = torch.div(X, norm)
    return X
Code example #33
File: bregman_pytorch.py  Project: syelman/DM-Count
def sinkhorn_stabilized(a,
                        b,
                        C,
                        reg=1e-1,
                        maxIter=1000,
                        tau=1e3,
                        stopThr=1e-9,
                        verbose=False,
                        log=False,
                        warm_start=None,
                        eval_freq=10,
                        print_freq=200,
                        **kwargs):
    """
    Solve the entropic regularization OT problem with log stabilization
    The function solves the following optimization problem:

    .. math::
        \gamma = arg\min_\gamma <\gamma,C>_F + reg\cdot\Omega(\gamma)
        s.t. \gamma 1 = a
             \gamma^T 1= b
             \gamma\geq 0
    where :
    - C is the (ns,nt) metric cost matrix
    - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
    - a and b are target and source measures (sum to 1)

    The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [1]
    but with the log stabilization proposed in [3] and defined in [2] (Algo 3.1)

    Parameters
    ----------
    a : torch.tensor (na,)
        samples measure in the target domain
    b : torch.tensor (nb,)
        samples in the source domain
    C : torch.tensor (na,nb)
        loss matrix
    reg : float
        Regularization term > 0
    tau : float
        threshold for max value in u or v for log scaling
    maxIter : int, optional
        Max number of iterations
    stopThr : float, optional
        Stop threshold on error ( > 0 )
    verbose : bool, optional
        Print information along iterations
    log : bool, optional
        record log if True

    Returns
    -------
    gamma : (na x nb) torch.tensor
        Optimal transportation matrix for the given parameters
    log : dict
        log dictionary return only if log==True in parameters

    References
    ----------
    [1] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
    [2] Bernhard Schmitzer. Stabilized Sparse Scaling Algorithms for Entropy Regularized Transport Problems. SIAM Journal on Scientific Computing, 2019
    [3] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816.

    See Also
    --------

    """

    device = a.device
    na, nb = C.shape

    assert na >= 1 and nb >= 1, 'C needs to be 2d'
    assert na == a.shape[0] and nb == b.shape[
        0], "Shape of a or b doesn't match that of C"
    assert reg > 0, 'reg should be greater than 0'
    assert a.min() >= 0. and b.min() >= 0., 'Elements in a or b less than 0'

    if log:
        log = {'err': []}

    if warm_start is not None:
        alpha = warm_start['alpha']
        beta = warm_start['beta']
    else:
        alpha = torch.zeros(na, dtype=a.dtype).to(device)
        beta = torch.zeros(nb, dtype=b.dtype).to(device)

    u = torch.ones(na, dtype=a.dtype).to(device) / na
    v = torch.ones(nb, dtype=b.dtype).to(device) / nb

    def update_K(alpha, beta):
        """log space computation"""
        """memory efficient"""
        torch.add(alpha.reshape(-1, 1), beta.reshape(1, -1), out=K)
        torch.add(K, -C, out=K)
        torch.div(K, reg, out=K)
        torch.exp(K, out=K)

    def update_P(alpha, beta, u, v, ab_updated=False):
        """log space P (gamma) computation"""
        torch.add(alpha.reshape(-1, 1), beta.reshape(1, -1), out=P)
        torch.add(P, -C, out=P)
        torch.div(P, reg, out=P)
        if not ab_updated:
            torch.add(P, torch.log(u + M_EPS).reshape(-1, 1), out=P)
            torch.add(P, torch.log(v + M_EPS).reshape(1, -1), out=P)
        torch.exp(P, out=P)

    K = torch.empty(C.shape, dtype=C.dtype).to(device)
    update_K(alpha, beta)

    b_hat = torch.empty(b.shape, dtype=C.dtype).to(device)

    it = 1
    err = 1
    ab_updated = False

    # allocate memory beforehand
    KTu = torch.empty(v.shape, dtype=v.dtype).to(device)
    Kv = torch.empty(u.shape, dtype=u.dtype).to(device)
    P = torch.empty(C.shape, dtype=C.dtype).to(device)

    while (err > stopThr and it <= maxIter):
        upre, vpre = u, v
        torch.matmul(u, K, out=KTu)
        v = torch.div(b, KTu + M_EPS)
        torch.matmul(K, v, out=Kv)
        u = torch.div(a, Kv + M_EPS)

        ab_updated = False
        # remove numerical problems and store them in K
        if u.abs().sum() > tau or v.abs().sum() > tau:
            alpha += reg * torch.log(u + M_EPS)
            beta += reg * torch.log(v + M_EPS)
            u.fill_(1. / na)
            v.fill_(1. / nb)
            update_K(alpha, beta)
            ab_updated = True

        if log and it % eval_freq == 0:
            # we can speed up the process by checking for the error only
            # every eval_freq iterations
            update_P(alpha, beta, u, v, ab_updated)
            b_hat = torch.sum(P, 0)
            err = (b - b_hat).pow(2).sum().item()
            log['err'].append(err)

        if verbose and it % print_freq == 0:
            print('iteration {:5d}, constraint error {:5e}'.format(it, err))

        it += 1

    if log:
        log['u'] = u
        log['v'] = v
        log['alpha'] = alpha + reg * torch.log(u + M_EPS)
        log['beta'] = beta + reg * torch.log(v + M_EPS)

    # transport plan
    update_P(alpha, beta, u, v, False)

    if log:
        return P, log
    else:
        return P
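A hedged usage sketch for sinkhorn_stabilized: the uniform marginals below are made up, and the function relies on a module-level M_EPS constant (e.g. 1e-16) that must exist where it is defined:

import torch

M_EPS = 1e-16                     # assumed module-level constant
na, nb = 5, 7
a = torch.full((na,), 1.0 / na)   # target marginal
b = torch.full((nb,), 1.0 / nb)   # source marginal
C = torch.rand(na, nb)            # cost matrix
P = sinkhorn_stabilized(a, b, C, reg=0.1)
print(P.sum(0))                   # ≈ b (and P.sum(1) ≈ a)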
Code example #34
File: util.py  Project: jacobver/mem_seq2seq
def similarity(vec, mat, eps=1e-6):
    vec_norm = torch.norm(vec, 2, 1)
    mat_norm = torch.norm(mat, 2, 2)
    normalized_vec = torch.div(vec, vec_norm.expand_as(vec).clamp(min=eps))
    normalized_mat = torch.div(mat, mat_norm.expand_as(mat).clamp(min=eps))
    return torch.bmm(normalized_mat, normalized_vec.unsqueeze(2)).squeeze(2)
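A call sketch for similarity above. Note that vec_norm.expand_as(vec) assumes an older PyTorch where torch.norm keeps the reduced dimension; on recent versions you would compute the norms with keepdim=True instead:

import torch

vec = torch.randn(2, 8)        # (batch, dim)
mat = torch.randn(2, 5, 8)     # (batch, n, dim)
sims = similarity(vec, mat)    # (batch, n) cosine similarities in [-1, 1]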
Code example #35
def l2norm(x, dim=-1):
    norm = torch.pow(x, 2).sum(dim=dim, keepdim=True).sqrt()
    x_norm = torch.div(x, norm)
    return x_norm, norm
Code example #36
    def test_precedence_semantics(self):
        """Test semantics for __torch_function__ for functions that take
        multiple arguments

        For functions that take multiple arguments, the appropriate
        __torch_function__ implementation to call is determined by
        examining the types of the arguments. The precedence order is
        left-to-right in the argument list, except subclasses are always
        checked before superclasses. The first result of calling the
        implementations in precedence order that is not NotImplemented
        is returned to the user. If all implementations return
        NotImplemented, a TypeError is raised.

        All cases are tested with functions implemented in C++ and
        either foo or baz, which are python functions defined above that
        are instrumented to obey the same dispatch rules as the
        functions in torch.functional.
        """
        # DiagonalTensor has a valid override and SubDiagonal has an
        # override that returns NotImplemented so we should call the
        # DiagonalTensor implementation, returning -1
        t1 = DiagonalTensor(5, 2)
        t2 = SubDiagonalTensor(5, 2)
        self.assertEqual(torch.div(t1, t2), -1)
        self.assertEqual(torch.div(t2, t1), -1)
        self.assertEqual(foo(t1, t2), -1)
        self.assertEqual(foo(t2, t1), -1)

        # SubTensor has an implementation that returns NotImplemented as
        # well so it should behave exactly like SubDiagonalTensor in the
        # test above
        t3 = SubTensor([[1, 2], [1, 2]])
        self.assertEqual(torch.div(t1, t3), -1)
        self.assertEqual(torch.div(t3, t1), -1)
        self.assertEqual(foo(t1, t3), -1)
        self.assertEqual(foo(t3, t1), -1)

        # div between SubTensor and SubDiagonalTensor should raise
        # TypeError since both have an implementation that
        # explicitly returns NotImplemented
        with self.assertRaises(TypeError):
            torch.div(t2, t3)
        with self.assertRaises(TypeError):
            torch.div(t3, t2)
        with self.assertRaises(TypeError):
            foo(t2, t3)
        with self.assertRaises(TypeError):
            foo(t3, t2)

        # none of DiagonalTensor, SubDiagonalTensor, or SubTensor have a
        # mul or a baz implementation so all ops should raise TypeError
        with self.assertRaises(TypeError):
            torch.mul(t1, t1)
        with self.assertRaises(TypeError):
            torch.mul(t1, t2)
        with self.assertRaises(TypeError):
            torch.mul(t1, t3)
        with self.assertRaises(TypeError):
            torch.mul(t2, t1)
        with self.assertRaises(TypeError):
            torch.mul(t2, t2)
        with self.assertRaises(TypeError):
            torch.mul(t2, t3)
        with self.assertRaises(TypeError):
            torch.mul(t3, t1)
        with self.assertRaises(TypeError):
            torch.mul(t3, t2)
        with self.assertRaises(TypeError):
            torch.mul(t3, t3)
        with self.assertRaises(TypeError):
            baz(t1, t1)
        with self.assertRaises(TypeError):
            baz(t1, t2)
        with self.assertRaises(TypeError):
            baz(t1, t3)
        with self.assertRaises(TypeError):
            baz(t2, t1)
        with self.assertRaises(TypeError):
            baz(t2, t2)
        with self.assertRaises(TypeError):
            baz(t2, t3)
        with self.assertRaises(TypeError):
            baz(t3, t1)
        with self.assertRaises(TypeError):
            baz(t3, t2)
        with self.assertRaises(TypeError):
            baz(t3, t3)
Code example #37
def GAN_pretrain(model, GAN_model, criterion, optimizer, pos_feats, maxiter):
    model.eval()
    GAN_model.train()
    GAN_mask_batch_size = opts['GAN_mask_batch_size']

    # -------------Evaluate mask-------------
    # print('Evaluating Mask')
    n = pos_feats.size(0)
    nBatches = int(round(float(n)/GAN_mask_batch_size))
    prob_k = torch.zeros(9, 1)
    for k in range(0, 9):
        row = int(math.floor(k/3))
        col = k % 3
        for i in range(1, nBatches+1):
            # prepare batch
            batch = pos_feats[GAN_mask_batch_size*(i-1):min(pos_feats.size(0), GAN_mask_batch_size*i), :].data.clone()
            batch = batch.view(-1, 512, 3, 3)
            batch[:, :, col, row] = 0
            batch = batch.view(batch.size(0), -1)

            # prepare label
            feat = model(batch, in_layer='fc4')
            if i == 1:
                feats = feat.data.clone()
            else:
                feats = torch.cat((feats, feat.data.clone()), 0)
        X = feats
        X_max = torch.max(feats, dim=1)[0]
        X_max = X_max.repeat(2, 1).permute(1, 0)
        E = torch.exp(feats-X_max)
        L = torch.sum(E, 1)
        Y = torch.div(E, L.repeat(2, 1).permute(1, 0))
        prob_k[k] = torch.sum(Y, dim=0)[0]
        # print('mask {}, value: {:.3f}'.format(k, prob_k[k][0]))
    _, idx = torch.min(prob_k, 0)
    row = int(math.floor(idx/3))
    col = idx % 3

    # -------------GAN------------------
    GAN_model.train()
    GAN_batch_size = opts['GAN_batch_size']
    nBatches = int(round(float(n)/GAN_batch_size))
    objective = torch.zeros(1, maxiter)

    # prepare batch data
    pos_idx = np.random.permutation(pos_feats.size(0))
    while(len(pos_idx) < GAN_batch_size*maxiter):
        pos_idx = np.concatenate([pos_idx, np.random.permutation(pos_feats.size(0))])
    pos_pointer = 0
    # iter
    for iter in range(maxiter):
        tic = time.time()
        # select pos idx
        pos_next = pos_pointer + GAN_batch_size
        pos_cur_idx = pos_idx[pos_pointer:pos_next]
        pos_cur_idx = pos_feats.new(pos_cur_idx).long()
        pos_pointer = pos_next
        # create batch
        batch_pos_feats = Variable(pos_feats.index_select(0, pos_cur_idx))
        labels = torch.ones(3, 3, 1, GAN_batch_size)
        labels[col, row, :] = 0
        if opts['use_gpu']:
            batch_pos_feats = batch_pos_feats.cuda()
            labels = labels.cuda()
        GAN_score = GAN_model(batch_pos_feats).view(3, 3, 1, GAN_batch_size)

        # optimize
        loss = criterion(GAN_score, labels)
        GAN_model.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(GAN_model.parameters(), opts['grad_clip'])
        optimizer.step()

        # result
        objective[:, iter] = loss.item() / GAN_batch_size

        # objective[iter] =
        print "Pretrain GAN: Iter %d, Loss %.4f, Time %.3f" % (iter+1, torch.mean(objective[:,0:iter+1], dim=1).data, time.time()-tic)
Code example #38
File: losses.py  Project: sowmen/imanip_main
    def forward(self, features, labels=None, mask=None):
        """Compute loss for model. If both `labels` and `mask` are None,
        it degenerates to SimCLR unsupervised loss:
        https://arxiv.org/pdf/2002.05709.pdf
        Args:
            features: hidden vector of shape [bsz, n_views, ...].
            labels: ground truth of shape [bsz].
            mask: contrastive mask of shape [bsz, bsz], mask_{i,j}=1 if sample j
                has the same class as sample i. Can be asymmetric.
        Returns:
            A loss scalar.
        """
        device = (torch.device('cuda')
                  if features.is_cuda else torch.device('cpu'))

        if len(features.shape) < 3:
            raise ValueError('`features` needs to be [bsz, n_views, ...],'
                             'at least 3 dimensions are required')
        if len(features.shape) > 3:
            features = features.view(features.shape[0], features.shape[1], -1)

        batch_size = features.shape[0]
        if labels is not None and mask is not None:
            raise ValueError('Cannot define both `labels` and `mask`')
        elif labels is None and mask is None:
            mask = torch.eye(batch_size, dtype=torch.float32).to(device)
        elif labels is not None:
            labels = labels.contiguous().view(-1, 1)
            if labels.shape[0] != batch_size:
                raise ValueError(
                    'Num of labels does not match num of features')
            mask = torch.eq(labels, labels.T).float().to(device)
        else:
            mask = mask.float().to(device)

        contrast_count = features.shape[1]
        contrast_feature = torch.cat(torch.unbind(features, dim=1), dim=0)
        if self.contrast_mode == 'one':
            anchor_feature = features[:, 0]
            anchor_count = 1
        elif self.contrast_mode == 'all':
            anchor_feature = contrast_feature
            anchor_count = contrast_count
        else:
            raise ValueError('Unknown mode: {}'.format(self.contrast_mode))

        # compute logits
        anchor_dot_contrast = torch.div(
            torch.matmul(anchor_feature, contrast_feature.T), self.temperature)
        # for numerical stability
        logits_max, _ = torch.max(anchor_dot_contrast, dim=1, keepdim=True)
        logits = anchor_dot_contrast - logits_max.detach()

        # tile mask
        mask = mask.repeat(anchor_count, contrast_count)
        # mask-out self-contrast cases
        logits_mask = torch.scatter(
            torch.ones_like(mask), 1,
            torch.arange(batch_size * anchor_count).view(-1, 1).to(device), 0)
        mask = mask * logits_mask

        # compute log_prob
        exp_logits = torch.exp(logits) * logits_mask
        log_prob = logits - torch.log(exp_logits.sum(1, keepdim=True))

        # compute mean of log-likelihood over positive
        mean_log_prob_pos = (mask * log_prob).sum(1) / mask.sum(1)

        # loss
        loss = -(self.temperature / self.base_temperature) * mean_log_prob_pos
        loss = loss.view(anchor_count, batch_size).mean()

        return loss
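A usage sketch for the supervised contrastive loss above, assuming a constructor matching the attributes used in forward (temperature, contrast_mode, base_temperature), as in the original SupContrast code:

import torch
import torch.nn.functional as F

features = F.normalize(torch.randn(8, 2, 128), dim=-1)   # [bsz, n_views, dim], unit-normalized
labels = torch.randint(0, 4, (8,))
criterion = SupConLoss(temperature=0.07, contrast_mode='all', base_temperature=0.07)
loss = criterion(features, labels)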
Code example #39
def linear_cg(
    matmul_closure,
    rhs,
    n_tridiag=0,
    tolerance=1e-6,
    eps=1e-10,
    max_iter=None,
    max_tridiag_iter=None,
    initial_guess=None,
    preconditioner=None,
):
    """
    Implements the linear conjugate gradients method for (approximately) solving systems of the form

        lhs result = rhs

    for positive definite and symmetric matrices.

    Args:
      - matmul_closure - a function which performs a left matrix multiplication with lhs_mat
      - rhs - the right-hand side of the equation
      - n_tridiag - returns a tridiagonalization of the first n_tridiag columns of rhs
      - tolerance - stop the solve when the max residual is less than this
      - eps - noise to add to prevent division by zero
      - max_iter - the maximum number of CG iterations
      - max_tridiag_iter - the maximum size of the tridiagonalization matrix
      - initial_guess - an initial guess at the solution `result`
      - precondition_closure - a functions which left-preconditions a supplied vector

    Returns:
      result - a solution to the system (if n_tridiag is 0)
      result, tridiags - a solution to the system, and corresponding tridiagonal matrices (if n_tridiag > 0)
    """
    # Unsqueeze, if necessary
    is_vector = rhs.ndimension() == 1
    if is_vector:
        rhs = rhs.unsqueeze(-1)

    # Some default arguments
    if max_iter is None:
        max_iter = settings.max_cg_iterations.value()
    if max_tridiag_iter is None:
        max_tridiag_iter = settings.max_lanczos_quadrature_iterations.value()
    if initial_guess is None:
        initial_guess = torch.zeros_like(rhs)
    if preconditioner is None:
        preconditioner = _default_preconditioner
        precond = False
    else:
        precond = True

    # If we are running m CG iterations, we obviously can't get more than m Lanczos coefficients
    if max_tridiag_iter > max_iter:
        raise RuntimeError(
            "Getting a tridiagonalization larger than the number of CG iterations run is not possible!"
        )

    # Check matmul_closure object
    if torch.is_tensor(matmul_closure):
        matmul_closure = matmul_closure.matmul
    elif not callable(matmul_closure):
        raise RuntimeError(
            "matmul_closure must be a tensor, or a callable object!")

    # Get some constants
    batch_shape = rhs.shape[:-2]
    num_rows = rhs.size(-2)
    n_iter = min(max_iter,
                 num_rows) if settings.terminate_cg_by_size.on() else max_iter
    n_tridiag_iter = min(max_tridiag_iter, num_rows)

    # result <- x_{0}
    result = initial_guess

    # residual: residual_{0} = b_vec - lhs x_{0}
    residual = rhs - matmul_closure(result)

    # Check for NaNs
    if not torch.equal(residual, residual):
        raise RuntimeError(
            "NaNs encounterd when trying to perform matrix-vector multiplication"
        )

    # Sometimes we're lucky and the preconditioner solves the system right away
    residual_norm = residual.norm(2, dim=-2)
    if (residual_norm < tolerance).all() and not n_tridiag:
        n_iter = 0  # Skip the iteration!

    # Otherwise, let's define precond_residual and curr_conjugate_vec
    else:
        # precon_residual{0} = M^-1 residual_{0}
        precond_residual = preconditioner(residual)
        curr_conjugate_vec = precond_residual
        residual_inner_prod = precond_residual.mul(residual).sum(-2,
                                                                 keepdim=True)

        # Define storage matrices
        mul_storage = torch.empty_like(residual)
        alpha = torch.empty(*batch_shape,
                            rhs.size(-1),
                            dtype=residual.dtype,
                            device=residual.device)
        beta = torch.empty_like(alpha)

    # Define tridiagonal matrices, if applicable
    if n_tridiag:
        t_mat = torch.zeros(n_tridiag_iter,
                            n_tridiag_iter,
                            *batch_shape,
                            n_tridiag,
                            dtype=alpha.dtype,
                            device=alpha.device)
        alpha_reciprocal = torch.empty(*batch_shape,
                                       n_tridiag,
                                       dtype=t_mat.dtype,
                                       device=t_mat.device)
        prev_alpha_reciprocal = torch.empty_like(alpha_reciprocal)
        prev_beta = torch.empty_like(alpha_reciprocal)

    update_tridiag = True
    last_tridiag_iter = 0
    # Start the iteration
    for k in range(n_iter):
        # Get next alpha
        # alpha_{k} = (residual_{k-1}^T precon_residual{k-1}) / (p_vec_{k-1}^T mat p_vec_{k-1})
        mvms = matmul_closure(curr_conjugate_vec)
        if precond:
            torch.mul(curr_conjugate_vec, mvms, out=mul_storage)
            torch.sum(mul_storage, -2, keepdim=True, out=alpha)
            alpha.add_(eps)
            torch.div(residual_inner_prod, alpha, out=alpha)

            # Update residual
            # residual_{k} = residual_{k-1} - alpha_{k} mat p_vec_{k-1}
            torch.addcmul(residual, -1, alpha, mvms, out=residual)

            # Update precond_residual
            # precon_residual{k} = M^-1 residual_{k}
            precond_residual = preconditioner(residual)

            _jit_linear_cg_updates(
                result,
                alpha,
                residual_inner_prod,
                torch.tensor(eps),
                beta,
                residual,
                precond_residual,
                mul_storage,
                curr_conjugate_vec,
            )
        else:
            _jit_linear_cg_updates_no_precond(
                mvms,
                result,
                alpha,
                residual_inner_prod,
                torch.tensor(eps),
                beta,
                residual,
                precond_residual,
                mul_storage,
                curr_conjugate_vec,
            )

        # Update tridiagonal matrices, if applicable
        if n_tridiag and k < n_tridiag_iter and update_tridiag:
            alpha_tridiag = alpha.squeeze_(-2).narrow(-1, 0, n_tridiag)
            beta_tridiag = beta.squeeze_(-2).narrow(-1, 0, n_tridiag)
            torch.reciprocal(alpha_tridiag, out=alpha_reciprocal)

            if k == 0:
                t_mat[k, k].copy_(alpha_reciprocal)
            else:
                torch.addcmul(alpha_reciprocal,
                              prev_beta,
                              prev_alpha_reciprocal,
                              out=t_mat[k, k])
                torch.mul(prev_beta.sqrt_(),
                          prev_alpha_reciprocal,
                          out=t_mat[k, k - 1])
                t_mat[k - 1, k].copy_(t_mat[k, k - 1])

                if t_mat[k - 1, k].max() < 1e-6:
                    update_tridiag = False

            last_tridiag_iter = k

            prev_alpha_reciprocal.copy_(alpha_reciprocal)
            prev_beta.copy_(beta_tridiag)

    if is_vector:
        result = result.squeeze(-1)

    if n_tridiag:
        t_mat = t_mat[:last_tridiag_iter + 1, :last_tridiag_iter + 1]
        return result, t_mat.permute(-1, *range(2, 2 + len(batch_shape)), 0,
                                     1).contiguous()
    else:
        return result
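
For reference, the same solver without batching, preconditioning, or tridiagonalization fits in a few lines. This is a minimal sketch of plain conjugate gradients, not the implementation above:

import torch

def simple_cg(A, b, max_iter=100, tolerance=1e-6):
    # Solve A x = b for symmetric positive definite A.
    x = torch.zeros_like(b)
    r = b - A @ x                      # residual
    p = r.clone()                      # search direction
    rs_old = r.dot(r)
    for _ in range(max_iter):
        Ap = A @ p
        alpha = rs_old / p.dot(Ap)     # step size along p
        x = x + alpha * p
        r = r - alpha * Ap
        rs_new = r.dot(r)
        if rs_new.sqrt() < tolerance:
            break
        p = r + (rs_new / rs_old) * p  # conjugate update of the direction
        rs_old = rs_new
    return x

torch.manual_seed(0)
M = torch.randn(10, 10)
A = M @ M.t() + 10 * torch.eye(10)  # SPD by construction
b = torch.randn(10)
x = simple_cg(A, b)
print(torch.allclose(A @ x, b, atol=1e-4))  # True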
コード例 #40
def _div_aten(a, b):
    if isinstance(a, (bool, int)):
        return torch.div(a, b, rounding_mode="trunc")
    return torch.true_divide(a, b)
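
A quick behavioural contrast of the two branches (the trunc branch only fires when `a` is a Python bool or int; the tensors below just illustrate the two division modes):

import torch

a = torch.tensor([7, -7])
b = torch.tensor([2, 2])
print(torch.div(a, b, rounding_mode="trunc"))  # tensor([ 3, -3]): truncates toward zero
print(torch.true_divide(a, b))                 # tensor([ 3.5000, -3.5000])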
コード例 #41
    def _generate_single_step(self,
                              src_tokens,
                              src_lengths,
                              beam_size=None,
                              maxlen=None,
                              prefix_tokens=None):
        bsz, srclen = src_tokens.size()
        maxlen = min(maxlen,
                     self.maxlen) if maxlen is not None else self.maxlen

        # the max beam size is the dictionary size - 1, since we never select pad
        beam_size = beam_size if beam_size is not None else self.beam_size
        beam_size = min(beam_size, self.vocab_size - 1)

        encoder_outs = []
        incremental_states = {}
        for model in self.models:
            if not self.retain_dropout:
                model.eval()
            if isinstance(model.decoder, FairseqIncrementalDecoder):
                incremental_states[model] = {}
            else:
                incremental_states[model] = None

            # compute the encoder output for each beam
            encoder_out = model.encoder(
                src_tokens.repeat(1, beam_size).view(-1, srclen),
                src_lengths.expand(
                    beam_size, src_lengths.numel()).t().contiguous().view(-1),
            )
            encoder_outs.append(encoder_out)

        # initialize buffers
        scores = src_tokens.data.new(bsz * beam_size,
                                     maxlen + 1).float().fill_(0)
        scores_buf = scores.clone()
        tokens = src_tokens.data.new(bsz * beam_size,
                                     maxlen + 2).fill_(self.pad)
        tokens_buf = tokens.clone()
        tokens[:, 0] = self.eos
        attn = scores.new(bsz * beam_size, src_tokens.size(1), maxlen + 2)
        attn_buf = attn.clone()

        # list of completed sentences
        finalized = [[] for i in range(bsz)]
        finished = [False for i in range(bsz)]
        worst_finalized = [{
            'idx': None,
            'score': -math.inf
        } for i in range(bsz)]
        num_remaining_sent = bsz

        # number of candidate hypos per step
        cand_size = 2 * beam_size  # 2 x beam size in case half are EOS

        # offset arrays for converting between different indexing schemes
        bbsz_offsets = (torch.arange(0, bsz) *
                        beam_size).unsqueeze(1).type_as(tokens)
        cand_offsets = torch.arange(0, cand_size).type_as(tokens)

        # helper function for allocating buffers on the fly
        buffers = {}

        def buffer(name, type_of=tokens):  # noqa
            if name not in buffers:
                buffers[name] = type_of.new()
            return buffers[name]

        def is_finished(sent, step, unfinalized_scores=None):
            """
            Check whether we've finished generation for a given sentence, by
            comparing the worst score among finalized hypotheses to the best
            possible score among unfinalized hypotheses.
            """
            assert len(finalized[sent]) <= beam_size
            if len(finalized[sent]) == beam_size:
                if self.stop_early or step == maxlen or unfinalized_scores is None:
                    return True
                # stop if the best unfinalized score is worse than the worst
                # finalized one
                best_unfinalized_score = unfinalized_scores[sent].max()
                if self.normalize_scores:
                    best_unfinalized_score /= maxlen**self.len_penalty
                if worst_finalized[sent]['score'] >= best_unfinalized_score:
                    return True
            return False

        def finalize_hypos(step,
                           bbsz_idx,
                           eos_scores,
                           unfinalized_scores=None):
            """
            Finalize the given hypotheses at this step, while keeping the total
            number of finalized hypotheses per sentence <= beam_size.
            Note: the input must be in the desired finalization order, so that
            hypotheses that appear earlier in the input are preferred to those
            that appear later.
            Args:
                step: current time step
                bbsz_idx: A vector of indices in the range [0, bsz*beam_size),
                    indicating which hypotheses to finalize
                eos_scores: A vector of the same size as bbsz_idx containing
                    scores for each hypothesis
                unfinalized_scores: A vector containing scores for all
                    unfinalized hypotheses
            """
            assert bbsz_idx.numel() == eos_scores.numel()

            # clone relevant token and attention tensors
            tokens_clone = tokens.index_select(0, bbsz_idx)
            tokens_clone = tokens_clone[:, 1:step +
                                        2]  # skip the first index, which is EOS
            tokens_clone[:, step] = self.eos
            attn_clone = attn.index_select(0, bbsz_idx)[:, :, 1:step + 2]

            # compute scores per token position
            pos_scores = scores.index_select(0, bbsz_idx)[:, :step + 1]
            pos_scores[:, step] = eos_scores
            # convert from cumulative to per-position scores
            pos_scores[:, 1:] = pos_scores[:, 1:] - pos_scores[:, :-1]

            # normalize sentence-level scores
            if self.normalize_scores:
                eos_scores /= (step + 1)**self.len_penalty

            cum_unfin = []
            prev = 0
            for f in finished:
                if f:
                    prev += 1
                else:
                    cum_unfin.append(prev)

            sents_seen = set()
            for i, (idx, score) in enumerate(
                    zip(bbsz_idx.tolist(), eos_scores.tolist())):
                unfin_idx = idx // beam_size
                sent = unfin_idx + cum_unfin[unfin_idx]

                sents_seen.add((sent, unfin_idx))

                def get_hypo():

                    # remove padding tokens from attn scores
                    nonpad_idxs = src_tokens[sent].ne(self.pad)
                    hypo_attn = attn_clone[i][nonpad_idxs]
                    _, alignment = hypo_attn.max(dim=0)

                    return {
                        'tokens': tokens_clone[i],
                        'score': score,
                        'attention': hypo_attn,  # src_len x tgt_len
                        'alignment': alignment,
                        'positional_scores': pos_scores[i],
                    }

                if len(finalized[sent]) < beam_size:
                    finalized[sent].append(get_hypo())
                elif not self.stop_early and score > worst_finalized[sent][
                        'score']:
                    # replace worst hypo for this sentence with new/better one
                    worst_idx = worst_finalized[sent]['idx']
                    if worst_idx is not None:
                        finalized[sent][worst_idx] = get_hypo()

                    # find new worst finalized hypo for this sentence
                    idx, s = min(enumerate(finalized[sent]),
                                 key=lambda r: r[1]['score'])
                    worst_finalized[sent] = {
                        'score': s['score'],
                        'idx': idx,
                    }

            newly_finished = []
            for sent, unfin_idx in sents_seen:
                # check termination conditions for this sentence
                if not finished[sent] and is_finished(sent, step,
                                                      unfinalized_scores):
                    finished[sent] = True
                    newly_finished.append(unfin_idx)
            return newly_finished

        reorder_state = None
        batch_idxs = None
        # print("SHAPE", prefix_tokens.size()[1]+1)
        if prefix_tokens is not None:
            num_of_steps = prefix_tokens.size()[1] + 1
            print("PREFIX TOKENS NOT NONE")
        else:
            print("PREFIX TOKENS NONE")
            num_of_steps = 1
        print("NUM OF STEPS", num_of_steps)
        for step in range(num_of_steps):  # one extra step for EOS marker
            # reorder decoder internal states based on the prev choice of beams
            if reorder_state is not None:
                if batch_idxs is not None:
                    # update beam indices to take into account removed sentences
                    corr = batch_idxs - torch.arange(
                        batch_idxs.numel()).type_as(batch_idxs)
                    reorder_state.view(-1, beam_size).add_(
                        corr.unsqueeze(-1) * beam_size)
                for i, model in enumerate(self.models):
                    if isinstance(model.decoder, FairseqIncrementalDecoder):
                        model.decoder.reorder_incremental_state(
                            incremental_states[model], reorder_state)
                    encoder_outs[i] = model.encoder.reorder_encoder_out(
                        encoder_outs[i], reorder_state)

            probs, avg_attn_scores = self._decode(tokens[:, :step + 1],
                                                  encoder_outs,
                                                  incremental_states)

            if step == num_of_steps - 1:
                # after consuming the full prefix, return the raw next-token distribution
                return probs.numpy()[0]

            if step == 0:
                # at the first step all hypotheses are equally likely, so use
                # only the first beam
                probs = probs.unfold(0, 1, beam_size).squeeze(2).contiguous()
                scores = scores.type_as(probs)
                scores_buf = scores_buf.type_as(probs)
            elif not self.sampling:
                # make probs contain cumulative scores for each hypothesis
                probs.add_(scores[:, step - 1].view(-1, 1))

            probs[:, self.pad] = -math.inf  # never select pad
            probs[:, self.unk] -= self.unk_penalty  # apply unk penalty

            # Record attention scores
            attn[:, :, step + 1].copy_(avg_attn_scores)

            cand_scores = buffer('cand_scores', type_of=scores)
            cand_indices = buffer('cand_indices')
            cand_beams = buffer('cand_beams')
            eos_bbsz_idx = buffer('eos_bbsz_idx')
            eos_scores = buffer('eos_scores', type_of=scores)
            if step < maxlen:
                if prefix_tokens is not None and step < prefix_tokens.size(1):
                    probs_slice = probs.view(bsz, -1, probs.size(-1))[:, 0, :]
                    cand_scores = torch.gather(
                        probs_slice,
                        dim=1,
                        index=prefix_tokens[:, step].view(-1, 1).data).expand(
                            -1, cand_size)
                    cand_indices = prefix_tokens[:, step].view(-1, 1).expand(
                        bsz, cand_size).data
                    cand_beams.resize_as_(cand_indices).fill_(0)
                elif self.sampling:
                    assert self.pad == 1, 'sampling assumes the first two symbols can be ignored'

                    if self.sampling_topk > 0:
                        values, indices = probs[:, 2:].topk(self.sampling_topk)
                        exp_probs = values.div_(
                            self.sampling_temperature).exp()
                        if step == 0:
                            torch.multinomial(exp_probs,
                                              beam_size,
                                              replacement=True,
                                              out=cand_indices)
                        else:
                            torch.multinomial(exp_probs,
                                              1,
                                              replacement=True,
                                              out=cand_indices)
                        torch.gather(exp_probs,
                                     dim=1,
                                     index=cand_indices,
                                     out=cand_scores)
                        torch.gather(indices,
                                     dim=1,
                                     index=cand_indices,
                                     out=cand_indices)
                        cand_indices.add_(2)
                    else:
                        exp_probs = probs.div_(
                            self.sampling_temperature).exp_().view(
                                -1, self.vocab_size)

                        if step == 0:
                            # we exclude the first two vocab items, one of which is pad
                            torch.multinomial(exp_probs[:, 2:],
                                              beam_size,
                                              replacement=True,
                                              out=cand_indices)
                        else:
                            torch.multinomial(exp_probs[:, 2:],
                                              1,
                                              replacement=True,
                                              out=cand_indices)

                        cand_indices.add_(2)
                        torch.gather(exp_probs,
                                     dim=1,
                                     index=cand_indices,
                                     out=cand_scores)

                    cand_scores.log_()
                    cand_indices = cand_indices.view(bsz, -1).repeat(1, 2)
                    cand_scores = cand_scores.view(bsz, -1).repeat(1, 2)
                    if step == 0:
                        cand_beams = torch.zeros(
                            bsz, cand_size).type_as(cand_indices)
                    else:
                        cand_beams = torch.arange(0, beam_size).repeat(
                            bsz, 2).type_as(cand_indices)
                        # make scores cumulative
                        cand_scores.add_(
                            torch.gather(
                                scores[:, step - 1].view(bsz, beam_size),
                                dim=1,
                                index=cand_beams,
                            ))
                else:
                    # take the best 2 x beam_size predictions. We'll choose the first
                    # beam_size of these which don't predict eos to continue with.
                    torch.topk(
                        probs.view(bsz, -1),
                        k=min(cand_size,
                              probs.view(bsz, -1).size(1) -
                              1),  # -1 so we never select pad
                        out=(cand_scores, cand_indices),
                    )
                    torch.div(cand_indices, self.vocab_size, out=cand_beams)
                    cand_indices.fmod_(self.vocab_size)
            else:
                # finalize all active hypotheses once we hit maxlen
                # pick the hypothesis with the highest prob of EOS right now
                torch.sort(
                    probs[:, self.eos],
                    descending=True,
                    out=(eos_scores, eos_bbsz_idx),
                )
                num_remaining_sent -= len(
                    finalize_hypos(step, eos_bbsz_idx, eos_scores))
                assert num_remaining_sent == 0
                break

            # cand_bbsz_idx contains beam indices for the top candidate
            # hypotheses, with a range of values: [0, bsz*beam_size),
            # and dimensions: [bsz, cand_size]
            cand_bbsz_idx = cand_beams.add(bbsz_offsets)

            # finalize hypotheses that end in eos
            eos_mask = cand_indices.eq(self.eos)

            finalized_sents = set()
            if step >= self.minlen:
                # only consider eos when it's among the top beam_size indices
                torch.masked_select(
                    cand_bbsz_idx[:, :beam_size],
                    mask=eos_mask[:, :beam_size],
                    out=eos_bbsz_idx,
                )
                if eos_bbsz_idx.numel() > 0:
                    torch.masked_select(
                        cand_scores[:, :beam_size],
                        mask=eos_mask[:, :beam_size],
                        out=eos_scores,
                    )
                    finalized_sents = finalize_hypos(step, eos_bbsz_idx,
                                                     eos_scores, cand_scores)
                    num_remaining_sent -= len(finalized_sents)

            assert num_remaining_sent >= 0
            if num_remaining_sent == 0:
                break
            assert step < maxlen

            if len(finalized_sents) > 0:
                new_bsz = bsz - len(finalized_sents)

                # construct batch_idxs which holds indices of batches to keep for the next pass
                batch_mask = torch.ones(bsz).type_as(cand_indices)
                batch_mask[cand_indices.new(finalized_sents)] = 0
                batch_idxs = batch_mask.nonzero().squeeze(-1)

                eos_mask = eos_mask[batch_idxs]
                cand_beams = cand_beams[batch_idxs]
                bbsz_offsets.resize_(new_bsz, 1)
                cand_bbsz_idx = cand_beams.add(bbsz_offsets)

                cand_scores = cand_scores[batch_idxs]
                cand_indices = cand_indices[batch_idxs]
                if prefix_tokens is not None:
                    prefix_tokens = prefix_tokens[batch_idxs]

                scores = scores.view(bsz, -1)[batch_idxs].view(
                    new_bsz * beam_size, -1)
                scores_buf.resize_as_(scores)
                tokens = tokens.view(bsz, -1)[batch_idxs].view(
                    new_bsz * beam_size, -1)
                tokens_buf.resize_as_(tokens)
                attn = attn.view(bsz,
                                 -1)[batch_idxs].view(new_bsz * beam_size,
                                                      attn.size(1), -1)
                attn_buf.resize_as_(attn)
                bsz = new_bsz
            else:
                batch_idxs = None

            # set active_mask so that values > cand_size indicate eos hypos
            # and values < cand_size indicate candidate active hypos.
            # After, the min values per row are the top candidate active hypos
            active_mask = buffer('active_mask')
            torch.add(
                eos_mask.type_as(cand_offsets) * cand_size,
                cand_offsets[:eos_mask.size(1)],
                out=active_mask,
            )

            # get the top beam_size active hypotheses, which are just the hypos
            # with the smallest values in active_mask
            active_hypos, _ignore = buffer('active_hypos'), buffer('_ignore')
            torch.topk(active_mask,
                       k=beam_size,
                       dim=1,
                       largest=False,
                       out=(_ignore, active_hypos))
            active_bbsz_idx = buffer('active_bbsz_idx')
            torch.gather(
                cand_bbsz_idx,
                dim=1,
                index=active_hypos,
                out=active_bbsz_idx,
            )
            active_scores = torch.gather(
                cand_scores,
                dim=1,
                index=active_hypos,
                out=scores[:, step].view(bsz, beam_size),
            )

            active_bbsz_idx = active_bbsz_idx.view(-1)
            active_scores = active_scores.view(-1)

            # copy tokens and scores for active hypotheses
            torch.index_select(
                tokens[:, :step + 1],
                dim=0,
                index=active_bbsz_idx,
                out=tokens_buf[:, :step + 1],
            )
            torch.gather(
                cand_indices,
                dim=1,
                index=active_hypos,
                out=tokens_buf.view(bsz, beam_size, -1)[:, :, step + 1],
            )
            if step > 0:
                torch.index_select(
                    scores[:, :step],
                    dim=0,
                    index=active_bbsz_idx,
                    out=scores_buf[:, :step],
                )
            torch.gather(
                cand_scores,
                dim=1,
                index=active_hypos,
                out=scores_buf.view(bsz, beam_size, -1)[:, :, step],
            )

            # copy attention for active hypotheses
            torch.index_select(
                attn[:, :, :step + 2],
                dim=0,
                index=active_bbsz_idx,
                out=attn_buf[:, :, :step + 2],
            )

            # swap buffers
            tokens, tokens_buf = tokens_buf, tokens
            scores, scores_buf = scores_buf, scores
            attn, attn_buf = attn_buf, attn

            # reorder incremental state in decoder
            reorder_state = active_bbsz_idx

            # print("RESULT")
            # print(scores[0])
            # print([self.tgt_dict.symbols[x] for x in tokens[0]][:5])
            # print([self.tgt_dict.symbols[x] for x in tokens[0]][:5])
            # print([self.tgt_dict.symbols[x] for x in tokens[1]][:5])
            # print([self.tgt_dict.symbols[x] for x in tokens[2]][:5])

            # print([x for x in tokens[0]][:5])
            # print([self.tgt_dict.symbols[x] for x in tokens[0]][:5])
            # print([x for x in tokens[1]][:5])
            # print([x for x in tokens[4]][:5])
            # print(self.tgt_dict.symbols[1115],self.tgt_dict.symbols[5741])
            # ,scores,self.tgt_dict.string(tokens[0]),"try",self.tgt_dict.string([1,62,4]))

        # sort by score descending
        for sent in range(len(finalized)):
            finalized[sent] = sorted(finalized[sent],
                                     key=lambda r: r['score'],
                                     reverse=True)

        return finalized
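
One piece of the bookkeeping above is worth isolating: scores are flattened per sentence, topk picks 2 * beam_size candidates, and integer division / modulo by the vocabulary size recover each candidate's beam and token. A minimal sketch (using the modern rounding_mode argument where the code above uses a bare torch.div):

import torch

bsz, beam_size, vocab_size = 2, 3, 7
lprobs = torch.randn(bsz, beam_size, vocab_size)
cand_scores, cand_indices = lprobs.view(bsz, -1).topk(2 * beam_size, dim=1)
cand_beams = torch.div(cand_indices, vocab_size, rounding_mode="floor")  # which beam
cand_tokens = cand_indices.fmod(vocab_size)                              # which token
print(cand_beams, cand_tokens)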
コード例 #42
ファイル: utils.py プロジェクト: wx-b/trustworthyAI
def matrix_poly(matrix, d):
    # (I + A/d)^d: a polynomial approximation of the matrix exponential exp(A)
    x = torch.eye(d).to(device) + torch.div(matrix.to(device), d)
    return torch.matrix_power(x, d)
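
matrix_poly computes (I + A/d)^d, a polynomial stand-in for the matrix exponential exp(A). In DAG-structure learning it typically feeds the acyclicity penalty h(A) = trace(matrix_poly(A*A, d)) - d, which is zero exactly when the weighted adjacency matrix encodes a DAG. A usage sketch (assuming the module-level device the snippet relies on):

import torch

device = "cpu"  # assumption: the snippet above expects a module-level `device`

A = torch.tensor([[0., 1., 0.],
                  [0., 0., 1.],
                  [0., 0., 0.]])  # strictly upper triangular, hence a DAG
h_A = torch.trace(matrix_poly(A * A, 3)) - 3
print(h_A)  # 0 for a DAG; positive once cycles appear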
コード例 #43
def _record_eta_batchwise(model, X, y, args):

    epsilon = args.epsilon_attack
    num_steps = args.num_steps_attack
    step_size = epsilon * 8 / num_steps
    smth_avg_steps = args.smth_avg_steps
    num_avg_steps = args.grad_avg_steps

    device = args.device

    print("epsilon is:{}".format(epsilon))
    print("num_steps is:{}".format(num_steps))

    X_pgd = Variable(X.data, requires_grad=True)
    if args.random:
        random_noise = torch.FloatTensor(*X_pgd.shape).normal_(
            mean=0, std=2 * epsilon).to(device)
        random_noise_reshaped = random_noise.view(random_noise.size(0), -1)
        random_noise_reshaped_norm = torch.norm(random_noise_reshaped,
                                                p=2,
                                                dim=1,
                                                keepdim=True)
        all_epsilon_vec = (epsilon * torch.ones([
            random_noise_reshaped_norm.size(0),
            random_noise_reshaped_norm.size(1)
        ])).type_as(random_noise_reshaped_norm)
        random_noise_reshaped_normzed = epsilon * torch.div(
            random_noise_reshaped,
            torch.max(random_noise_reshaped_norm, all_epsilon_vec).expand(
                -1, random_noise_reshaped.size(1)) + 1e-8)
        random_noise_final = random_noise_reshaped_normzed.view(
            X_pgd.size(0), X_pgd.size(1), X_pgd.size(2), X_pgd.size(3))

        X_pgd = Variable(X_pgd.data + random_noise_final, requires_grad=True)

    for _ in range(num_steps):
        opt = optim.SGD([X_pgd], lr=1e-3)
        opt.zero_grad()

        ### Average softmax outputs over num_avg_steps stochastic forward passes
        for avg_ in range(num_avg_steps):
            noi_z = model(X_pgd, args)
            soft_z = F.softmax(noi_z, dim=1)
            if avg_ == 0:
                soft_z_avg = soft_z
            else:
                soft_z_avg = soft_z_avg + soft_z

        soft_z_avg = soft_z_avg / float(num_avg_steps)
        logsoftmax = torch.log(soft_z_avg.clamp(min=1e-20))
        loss = F.nll_loss(logsoftmax, y)
        loss.backward()
        X_pgd_grad = X_pgd.grad.data
        X_pgd_grad_reshaped = X_pgd_grad.view(X_pgd_grad.size(0), -1)
        X_pgd_grad_reshaped_norm = torch.norm(X_pgd_grad_reshaped,
                                              p=2,
                                              dim=1,
                                              keepdim=True)
        X_pgd_grad_reshaped_normzed = torch.div(
            X_pgd_grad_reshaped,
            X_pgd_grad_reshaped_norm.expand(-1, X_pgd_grad_reshaped.size(1)) +
            1e-8)
        X_pgd_grad_normzed = X_pgd_grad_reshaped_normzed.view(
            X_pgd_grad.size(0), X_pgd_grad.size(1), X_pgd_grad.size(2),
            X_pgd_grad.size(3))
        eta = step_size * X_pgd_grad_normzed.data

        X_pgd = Variable(X_pgd.data + eta, requires_grad=True)

        eta_tot = X_pgd.data - X.data

        eta_tot_reshaped = eta_tot.view(eta_tot.size(0), -1)
        eta_tot_reshaped_norm = torch.norm(eta_tot_reshaped,
                                           p=2,
                                           dim=1,
                                           keepdim=True)
        all_epsilon_vec = (epsilon * torch.ones(
            [eta_tot_reshaped_norm.size(0),
             eta_tot_reshaped_norm.size(1)])).type_as(eta_tot_reshaped_norm)
        eta_tot_reshaped_normzed = epsilon * torch.div(
            eta_tot_reshaped,
            torch.max(eta_tot_reshaped_norm, all_epsilon_vec).expand(
                -1, eta_tot_reshaped.size(1)) + 1e-8)
        eta_tot_final = eta_tot_reshaped_normzed.view(X_pgd_grad.size(0),
                                                      X_pgd_grad.size(1),
                                                      X_pgd_grad.size(2),
                                                      X_pgd_grad.size(3))

        X_pgd = Variable(torch.clamp(X.data + eta_tot_final.data, 0, 1.0),
                         requires_grad=True)

    with torch.no_grad():
        for step in range(smth_avg_steps):

            out = model(X.data, args)
            out_pgd = model(X_pgd.data, args)

            if step != 0:
                cum_counts = cum_counts + (torch.max(
                    out.data, dim=1, keepdim=True)[0].repeat(
                        1, out.data.size(1)) == out.data).float()
                cum_counts_pgd = cum_counts_pgd + (torch.max(
                    out_pgd.data, dim=1, keepdim=True)[0].repeat(
                        1, out_pgd.data.size(1)) == out_pgd.data).float()
            else:
                cum_counts = (torch.max(
                    out.data, dim=1, keepdim=True)[0].repeat(
                        1, out.data.size(1)) == out.data).float()
                cum_counts_pgd = (torch.max(
                    out_pgd.data, dim=1, keepdim=True)[0].repeat(
                        1, out_pgd.data.size(1)) == out_pgd.data).float()

        err = (cum_counts.data.max(1)[1] != y.data).float().sum()
        err_pgd = (cum_counts_pgd.data.max(1)[1] != y.data).float().sum()
        eta_final = X_pgd.data - X.data
        print('err nat: ', err)
        print('err pgd (white-box): ', err_pgd)

    return X_pgd.data, err_pgd, eta_final
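
The reshape/norm/torch.max pattern that appears twice above is projection onto an L2 ball of radius epsilon. A compact equivalent (the helper name project_l2 is ours, not from the snippet):

import torch

def project_l2(eta, epsilon, eps=1e-8):
    # Rescale each sample so its flattened L2 norm is at most epsilon.
    flat = eta.view(eta.size(0), -1)
    norms = flat.norm(p=2, dim=1, keepdim=True)
    scale = epsilon / (torch.max(norms, torch.full_like(norms, epsilon)) + eps)
    return (flat * scale).view_as(eta)

eta = torch.randn(4, 3, 8, 8)
print(project_l2(eta, 0.5).view(4, -1).norm(dim=1))  # every norm <= 0.5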
コード例 #44
    def step(self, closure=None):
        loss = None
        if closure is not None:
            loss = closure()
        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data

                if grad.is_sparse:
                    raise RuntimeError(
                        'Adam does not support sparse gradients, please consider SparseAdam instead'
                    )

                flag1, flag2 = self._check_shape(grad.size())
                new_shape = p.data.size()
                if flag2 and group['enable_factorization']:
                    new_shape, old_shape = self._experimental_reshape(p.data.size())
                    grad = grad.view(new_shape)

                state = self.state[p]
                if len(state) == 0:
                    state['step'] = 0
                    if group['enable_momentum']:
                        state['exp_avg'] = torch.zeros(new_shape,
                                                       dtype=torch.float32,
                                                       device=p.grad.device)

                    if flag1 and group['enable_factorization']:
                        state['exp_avg_sq_R'] = torch.zeros(
                            (1, new_shape[1]),
                            dtype=torch.float32,
                            device=p.grad.device)
                        state['exp_avg_sq_C'] = torch.zeros(
                            (new_shape[0], 1),
                            dtype=torch.float32,
                            device=p.grad.device)
                    else:
                        state['exp_avg_sq'] = torch.zeros(new_shape,
                                                          dtype=torch.float32,
                                                          device=p.grad.device)
                    if group['ams_grad']:
                        state['exp_avg_sq_hat'] = torch.zeros(
                            new_shape,
                            dtype=torch.float32,
                            device=p.grad.device)

                if group['enable_momentum']:
                    exp_avg = state['exp_avg']

                if flag1 and group['enable_factorization']:
                    exp_avg_sq_R = state['exp_avg_sq_R']
                    exp_avg_sq_C = state['exp_avg_sq_C']
                else:
                    exp_avg_sq = state['exp_avg_sq']

                if group['ams_grad']:
                    exp_avg_sq_hat = state['exp_avg_sq_hat']

                state['step'] += 1
                lr_t = group['lr'](state['step'])
                if group['relative_step_size']:
                    lr_t *= max(group['eps2'], self._rms(p.data))

                if group['enable_momentum']:
                    beta1_t = group['beta1'](state['step'])
                    exp_avg.mul_(beta1_t).add_(1 - beta1_t, grad)

                beta2_t = group['beta2'](state['step'])

                if flag1 and group['enable_factorization']:
                    exp_avg_sq_R.mul_(beta2_t).add_(
                        1 - beta2_t,
                        torch.sum(torch.mul(grad, grad).add_(group['eps1']),
                                  dim=0,
                                  keepdim=True))
                    exp_avg_sq_C.mul_(beta2_t).add_(
                        1 - beta2_t,
                        torch.sum(torch.mul(grad, grad).add_(group['eps1']),
                                  dim=1,
                                  keepdim=True))
                    v = torch.mul(exp_avg_sq_C,
                                  exp_avg_sq_R).div_(torch.sum(exp_avg_sq_R))
                else:
                    exp_avg_sq.mul_(beta2_t).addcmul_(
                        1 - beta2_t, grad, grad).add_(
                            (1 - beta2_t) * group['eps1'])
                    v = exp_avg_sq

                g = grad
                if group['enable_momentum']:
                    g = torch.div(exp_avg, 1 - beta1_t**state['step'])

                if group['ams_grad']:
                    torch.max(exp_avg_sq_hat, v, out=exp_avg_sq_hat)
                    v = exp_avg_sq_hat
                    u = torch.div(g, (torch.div(
                        v, 1 - beta2_t**state['step'])).sqrt().add_(
                            group['eps1']))
                else:
                    u = torch.div(g, v.sqrt())

                u.div_(max(1, self._rms(u) / group['cliping_threshold']))
                p.data.add_(-lr_t * (u.view(old_shape) if flag2
                                     and group['enable_factorization'] else u))

                if group['weight_decay'] != 0:
                    p.data.add_(-group['weight_decay'] * lr_t, p.data)

        return loss
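
The factored branch above is the Adafactor-style memory saving: instead of a full matrix of squared-gradient statistics it stores one row vector and one column vector, reconstructing the matrix as their product normalised by the row sum. A small numeric sketch:

import torch

grad = torch.randn(4, 6)
sq = grad * grad + 1e-30
R = sq.sum(dim=0, keepdim=True)    # (1, 6): per-column statistics
C = sq.sum(dim=1, keepdim=True)    # (4, 1): per-row statistics
V = torch.mul(C, R).div_(R.sum())  # rank-1 reconstruction of sq
print(V.shape, float(V.sum()), float(sq.sum()))  # totals agree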
コード例 #45
def train_update(model, GAN_model, criterion, GAN_criterion, optimizer, GAN_optimizer, pos_feats, neg_feats, maxiter, in_layer='fc4'):
    batch_pos = opts['batch_pos']
    batch_neg = opts['batch_neg']
    batch_test = opts['batch_test']
    batch_neg_cand = max(opts['batch_neg_cand'], batch_neg)

    pos_idx = np.random.permutation(pos_feats.size(0))
    neg_idx = np.random.permutation(neg_feats.size(0))
    while(len(pos_idx) < batch_pos*maxiter):
        pos_idx = np.concatenate([pos_idx, np.random.permutation(pos_feats.size(0))])
    while(len(neg_idx) < batch_neg_cand*maxiter):
        neg_idx = np.concatenate([neg_idx, np.random.permutation(neg_feats.size(0))])
    pos_pointer = 0
    neg_pointer = 0
    objective = torch.zeros(1, maxiter)  # per-iteration GAN loss record (hoisted so earlier entries persist)

    for iter in range(maxiter):
        # select pos idx
        pos_next = pos_pointer+batch_pos
        pos_cur_idx = pos_idx[pos_pointer:pos_next]
        pos_cur_idx = pos_feats.new(pos_cur_idx).long()
        pos_pointer = pos_next

        # select neg idx
        neg_next = neg_pointer+batch_neg_cand
        neg_cur_idx = neg_idx[neg_pointer:neg_next]
        neg_cur_idx = neg_feats.new(neg_cur_idx).long()
        neg_pointer = neg_next

        # create batch
        batch_pos_feats = Variable(pos_feats.index_select(0, pos_cur_idx))
        batch_neg_feats = Variable(neg_feats.index_select(0, neg_cur_idx))

        # hard negative mining
        if batch_neg_cand > batch_neg:
            model.eval()
            for start in range(0, batch_neg_cand, batch_test):
                end = min(start+batch_test, batch_neg_cand)
                score = model(batch_neg_feats[start:end], in_layer=in_layer)
                if start==0:
                    neg_cand_score = score.data[:, 1].clone()
                else:
                    neg_cand_score = torch.cat((neg_cand_score, score.data[:, 1].clone()), 0)

            _, top_idx = neg_cand_score.topk(batch_neg)
            batch_neg_feats = batch_neg_feats.index_select(0, Variable(top_idx))

        # mask positive features using GAN
        batch_pos_feats_backup = batch_pos_feats.data.clone()
        GAN_model.eval()
        feat_asdn = GAN_model(batch_pos_feats).view(-1, 3, 3)
        num = feat_asdn.shape[0]
        mask_asdn = torch.ones(num, 512, 3, 3)
        if opts['use_gpu']:
            mask_asdn = mask_asdn.cuda()
        for i in range(num):
            feat_ = feat_asdn[i, :].data.clone().view(-1)
            _, idxlist = torch.topk(feat_, 3, largest=False)

            # zero the 3 lowest-scoring mask cells for this sample
            for j in range(len(idxlist)):
                idx = idxlist[j].item()
                row = int(math.floor(idx / 3))
                col = idx % 3
                mask_asdn[i, :, col, row] = 0
        batch_pos_feats = batch_pos_feats.mul(mask_asdn.view(num, -1))

        # forward
        model.train()
        pos_score = model(batch_pos_feats, in_layer=in_layer)
        neg_score = model(batch_neg_feats, in_layer=in_layer)

        # optimize
        loss = criterion(pos_score, neg_score)
        model.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), opts['grad_clip'])
        optimizer.step()

        print('Finetune FC: Iter' + str(iter+1) + ', Loss ' + str(loss.item()))

        # --------- train GAN ---------
        tic = time.time()
        # Evaluate mask
        # print('Evaluating Mask')
        n = pos_feats.size(0)
        prob_k = torch.zeros(9, 1)
        for k in range(0, 9):
            row = int(math.floor(k/3))
            col = k % 3
            batch = batch_pos_feats_backup.data.clone()
            batch = batch.view(-1, 512, 3, 3)
            batch[:, :, col, row] = 0
            batch = batch.view(batch.size(0), -1)

            # prepare label
            model.eval()
            feats = model(batch, in_layer='fc4')

            # calculate zero position
            X = feats
            X_max = torch.max(feats, dim=1)[0]
            X_max = X_max.repeat(2, 1).permute(1, 0)
            E = torch.exp(feats-X_max)
            L = torch.sum(E, 1)
            Y = torch.div(E, L.repeat(2, 1).permute(1, 0))
            prob_k[k] = torch.sum(Y, dim=0)[0]
        # after scoring all 9 cells, pick the one whose removal suppresses the score most
        _, idx = torch.min(prob_k, 0)
        row = int(math.floor(idx / 3))
        col = idx % 3
        # train
        batch = batch_pos_feats_backup.data.clone()
        GAN_batch_size = opts['GAN_batch_size']
        labels = torch.ones(3, 3, 1, GAN_batch_size)
        labels[col, row, :] = 0
        if opts['use_gpu']:
            labels = labels.cuda()
        GAN_model.train()
        GAN_score = GAN_model(batch).view(3, 3, 1, GAN_batch_size)

        # optimize
        GAN_loss = GAN_criterion(GAN_score, labels)
        GAN_model.zero_grad()
        GAN_loss.backward()
        torch.nn.utils.clip_grad_norm_(GAN_model.parameters(), opts['grad_clip'])
        GAN_optimizer.step()

        # result
        objective[:, iter] = GAN_loss.item() / GAN_batch_size
        print "Finetun GAN: Iter %d, Loss %.4f, Time %.3f" % (iter+1, torch.mean(objective[:, 0:iter+1], dim=1).data, time.time()-tic)
    return
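
The X_max/E/L/Y block inside the mask-evaluation loop is a hand-rolled, numerically stabilised softmax over the two classifier logits. Written with keepdim instead of repeat/permute, it agrees with F.softmax:

import torch
import torch.nn.functional as F

feats = torch.randn(5, 2)
X_max = feats.max(dim=1, keepdim=True)[0]
E = torch.exp(feats - X_max)        # subtract the row max for stability
Y = E / E.sum(dim=1, keepdim=True)
print(torch.allclose(Y, F.softmax(feats, dim=1)))  # True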
コード例 #46
def l2_norm(input, axis=1):
    # L2-normalize `input` along `axis`
    norm = torch.norm(input, 2, axis, True)
    output = torch.div(input, norm)
    return output
コード例 #47
 def divide(self, tensor_in_1, tensor_in_2):
     return torch.div(tensor_in_1, tensor_in_2)
コード例 #48
 def forward(self, feature):
     epsilon = 1e-6
     norm = torch.pow(torch.sum(torch.pow(feature, 2), 1) + epsilon, 0.5).unsqueeze(1).expand_as(feature)
     return torch.div(feature, norm)
コード例 #49
def prior_loss(prior_std):
    prior_loss = 0.0
    for var in net.parameters():
        nn = torch.div(var, prior_std)
        prior_loss += torch.sum(nn * nn)
    return 0.5 * prior_loss
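
prior_loss is an isotropic Gaussian prior on the weights, i.e. L2 regularisation scaled by 1/prior_std**2. A sketch of the equivalence (the throwaway net below stands in for the module-level net the snippet assumes):

import torch
import torch.nn as nn

net = nn.Linear(3, 2)  # stand-in for the global `net` used by prior_loss
prior_std = 0.5
manual = 0.5 * sum((p ** 2).sum() for p in net.parameters()) / prior_std ** 2
print(torch.allclose(manual, prior_loss(prior_std)))  # True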
コード例 #50
    def _sample(self,
                img,
                ppls,
                num,
                pos_emb=None,
                spa_adj_matrix=None,
                sem_adj_matrix=None,
                opt={}):
        sample_max = opt.get('sample_max', 1)
        beam_size = opt.get('beam_size', 1)
        temperature = opt.get('temperature', 1.0)
        inference_mode = opt.get('inference_mode', True)

        batch_size = img.size(0)
        rois_num = ppls.size(1)

        if beam_size > 1 or self.cbs:
            return self._sample_beam(img, ppls, num, pos_emb, spa_adj_matrix,
                                     sem_adj_matrix, opt)

        if self.finetune_cnn:
            conv_feats, fc_feats = self.cnn(img)
        else:
            with torch.no_grad():
                conv_feats, fc_feats = self.cnn(img.data)
        rois = ppls.data.new(batch_size, rois_num, 5)
        rois[:, :, 1:] = ppls.data[:, :, :4]

        for i in range(batch_size):
            rois[i, :, 0] = i
        pool_feats = self.roi_align(conv_feats, Variable(rois.view(-1, 5)))
        pool_feats = pool_feats.view(batch_size, rois_num, self.att_feat_size)

        # relationship
        pool_feats, _ = self.add_relation_feat(pool_feats, pos_emb,
                                               spa_adj_matrix, sem_adj_matrix)

        loc_input = ppls.data.new(batch_size, rois_num, 5)
        loc_input[:, :, :4] = ppls.data[:, :, :4] / self.image_crop_size
        loc_input[:, :, 4] = ppls.data[:, :, 5]
        loc_feats = self.loc_fc(Variable(loc_input))

        label_input = ppls.data.new(batch_size, rois_num).long()
        label_input[:, :] = ppls.data[:, :, 4]
        label_feat = self.det_fc(Variable(label_input))

        # pool_feats = pool_feats + label_feat
        pool_feats = torch.cat((pool_feats, loc_feats, label_feat), 2)
        # transpose the conv_feats
        conv_feats = conv_feats.view(batch_size, self.att_feat_size,
                                     -1).transpose(1, 2).contiguous()
        # embed fc and att feats
        pool_feats = self.pool_embed(pool_feats)
        fc_feats = self.fc_embed(fc_feats)
        conv_feats = self.att_embed(conv_feats)

        # Project the attention feats first to reduce memory and computation comsumptions.
        p_conv_feats = self.ctx2att(conv_feats)
        p_pool_feats = self.ctx2pool(pool_feats)

        vis_offset = (torch.arange(0, batch_size) *
                      rois_num).view(batch_size).type_as(ppls.data).long()
        roi_offset = (torch.arange(0, batch_size) *
                      (rois_num + 1)).view(batch_size).type_as(
                          ppls.data).long()

        # constructing the mask.
        pnt_mask = ppls.data.new(batch_size, rois_num + 1).byte().fill_(1)
        for i in range(batch_size):
            pnt_mask[i, :num.data[i, 1] + 1] = 0
        pnt_mask = Variable(pnt_mask)
        pnt_mask_list = []
        pnt_mask_list.append(pnt_mask)

        att_mask = pnt_mask.clone()
        state = self.init_hidden(batch_size)

        seq = []
        seqLogprobs = []
        bn_seq = []
        bnLogprobs = []
        fg_seq = []
        fgLogprobs = []

        for t in range(self.seq_length + 1):
            if t == 0:  # input <bos>
                it = fc_feats.data.new(batch_size).long().zero_()
            elif sample_max:
                sampleLogprobs, it = torch.max(logprobs.data, 1)
                it = it.view(-1).long()
            else:
                if temperature == 1.0:
                    prob_prev = torch.exp(
                        logprobs.data
                    )  # fetch prev distribution: shape Nx(M+1)
                else:
                    # scale logprobs by temperature
                    prob_prev = torch.exp(torch.div(logprobs.data,
                                                    temperature))
                it = torch.multinomial(prob_prev, 1)
                sampleLogprobs = logprobs.gather(
                    1,
                    Variable(it))  # gather the logprobs at sampled positions
                it = it.view(
                    -1).long()  # and flatten indices for downstream processing

            roi_idx = it.clone() - self.vocab_size - 1  # starting from 0
            roi_mask = roi_idx < 0
            roi_idx_offset = roi_idx + vis_offset
            roi_idx_offset[roi_mask] = 0

            vis_idx = ppls.data[:, :,
                                4].clone().view(-1)[roi_idx_offset].long()
            vis_idx[roi_mask] = 0

            # if inference_mode:
            # if the roi_idx is selected, we need to make sure this is not selected again.
            pnt_idx_offset = roi_idx + roi_offset + 1
            pnt_idx_offset[roi_mask] = 0
            pnt_mask_new = pnt_mask_list[-1].data.clone()
            pnt_mask_new.view(-1)[pnt_idx_offset] = 1
            pnt_mask_new.view(-1)[0] = 0
            pnt_mask_list.append(Variable(pnt_mask_new))

            # tmp_feat = concat_feat.view(-1, self.rnn_size)[tmp_idx_offset]
            # we need to convert the roi index to label index.
            it_new = it.clone()
            it_new[it > self.vocab_size] = (vis_idx[roi_mask == 0] +
                                            self.vocab_size)
            xt = self.embed(Variable(it_new))

            if t >= 1:
                # do the cascade caption refinement here
                roi_labels = pool_feats.data.new(batch_size * rois_num).zero_()
                if (roi_mask == 0).sum() > 0:
                    roi_labels[roi_idx_offset[roi_mask == 0]] = 1
                roi_labels = roi_labels.view(batch_size, 1, rois_num)

                bn_logprob, fg_logprob = self.ccr_core(vis_idx, pool_feats, \
                                                       rnn_output.view(batch_size, 1, self.rnn_size),
                                                       Variable(roi_labels), batch_size, 1)
                bn_logprob = bn_logprob.view(batch_size, -1)
                fg_logprob = fg_logprob.view(batch_size, -1)

                if sample_max:
                    slp_bn, it_bn = torch.max(bn_logprob.data, 1)
                    slp_fg, it_fg = torch.max(fg_logprob.data, 1)
                else:
                    if temperature == 1.0:
                        bn_prob_prev = torch.exp(bn_logprob.data)
                        fg_prob_prev = torch.exp(fg_logprob.data)
                    else:
                        bn_prob_prev = torch.exp(
                            torch.div(bn_logprob.data, temperature))
                        fg_prob_prev = torch.exp(
                            torch.div(fg_logprob.data, temperature))

                    it_bn = torch.multinomial(bn_prob_prev, 1)
                    it_fg = torch.multinomial(fg_prob_prev, 1)

                    slp_bn = bn_logprob.gather(1, Variable(
                        it_bn))  # gather the logprobs at sampled positions
                    slp_fg = fg_logprob.gather(1, Variable(
                        it_fg))  # gather the logprobs at sampled positions

                it_bn[roi_mask] = 0
                it_fg[roi_mask] = 0

                # stop when all finished
                if t == 1:
                    unfinished = it > 0
                else:
                    unfinished = unfinished * (it > 0)
                it = it * unfinished.type_as(it)
                seq.append(it)  # seq[t] the input of t+2 time step
                seqLogprobs.append(sampleLogprobs.view(-1))
                bn_seq.append(it_bn)
                bnLogprobs.append(slp_bn.view(-1))
                fg_seq.append(it_fg)
                fgLogprobs.append(slp_fg.view(-1))

            rnn_output, det_prob, state = self.core(xt, fc_feats, conv_feats,
                                                    p_conv_feats, pool_feats,
                                                    p_pool_feats, att_mask,
                                                    pnt_mask_list[-1], state)

            # pnt_mask = pnt_mask_new # update the mask

            det_prob = F.log_softmax(det_prob, dim=1)
            decoded = F.log_softmax(self.beta * self.logit(rnn_output), dim=1)
            lambda_v = det_prob[:, 0].contiguous()
            prob_det = det_prob[:, 1:].contiguous()

            decoded = decoded + lambda_v.view(batch_size, 1).expand_as(decoded)
            logprobs = torch.cat([decoded, prob_det], 1)
            # logprobs = torch.log(decoded)

        seq = torch.cat([_.unsqueeze(1) for _ in seq], 1)
        seqLogprobs = torch.cat([_.unsqueeze(1) for _ in seqLogprobs], 1)
        bn_seq = torch.cat([_.unsqueeze(1) for _ in bn_seq], 1)
        bnLogprobs = torch.cat([_.unsqueeze(1) for _ in bnLogprobs], 1)
        fg_seq = torch.cat([_.unsqueeze(1) for _ in fg_seq], 1)
        fgLogprobs = torch.cat([_.unsqueeze(1) for _ in fgLogprobs], 1)

        return seq, bn_seq, fg_seq, seqLogprobs, bnLogprobs, fgLogprobs
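
Both sampling sites above use the same temperature trick: log-probabilities are divided by a temperature before exponentiation, flattening the distribution for temperatures above 1 and sharpening it below 1; torch.multinomial then normalises the unnormalised weights itself. In isolation:

import torch

logprobs = torch.log_softmax(torch.tensor([[2.0, 1.0, 0.1]]), dim=1)
for temperature in (0.5, 1.0, 2.0):
    prob_prev = torch.exp(torch.div(logprobs, temperature))
    it = torch.multinomial(prob_prev, 1)  # normalises prob_prev internally
    print(temperature, (prob_prev / prob_prev.sum()).numpy(), it.item())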
コード例 #51
import numpy as np
import torch
import torch.nn as nn

# -----------------------------------  KLDiv loss

loss_f = nn.KLDivLoss(reduction='none')
loss_f_mean = nn.KLDivLoss(reduction='batchmean')

# generate the network output and the target output
output = torch.from_numpy(np.array([[0.1132, 0.5477, 0.3390]])).float()
output.requires_grad = True
target = torch.from_numpy(np.array([[0.8541, 0.0511, 0.0947]])).float()

loss_1 = loss_f(output, target)
loss_mean = loss_f_mean(output, target)

print('\nloss: ', loss_1)
print('\nloss_mean: ', torch.div(loss_mean, 3))

# To check the formula by hand, compute the loss for the first element; there is a single sample here and KLDivLoss(reduction='none') is element-wise.

output = output[0].detach().numpy()
output_1 = output[0]  # first element of the first sample
target_1 = target[0][0].numpy()

loss_1 = target_1 * (np.log(target_1) - output_1)

print('\nloss of the first element of the first sample:', loss_1)
コード例 #52
def conditional_distributions_loss(model,
                                   x,
                                   t,
                                   e,
                                   pdf_u,
                                   pdf_c,
                                   hr_loss=False,
                                   imbalance_loss=False,
                                   elbo=True,
                                   risk='1'):

    shape_weibull, scale_weibull, gates_weibull, shape_lognormal, scale_lognormal, logits_lognormal, attention_weights = model.forward(
        x)

    lossf_lognormal = []
    losss_lognormal = []

    hr_lognormal = []
    for g in range(model.k):

        mu = shape_lognormal[:, g]
        sigma = scale_lognormal[:, g]

        f = -sigma - 0.5 * np.log(2 * np.pi)
        f = f - torch.div((torch.log(t) - mu)**2, 2. * torch.exp(2 * sigma))
        s = torch.div(torch.log(t) - mu, torch.exp(sigma) * np.sqrt(2))
        s = 0.5 - 0.5 * torch.erf(s)
        s = torch.log(s)

        lossf_lognormal.append(f)
        losss_lognormal.append(s)

        # negative partial log likelihood
        hr_lognormal.append(f - s)

    losss_lognormal = torch.stack(losss_lognormal, dim=1)
    lossf_lognormal = torch.stack(lossf_lognormal, dim=1)
    hr_lognormal = torch.stack(hr_lognormal, dim=1)

    if elbo:
        lossg_lognormal = nn.Softmax(dim=1)(logits_lognormal)
        losss_lognormal = lossg_lognormal * losss_lognormal
        lossf_lognormal = lossg_lognormal * lossf_lognormal

        losss_lognormal = losss_lognormal.sum(dim=1)
        lossf_lognormal = lossf_lognormal.sum(dim=1)

        hr_lognormal = lossg_lognormal * hr_lognormal
        hr_lognormal = hr_lognormal.sum(dim=1)
    else:
        lossg_lognormal = nn.LogSoftmax(dim=1)(logits_lognormal)
        losss_lognormal = lossg_lognormal + losss_lognormal
        lossf_lognormal = lossg_lognormal + lossf_lognormal
        losss_lognormal = torch.logsumexp(losss_lognormal, dim=1)
        lossf_lognormal = torch.logsumexp(lossf_lognormal, dim=1)

    # Weibull distribution
    shapes_weibull, scales_weibull = shape_weibull.exp(), (
        -scale_weibull).exp()
    lossf_weibull, losss_weibull = [], []
    hr_weibull = []
    for idx in range(model.k):

        eta = shapes_weibull[:, idx]
        beta = scales_weibull[:, idx]

        log_s_weibull = -(torch.pow(t / beta, eta))
        log_f_weibull = torch.log(eta) - torch.log(beta) + (
            (eta - 1) * (-torch.log(beta) + torch.log(t)))
        log_f_weibull = log_f_weibull + log_s_weibull

        lossf_weibull.append(log_f_weibull)
        losss_weibull.append(log_s_weibull)

        # negative partial log likelihood
        hr_weibull.append(torch.log(eta / beta * (t / beta)**(eta - 1)))

    losss_weibull = torch.stack(losss_weibull, dim=1)
    lossf_weibull = torch.stack(lossf_weibull, dim=1)
    hr_weibull = torch.stack(hr_weibull, dim=1)

    if elbo:
        lossg_weibull = nn.Softmax(dim=1)(gates_weibull)
        losss_weibull = lossg_weibull * losss_weibull
        lossf_weibull = lossg_weibull * lossf_weibull
        losss_weibull = losss_weibull.sum(dim=1)
        lossf_weibull = lossf_weibull.sum(dim=1)
        hr_weibull = hr_weibull * lossg_weibull
        hr_weibull = hr_weibull.sum(dim=1)
    else:
        lossg_weibull = nn.LogSoftmax(dim=1)(gates_weibull)
        losss_weibull = lossg_weibull + losss_weibull
        lossf_weibull = lossg_weibull + lossf_weibull
        losss_weibull = torch.logsumexp(losss_weibull, dim=1)
        lossf_weibull = torch.logsumexp(lossf_weibull, dim=1)

    # Combine

    lossf, losss = torch.stack([lossf_lognormal, lossf_weibull],
                               dim=1), torch.stack(
                                   [losss_lognormal, losss_weibull], dim=1)
    weights = nn.Softmax(dim=1)(attention_weights)
    #hr = torch.stack([hr_weibull, hr_lognormal], dim=1)
    hr = torch.stack(
        [lossf_lognormal - losss_lognormal, lossf_weibull - losss_weibull],
        dim=1)
    hr = hr * weights
    hr = hr.sum(dim=1)
    loss_neg = PartialLogLikelihood()(hr, e)

    lossf = lossf * weights
    losss = losss * weights
    lossf = lossf.sum(dim=1)
    losss = losss.sum(dim=1)

    # optionally reweight by the empirical event/censoring-time densities
    if imbalance_loss:
        try:
            idx_time = t.int().cpu().detach().numpy()
            pdf_u_ = torch.tensor(pdf_u).cuda()
            pdf_c_ = torch.tensor(pdf_c).cuda()
            lossf = lossf * (1 - pdf_u_[idx_time])  #.exp()
            losss = losss * (1 - pdf_c_[idx_time])  #.exp()
        except:  # skip reweighting if a time index falls outside the pdf support
            pass

    uncens = np.where(e.cpu().data.numpy() == int(risk))[0]
    cens = np.where(e.cpu().data.numpy() != int(risk))[0]
    ll = lossf[uncens].sum() + model.discount * losss[cens].sum()

    if hr_loss and e.sum() > 0:
        return -ll / float(len(uncens) + len(cens)) + loss_neg * model.gamma
    else:
        return -ll / float(len(uncens) + len(cens))
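The closed-form lognormal terms used above can be sanity-checked against torch.distributions. Below is a minimal sketch with made-up tensors (an illustration, not part of the original project): `sigma` in the snippet is the log of the underlying normal's standard deviation, and `f` drops the parameter-free -log(t) term, so `f - log(t)` should match `LogNormal.log_prob`.

import numpy as np
import torch

# hypothetical values, for illustration only
t = torch.tensor([2.0, 5.0])
mu = torch.tensor([0.3, 0.7])
log_sigma = torch.tensor([-0.2, 0.1])  # plays the role of `sigma` above

f = -log_sigma - 0.5 * np.log(2 * np.pi)
f = f - torch.div((torch.log(t) - mu)**2, 2. * torch.exp(2 * log_sigma))

dist = torch.distributions.LogNormal(mu, torch.exp(log_sigma))
assert torch.allclose(f - torch.log(t), dist.log_prob(t), atol=1e-5)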
コード例 #53
ファイル: vgg.py プロジェクト: matmons/TDT4265
 def forward(self, x):
     norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() + self.eps
     x = torch.div(x, norm)
     out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x
     return out
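The forward above belongs to an SSD-style L2Norm layer whose constructor is not shown in this snippet. A minimal self-contained sketch, with `n_channels`, `scale`, and `eps` assumed for illustration:

import torch
import torch.nn as nn

class L2Norm(nn.Module):
    def __init__(self, n_channels, scale=20.0, eps=1e-10):  # assumed constructor
        super().__init__()
        self.eps = eps
        self.weight = nn.Parameter(torch.full((n_channels,), scale))

    def forward(self, x):
        # normalize each spatial position's channel vector to unit L2 norm
        norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() + self.eps
        x = torch.div(x, norm)
        # rescale every channel by its learned weight
        return self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x

out = L2Norm(512)(torch.randn(2, 512, 38, 38))  # conv4_3-sized feature map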
コード例 #54
ファイル: beam_decode.py プロジェクト: lhu17/translate
    def _generate(self, encoder_input, beam_size=None, maxlen=None, prefix_tokens=None):

        src_tokens = encoder_input[0]

        bsz, srclen = src_tokens.size()
        maxlen = min(maxlen, self.maxlen) if maxlen is not None else self.maxlen

        # the max beam size is the dictionary size - 1, since we never select pad
        beam_size = beam_size if beam_size is not None else self.beam_size
        assert (
            beam_size < self.vocab_size
        ), "Beam size must be smaller than target vocabulary"

        # Encode, expanding outputs for each example beam_size times
        reorder_indices = torch.arange(bsz).view(-1, 1).repeat(1, beam_size).view(-1)
        encoder_outs, incremental_states = self._encode(
            encoder_input=encoder_input,
            reorder_indices=reorder_indices.type_as(src_tokens),
        )

        # initialize buffers
        scores = src_tokens.new(bsz * beam_size, maxlen + 1).float().fill_(0)
        scores_buf = scores.clone()
        tokens = src_tokens.new(bsz * beam_size, maxlen + 2).fill_(self.pad)
        tokens_buf = tokens.clone()
        tokens[:, 0] = self.eos

        # source encoding length may differ from the input length
        if isinstance(encoder_outs[0], (list, tuple)):
            src_encoding_len = encoder_outs[0][0].size(0)
        elif isinstance(encoder_outs[0], dict):
            src_encoding_len = encoder_outs[0]["encoder_out"].size(0)

        attn = scores.new(bsz * beam_size, src_encoding_len, maxlen + 2)
        attn_buf = attn.clone()

        # list of completed sentences
        finalized = [[] for i in range(bsz)]
        finished = [False for i in range(bsz)]
        worst_finalized = [{"idx": None, "score": -math.inf} for i in range(bsz)]
        num_remaining_sent = bsz

        # number of candidate hypos per step
        cand_size = 2 * beam_size  # 2 x beam size in case half are EOS

        # offset arrays for converting between different indexing schemes
        bbsz_offsets = (torch.arange(0, bsz) * beam_size).unsqueeze(1).type_as(tokens)
        cand_offsets = torch.arange(0, cand_size).type_as(tokens)

        # helper function for allocating buffers on the fly
        buffers = {}

        def buffer(name, type_of=tokens):  # noqa
            if name not in buffers:
                buffers[name] = type_of.new()
            return buffers[name]

        def is_finished(sent, step, unfinalized_scores=None):
            """
            Check whether we've finished generation for a given sentence, by
            comparing the worst score among finalized hypotheses to the best
            possible score among unfinalized hypotheses.
            """
            assert len(finalized[sent]) <= beam_size
            if len(finalized[sent]) == beam_size:
                if self.stop_early or step == maxlen or unfinalized_scores is None:
                    return True
                # stop if the best unfinalized score is worse than the worst
                # finalized one
                best_unfinalized_score = unfinalized_scores[sent].max()
                if self.normalize_scores:
                    best_unfinalized_score /= (maxlen + 1) ** self.len_penalty
                if worst_finalized[sent]["score"] >= best_unfinalized_score:
                    return True
            return False

        def finalize_hypos(step, bbsz_idx, eos_scores, unfinalized_scores=None):
            """
            Finalize the given hypotheses at this step, while keeping the total
            number of finalized hypotheses per sentence <= beam_size.

            Note: the input must be in the desired finalization order, so that
            hypotheses that appear earlier in the input are preferred to those
            that appear later.

            Args:
                step: current time step
                bbsz_idx: A vector of indices in the range [0, bsz*beam_size),
                    indicating which hypotheses to finalize
                eos_scores: A vector of the same size as bbsz_idx containing
                    scores for each hypothesis
                unfinalized_scores: A vector containing scores for all
                    unfinalized hypotheses
            """
            assert bbsz_idx.numel() == eos_scores.numel()

            # clone relevant token and attention tensors
            tokens_clone = tokens.index_select(0, bbsz_idx)
            tokens_clone = tokens_clone[
                :, 1 : step + 2
            ]  # skip the first index, which is EOS
            tokens_clone[:, step] = self.eos
            attn_clone = attn.index_select(0, bbsz_idx)[:, :, 1 : step + 2]

            # compute scores per token position
            pos_scores = scores.index_select(0, bbsz_idx)[:, : step + 1]
            pos_scores[:, step] = eos_scores
            # convert from cumulative to per-position scores
            pos_scores[:, 1:] = pos_scores[:, 1:] - pos_scores[:, :-1]

            # normalize sentence-level scores
            if self.normalize_scores:
                eos_scores /= (step + 1) ** self.len_penalty

            sents_seen = set()
            for i, (idx, score) in enumerate(
                zip(bbsz_idx.tolist(), eos_scores.tolist())
            ):
                sent = idx // beam_size
                sents_seen.add(sent)

                def get_hypo():
                    _, alignment = attn_clone[i].max(dim=0)
                    return {
                        "tokens": tokens_clone[i],
                        "score": score,
                        "attention": attn_clone[i],  # src_len x tgt_len
                        "alignment": alignment,
                        "positional_scores": pos_scores[i],
                    }

                if len(finalized[sent]) < beam_size:
                    finalized[sent].append(get_hypo())
                elif not self.stop_early and score > worst_finalized[sent]["score"]:
                    # replace worst hypo for this sentence with new/better one
                    worst_idx = worst_finalized[sent]["idx"]
                    if worst_idx is not None:
                        finalized[sent][worst_idx] = get_hypo()

                    # find new worst finalized hypo for this sentence
                    idx, s = min(
                        enumerate(finalized[sent]), key=lambda r: r[1]["score"]
                    )
                    worst_finalized[sent] = {"score": s["score"], "idx": idx}

            # return number of hypotheses finished this step
            num_finished = 0
            for sent in sents_seen:
                # check termination conditions for this sentence
                if not finished[sent] and is_finished(sent, step, unfinalized_scores):
                    finished[sent] = True
                    num_finished += 1
            return num_finished

        reorder_state = None
        for step in range(maxlen + 1):  # one extra step for EOS marker
            # reorder decoder internal states based on the prev choice of beams
            if reorder_state is not None:
                for model in self.models:
                    if isinstance(model.decoder, FairseqIncrementalDecoder):
                        model.decoder.reorder_incremental_state(
                            incremental_states[model], reorder_state
                        )
            # Run decoder for one step
            logprobs, avg_attn, possible_translation_tokens = self._decode(
                tokens[:, : step + 1], encoder_outs, incremental_states
            )

            if step == 0:
                # at the first step all hypotheses are equally likely, so use
                # only the first beam
                logprobs = logprobs.unfold(0, 1, beam_size).squeeze(2).contiguous()
                scores = scores.type_as(logprobs)
                scores_buf = scores_buf.type_as(logprobs)
            else:
                # make probs contain cumulative scores for each hypothesis
                logprobs.add_(scores[:, step - 1].view(-1, 1))
            logprobs[:, self.pad] = -math.inf  # never select pad

            # apply unk reward
            if possible_translation_tokens is None:
                unk_index = self.unk
            else:
                unk_index = torch.nonzero(possible_translation_tokens == self.unk)[0, 0]
            logprobs[:, unk_index] += self.unk_reward

            # external lexicon reward
            logprobs[:, self.lexicon_indices] += self.lexicon_reward

            logprobs += self.word_reward
            logprobs[:, self.eos] -= self.word_reward

            # Record attention scores
            attn[:, :, step + 1].copy_(avg_attn)

            cand_scores = buffer("cand_scores", type_of=scores)
            cand_indices = buffer("cand_indices")
            cand_beams = buffer("cand_beams")
            eos_bbsz_idx = buffer("eos_bbsz_idx")
            eos_scores = buffer("eos_scores", type_of=scores)
            if step < maxlen:
                if prefix_tokens is not None and step < prefix_tokens.size(1):
                    logprobs_slice = logprobs.view(bsz, -1, logprobs.size(-1))[:, 0, :]
                    cand_scores = torch.gather(
                        logprobs_slice, dim=1, index=prefix_tokens[:, step].view(-1, 1)
                    ).expand(-1, cand_size)
                    cand_indices = (
                        prefix_tokens[:, step].view(-1, 1).expand(bsz, cand_size)
                    )
                    cand_beams.resize_as_(cand_indices).fill_(0)
                else:
                    # take the best 2 x beam_size predictions. We'll choose the first
                    # beam_size of these which don't predict eos to continue with.
                    torch.topk(
                        logprobs.view(bsz, -1),
                        k=min(
                            cand_size, logprobs.view(bsz, -1).size(1) - 1
                        ),  # -1 so we never select pad
                        out=(cand_scores, cand_indices),
                    )

                    possible_tokens_size = self.vocab_size
                    if possible_translation_tokens is not None:
                        possible_tokens_size = possible_translation_tokens.size(0)
                    # cand_indices has values in [0, possible_tokens_size * beam_size)
                    # the following does euclidean division by possible_tokens_size
                    # to retrieve the beam and word id of each candidate
                    torch.div(cand_indices, possible_tokens_size, out=cand_beams)
                    cand_indices.fmod_(possible_tokens_size)
                    # Handle vocab reduction
                    if possible_translation_tokens is not None:
                        possible_translation_tokens = possible_translation_tokens.view(
                            1, possible_tokens_size
                        ).expand(cand_indices.size(0), possible_tokens_size)
                        cand_indices = torch.gather(
                            possible_translation_tokens,
                            dim=1,
                            index=cand_indices,
                            out=cand_indices,
                        )
            else:
                # finalize all active hypotheses once we hit maxlen
                # pick the hypothesis with the highest log prob of EOS right now
                torch.sort(
                    logprobs[:, self.eos],
                    descending=True,
                    out=(eos_scores, eos_bbsz_idx),
                )
                num_remaining_sent -= finalize_hypos(step, eos_bbsz_idx, eos_scores)
                assert num_remaining_sent == 0
                break

            # cand_bbsz_idx contains beam indices for the top candidate
            # hypotheses, with a range of values: [0, bsz*beam_size),
            # and dimensions: [bsz, cand_size]
            cand_bbsz_idx = cand_beams.add_(bbsz_offsets)

            # finalize hypotheses that end in eos
            eos_mask = cand_indices.eq(self.eos)
            if step >= self.minlen:
                # only consider eos when it's among the top beam_size indices
                torch.masked_select(
                    cand_bbsz_idx[:, :beam_size],
                    mask=eos_mask[:, :beam_size],
                    out=eos_bbsz_idx,
                )
                if eos_bbsz_idx.numel() > 0:
                    torch.masked_select(
                        cand_scores[:, :beam_size],
                        mask=eos_mask[:, :beam_size],
                        out=eos_scores,
                    )
                    num_remaining_sent -= finalize_hypos(
                        step, eos_bbsz_idx, eos_scores, cand_scores
                    )

            assert num_remaining_sent >= 0
            if num_remaining_sent == 0:
                break
            assert step < maxlen

            # set active_mask so that values >= cand_size indicate eos hypos
            # and values < cand_size indicate candidate active hypos.
            # Afterwards, the min values per row are the top candidate active hypos
            active_mask = buffer("active_mask")
            torch.add(
                eos_mask.type_as(cand_offsets) * cand_size,
                cand_offsets[: eos_mask.size(1)],
                out=active_mask,
            )

            # get the top beam_size active hypotheses, which are just the hypos
            # with the smallest values in active_mask
            active_hypos, _ignore = buffer("active_hypos"), buffer("_ignore")
            torch.topk(
                active_mask,
                k=beam_size,
                dim=1,
                largest=False,
                out=(_ignore, active_hypos),
            )
            active_bbsz_idx = buffer("active_bbsz_idx")
            torch.gather(cand_bbsz_idx, dim=1, index=active_hypos, out=active_bbsz_idx)
            active_scores = torch.gather(
                cand_scores,
                dim=1,
                index=active_hypos,
                out=scores[:, step].view(bsz, beam_size),
            )
            active_bbsz_idx = active_bbsz_idx.view(-1)
            active_scores = active_scores.view(-1)

            # copy tokens and scores for active hypotheses
            torch.index_select(
                tokens[:, : step + 1],
                dim=0,
                index=active_bbsz_idx,
                out=tokens_buf[:, : step + 1],
            )
            torch.gather(
                cand_indices,
                dim=1,
                index=active_hypos,
                out=tokens_buf.view(bsz, beam_size, -1)[:, :, step + 1],
            )
            if step > 0:
                torch.index_select(
                    scores[:, :step],
                    dim=0,
                    index=active_bbsz_idx,
                    out=scores_buf[:, :step],
                )
            torch.gather(
                cand_scores,
                dim=1,
                index=active_hypos,
                out=scores_buf.view(bsz, beam_size, -1)[:, :, step],
            )

            # copy attention for active hypotheses
            torch.index_select(
                attn[:, :, : step + 2],
                dim=0,
                index=active_bbsz_idx,
                out=attn_buf[:, :, : step + 2],
            )

            # swap buffers
            tokens, tokens_buf = tokens_buf, tokens
            scores, scores_buf = scores_buf, scores
            attn, attn_buf = attn_buf, attn

            # reorder incremental state in decoder
            reorder_state = active_bbsz_idx

        # sort by score descending
        for sent in range(bsz):
            finalized[sent] = sorted(
                finalized[sent], key=lambda r: r["score"], reverse=True
            )

        return finalized
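The beam bookkeeping in the candidate selection above reduces to a flattened topk followed by a divide/fmod pair. An isolated sketch (using `//` instead of the in-place `torch.div(..., out=...)` form of older PyTorch):

import torch

bsz, beam_size, vocab = 2, 3, 7  # illustrative sizes
lprobs = torch.randn(bsz, beam_size, vocab)
# topk over the flattened (beam, vocab) axis picks 2*beam_size candidates
cand_scores, cand_indices = lprobs.view(bsz, -1).topk(2 * beam_size, dim=1)
cand_beams = cand_indices // vocab      # which beam each candidate came from
cand_tokens = cand_indices.fmod(vocab)  # which token it predicts
rows = torch.arange(bsz).unsqueeze(1)
assert torch.equal(lprobs[rows, cand_beams, cand_tokens], cand_scores)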
コード例 #55
ファイル: bregman_pytorch.py プロジェクト: syelman/DM-Count
def sinkhorn_knopp(a,
                   b,
                   C,
                   reg=1e-1,
                   maxIter=1000,
                   stopThr=1e-9,
                   verbose=False,
                   log=False,
                   warm_start=None,
                   eval_freq=10,
                   print_freq=200,
                   **kwargs):
    """
    Solve the entropic regularization optimal transport
    The input should be PyTorch tensors
    The function solves the following optimization problem:

    .. math::
        \gamma = \arg\min_\gamma \langle\gamma, C\rangle_F + reg\cdot\Omega(\gamma)
        s.t. \gamma 1 = a
             \gamma^T 1 = b
             \gamma \geq 0
    where :
    - C is the (ns,nt) metric cost matrix
    - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
    - a and b are target and source measures (sum to 1)
    The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [1].

    Parameters
    ----------
    a : torch.tensor (na,)
        samples measure in the target domain
    b : torch.tensor (nb,)
        samples in the source domain
    C : torch.tensor (na,nb)
        loss matrix
    reg : float
        Regularization term > 0
    maxIter : int, optional
        Max number of iterations
    stopThr : float, optional
        Stop threshold on error (> 0)
    verbose : bool, optional
        Print information along iterations
    log : bool, optional
        record log if True

    Returns
    -------
    gamma : (na x nb) torch.tensor
        Optimal transportation matrix for the given parameters
    log : dict
        log dictionary return only if log==True in parameters

    References
    ----------
    [1] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
    See Also
    --------

    """

    device = a.device
    na, nb = C.shape

    assert na >= 1 and nb >= 1, 'C needs to be 2d'
    assert na == a.shape[0] and nb == b.shape[
        0], "Shape of a or b doesn't match that of C"
    assert reg > 0, 'reg should be greater than 0'
    # assert a.min() >= 0. and b.min() >= 0., 'Elements in a or b less than 0'
    # unnecessary check for our special case
    if log:
        log = {'err': []}

    if warm_start is not None:
        u = warm_start['u']
        v = warm_start['v']
    else:
        u = torch.ones(na, dtype=a.dtype).to(device) / na
        v = torch.ones(nb, dtype=b.dtype).to(device) / nb

    K = torch.empty(C.shape, dtype=C.dtype).to(device)
    torch.div(C, -reg, out=K)
    torch.exp(K, out=K)

    b_hat = torch.empty(b.shape, dtype=C.dtype).to(device)

    it = 1
    err = 1

    # allocate memory beforehand
    KTu = torch.empty(v.shape, dtype=v.dtype).to(device)
    Kv = torch.empty(u.shape, dtype=u.dtype).to(device)

    while (err > stopThr and it <= maxIter):
        upre, vpre = u, v
        torch.matmul(u, K, out=KTu)
        v = torch.div(b, KTu + M_EPS)
        torch.matmul(K, v, out=Kv)
        u = torch.div(a, Kv + M_EPS)

        if torch.any(torch.isnan(u)) or torch.any(torch.isnan(v)) or \
                torch.any(torch.isinf(u)) or torch.any(torch.isinf(v)):
            print('Warning: numerical errors at iteration', it)
            u, v = upre, vpre
            break

        if log and it % eval_freq == 0:
            # we can speed up the process by checking the error only every
            # eval_freq iterations
            # below is equivalent to:
            # b_hat = torch.sum(u.reshape(-1, 1) * K * v.reshape(1, -1), 0)
            # but more memory efficient
            b_hat = torch.matmul(u, K) * v
            err = (b - b_hat).pow(2).sum().item()
            # err = (b - b_hat).abs().sum().item()
            log['err'].append(err)

        if verbose and it % print_freq == 0:
            print('iteration {:5d}, constraint error {:5e}'.format(it, err))

        it += 1

    if log:
        log['u'] = u
        log['v'] = v
        log['alpha'] = reg * torch.log(u + M_EPS)
        log['beta'] = reg * torch.log(v + M_EPS)

    # transport plan
    P = u.reshape(-1, 1) * K * v.reshape(1, -1)
    if log:
        return P, log
    else:
        return P
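A hypothetical usage of sinkhorn_knopp on a tiny 1-D problem; M_EPS is assumed to be a small module-level constant (e.g. 1e-16) defined elsewhere in the file:

import torch

na, nb = 4, 5
a = torch.full((na,), 1.0 / na)        # uniform target histogram
b = torch.full((nb,), 1.0 / nb)        # uniform source histogram
x = torch.linspace(0, 1, na).view(-1, 1)
y = torch.linspace(0, 1, nb).view(1, -1)
C = (x - y)**2                         # squared-distance cost matrix
P = sinkhorn_knopp(a, b, C, reg=1e-1, maxIter=500)
print(P.sum(dim=1))                    # ~ a (row marginals)
print(P.sum(dim=0))                    # ~ b (column marginals)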
コード例 #56
def main(exp_config):

    # =====================
    # Load network
    # =====================
    model = models.ResNet34(num_c=exp_config.num_classes)
    summary(model.cuda(),
            input_size=(3, 32, 32))  # display the layers of the network

    model.cuda()  # move the model to the GPU

    # =========================
    # Load source dataset and pre-trained model
    # =========================
    source_train_loader, source_test_loader, _ = load_datasets(
        exp_config.data_identifier_source, exp_config.batch_size)
    model.load_state_dict(torch.load(exp_config.pre_trained_net))
    model.eval()

    # =========================
    # KDE-based OOD detection
    # =========================

    # Open a .txt file to save the OOD detection results
    path_to_saved_results = 'results/' + exp_config.experiment_name + '/' + exp_config.method_name + '/results_' + str(
        exp_config.number_of_samples_for_KDE) + '.txt'
    f = open(path_to_saved_results, "w")

    # Compute number of layers in the network
    num_layers = KDE.compute_num_layers(exp_config, model)

    # Compute features for each channel for the test set of in-distribution dataset
    # get_features function returns MxN tensor where M is the number of samples
    # and N is the number of channels
    print('Calculating features for the test set of in-distribution dataset')
    feature_in_test = KDE.get_features(exp_config, model, num_layers,
                                       source_test_loader)

    # Compute features for each channel for the training set of in-distribution dataset
    print(
        'Calculating features for the training set of in-distribution dataset')
    feature_in_train = KDE.get_features(exp_config,
                                        model,
                                        num_layers,
                                        source_train_loader,
                                        is_train=True)

    # Compute features for each channel for the adversarially perturbed version of training set of in-distribution dataset
    print('Calculating features for the adversarial images')
    feature_in_train_perturbed = KDE.get_features(exp_config,
                                                  model,
                                                  num_layers,
                                                  source_train_loader,
                                                  perturb=True,
                                                  is_train=True)

    # Calculate features for each OOD dataset
    print('Calculating features for each OOD dataset')
    feature_ood = {}
    for target in exp_config.data_identifier_target:
        _, ood_loader, _ = load_datasets(target, exp_config.batch_size)
        feature_ood[target] = KDE.get_features(exp_config, model, num_layers,
                                               ood_loader)

    if exp_config.std_type == 'kNN':  # Load pre-computed sigma values for each channel using kNN as proposed in the paper - COMPUTATIONALLY INEFFICIENT
        std = torch.Tensor(
            np.load('results/std_%s.npy' %
                    (exp_config.data_identifier_source))).cuda()
    elif exp_config.std_type == 'interquartile':  # Compute sigma values for each channel using interquartile ranges - COMPUTATIONALLY EFFICIENT AND LEADS TO RESULTS SIMILAR TO THE PAPER
        sorted_feature_in_train, _ = torch.sort(feature_in_train, axis=0)
        emp_std = torch.std(feature_in_test, axis=0)
        Q1, Q3 = torch.median(
            sorted_feature_in_train[0:sorted_feature_in_train.shape[0] // 2],
            axis=0).values, torch.median(
                sorted_feature_in_train[(sorted_feature_in_train.shape[0] //
                                         2):],
                axis=0).values
        IQR = Q3 - Q1
        std = 0.9 * torch.min(torch.cat(
            [torch.unsqueeze(emp_std, 0),
             torch.unsqueeze(IQR, 0) / 1.34],
            axis=0),
                              axis=0).values * (feature_in_train.shape[0]
                                                **(-1 / 5))

    # Calculate confidence scores using KDE for test set of the in-distribution dataset
    print(
        'Calculating confidence scores using KDE for the test set of the in-distribution dataset'
    )
    constant = 1 / (std * torch.sqrt(torch.Tensor([2 * math.pi]).cuda()))
    scores_in_test = 0
    for i in range(feature_in_train.shape[0]):
        zero_x = feature_in_test - feature_in_train[i]
        scores_in_test += constant * torch.exp(
            -0.5 * (torch.pow(torch.div(zero_x, std), 2)))
    scores_in_test /= feature_in_train.shape[0]
    scores_in_test = scores_in_test.detach().cpu().numpy()

    # Calculate confidence scores using KDE for training set of the in-distribution dataset
    print(
        'Calculating confidence scores using KDE for the training set of the in-distribution dataset'
    )
    scores_in_train = 0
    for i in range(feature_in_train.shape[0]):
        zero_x = feature_in_train - feature_in_train[i]
        scores_in_train += constant * torch.exp(
            -0.5 * (torch.pow(torch.div(zero_x, std), 2)))
    scores_in_train /= feature_in_train.shape[0]
    scores_in_train = scores_in_train.detach().cpu().numpy()

    # Calculate confidence scores using KDE for the adversarially perturbed version of training set of the in-distribution dataset
    print('Calculating confidence scores using KDE for the adversarial images')
    scores_in_train_perturbed = 0
    for i in range(feature_in_train.shape[0]):
        zero_x = feature_in_train_perturbed - feature_in_train[i]
        scores_in_train_perturbed += constant * torch.exp(
            -0.5 * (torch.pow(torch.div(zero_x, std), 2)))
    scores_in_train_perturbed /= feature_in_train.shape[0]
    scores_in_train_perturbed = scores_in_train_perturbed.detach().cpu().numpy(
    )

    # Calculate confidence scores using KDE for OOD datasets
    print('Calculating confidence scores using KDE for OOD datasets')
    scores_ood = {}
    for target in exp_config.data_identifier_target:
        scores_ood[target] = 0
        for i in range(feature_in_train.shape[0]):
            zero_x = feature_ood[target] - feature_in_train[i]
            scores_ood[target] += constant * torch.exp(
                -0.5 * (torch.pow(torch.div(zero_x, std), 2)))
        scores_ood[target] /= feature_in_train.shape[0]
        scores_ood[target] = scores_ood[target].detach().cpu().numpy()

    # Calculate OOD detection accuracy
    print('Calculating OOD detection accuracy')

    # Find the channels that best distinguish scores of the in-distribution test set from the adversarial images
    y_pred = np.concatenate((scores_in_test, scores_in_train_perturbed),
                            axis=0)
    label = np.concatenate((np.ones(scores_in_test.shape[0]),
                            np.zeros(scores_in_train_perturbed.shape[0])),
                           axis=0)
    fpr_all = []
    for i in range(scores_in_test.shape[1]):
        fpr_at_95_tpr, detection_error, auroc, aupr_in = calculate_ood_detection_performance_metrics(
            label, y_pred[:, i], str(i), display=False)
        fpr_all.append(fpr_at_95_tpr)

    # Create training set to train logistic regression
    X_train = np.concatenate(
        (np.sort(scores_in_train[:, np.argsort(fpr_all)[:50]], axis=1),
         np.sort(scores_in_train_perturbed[:, np.argsort(fpr_all)[:50]],
                 axis=1)),
        axis=0)
    Y_train = np.concatenate((np.zeros(scores_in_train.shape[0]),
                              np.ones(scores_in_train_perturbed.shape[0])),
                             axis=0)

    # Train logistic regression
    lr = LogisticRegressionCV(n_jobs=-1).fit(X_train, Y_train)

    # Evaluate logistic regression on each OOD dataset and compute OOD detection accuracy
    f.write('Target \t\t FPRat95TPR \t DetErr \t AUROC \t\t AUPR_IN \n')
    print('Target \t\t FPRat95TPR \t DetErr \t AUROC \t\t AUPR_IN \n')
    for target in exp_config.data_identifier_target:
        X_test = np.concatenate(
            (np.sort(scores_in_test[:, np.argsort(fpr_all)[:50]], axis=1),
             np.sort(scores_ood[target][:, np.argsort(fpr_all)[:50]], axis=1)),
            axis=0)
        Y_test = np.concatenate((np.zeros(
            scores_in_test.shape[0]), np.ones(scores_ood[target].shape[0])),
                                axis=0)

        y_pred = lr.predict_proba(X_test)[:, 1]

        fpr_at_95_tpr, detection_error, auroc, aupr_in = calculate_ood_detection_performance_metrics(
            Y_test, y_pred, target, display=True)

        f.write(('%8s \t %.5f \t %.5f \t %.5f \t %.5f \n\n') %
                (target, fpr_at_95_tpr, detection_error, auroc, aupr_in))

    print('Results are saved to ' + path_to_saved_results)

    f.close()
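The KDE confidence score computed three times above follows a single pattern: average per-channel Gaussian kernels centred at the training features. A standalone sketch with illustrative shapes and a simplified Silverman-style bandwidth (the kNN and interquartile bandwidths of the snippet are not reproduced here):

import math
import torch

train = torch.randn(100, 8)   # M_train x N_channels, illustrative
test = torch.randn(16, 8)     # M_test x N_channels
std = train.std(dim=0) * train.shape[0]**(-1 / 5)   # simplified bandwidth
const = 1 / (std * math.sqrt(2 * math.pi))
scores = 0
for i in range(train.shape[0]):
    zero_x = test - train[i]
    scores = scores + const * torch.exp(-0.5 * (zero_x / std)**2)
scores = scores / train.shape[0]   # M_test x N_channels KDE scores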
コード例 #57
  def forward(self, q, q_len):
    embedded = self.embedding(q)
    q_len = Variable(torch.Tensor(q_len).view(-1, 1) + 1e-12, requires_grad=False).cuda()

    return torch.div( torch.sum(embedded, 1), q_len )
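A standalone version of the length-normalized bag-of-embeddings pooling above, with hypothetical vocabulary size and dimensions. With padding_idx=0 the pad embeddings are zero, so summing over all positions counts only real tokens; Variable is unnecessary in modern PyTorch:

import torch
import torch.nn as nn

embedding = nn.Embedding(1000, 64, padding_idx=0)   # assumed sizes
q = torch.tensor([[5, 42, 7, 0], [9, 3, 0, 0]])     # zero-padded batch
q_len = torch.tensor([[3.0], [2.0]])                # true lengths
pooled = torch.div(embedding(q).sum(1), q_len)      # (batch, 64)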
コード例 #58
    def attack_dataset(self, args, arch, result_dump_path):

        success = 0
        queries = []
        not_done = []
        correct_all = []
        total = 0
        for batch_idx, data_tuple in enumerate(self.data_loader):
            if args.dataset == "ImageNet":
                if self.model.input_size[-1] >= 299:
                    images, true_labels = data_tuple[1], data_tuple[2]
                else:
                    images, true_labels = data_tuple[0], data_tuple[2]
            else:
                images, true_labels = data_tuple[0], data_tuple[1]

            if images.size(-1) != self.model.input_size[-1]:
                images = F.interpolate(images,
                                       size=self.model.input_size[-1],
                                       mode='bilinear',
                                       align_corners=True)
            self.image_height = images.size(2)
            self.image_width = images.size(3)
            eps = args.epsilon
            if args.norm == 'l2':
                # epsilon = 1e-3
                # eps = np.sqrt(epsilon * model.input_size[-1] * model.input_size[-1] * self.in_channels)  # 1.752
                learning_rate = 2.0 / np.sqrt(
                    self.image_height * self.image_width * self.in_channels)
            else:
                learning_rate = 0.005

            images = images.cuda()
            true_labels = true_labels.cuda()

            with torch.no_grad():
                logits = self.model(images)
                pred = logits.argmax(dim=1)
                correct = pred.eq(true_labels).detach().cpu().numpy().astype(
                    np.int32)
                correct_all.append(correct)
                if correct[0].item() == 0:
                    queries.append(0)
                    not_done.append(0)  # already misclassified to begin with, so not_done = 0
                    log.info(
                        "The {}-th image is already classified incorrectly.".
                        format(batch_idx))
                    continue

            if self.targeted:
                if self.target_type == 'random':
                    target_labels = torch.randint(
                        low=0,
                        high=CLASS_NUM[args.dataset],
                        size=true_labels.size()).long().cuda()
                    invalid_target_index = target_labels.eq(true_labels)
                    while invalid_target_index.sum().item() > 0:
                        target_labels[invalid_target_index] = torch.randint(
                            low=0,
                            high=logits.shape[1],
                            size=target_labels[invalid_target_index].shape
                        ).long().cuda()
                        invalid_target_index = target_labels.eq(true_labels)
                elif args.target_type == 'least_likely':
                    target_labels = logits.argmin(dim=1)
                elif args.target_type == "increment":
                    target_labels = torch.fmod(true_labels + 1,
                                               CLASS_NUM[args.dataset])
                else:
                    raise NotImplementedError('Unknown target_type: {}'.format(
                        args.target_type))
            else:
                target_labels = None

            total += images.size(0)
            sigma = args.sigma
            np.random.seed(0)
            torch.manual_seed(0)
            torch.cuda.manual_seed(0)
            adv_images = images.clone().cuda()
            assert images.size(0) == 1
            l = self.xent_loss(logits, true_labels,
                               target_labels)  # written following the original paper, which looks odd
            lr = float(learning_rate)
            total_q = 0
            ite = 0
            self.meta_model_for_q1.load_state_dict(
                self.pretrained_meta_weights)
            self.meta_model_for_q2.load_state_dict(
                self.pretrained_meta_weights)
            while total_q <= args.max_queries:
                total_q += 1
                # true = torch.squeeze(self.get_grad(self.model, adv_images, true_labels, target_labels))  # C,H,W; not actually needed, only used to check how accurate the estimate is
                # log.info("Grad norm : {:.3f}".format(torch.sqrt(torch.sum(true * true)).item()))

                if ite % 2 == 0 and sigma != args.sigma:
                    log.info("checking if sigma could be set to be 1e-4")
                    rand = torch.randn_like(adv_images)
                    rand = torch.div(
                        rand,
                        torch.clamp(torch.sqrt(
                            torch.mean(torch.mul(rand, rand))),
                                    min=1e-12))
                    logits_1 = self.model(adv_images + args.sigma * rand)
                    rand_loss = self.xent_loss(
                        logits_1, true_labels,
                        target_labels)  # shape = (batch_size,)
                    total_q += 1
                    rand = torch.randn_like(adv_images)
                    rand = torch.div(
                        rand,
                        torch.clamp(torch.sqrt(
                            torch.mean(torch.mul(rand, rand))),
                                    min=1e-12))
                    logits_2 = self.model(adv_images + args.sigma * rand)
                    rand_loss2 = self.xent_loss(
                        logits_2, true_labels,
                        target_labels)  # shape = (batch_size,)
                    total_q += 1
                    if (rand_loss - l)[0].item() != 0 and (rand_loss2 -
                                                           l)[0].item() != 0:
                        sigma = args.sigma
                        log.info("set sigma back to 1e-4, sigma={:.4f}".format(
                            sigma))

                if args.method != "uniform":
                    prior = torch.squeeze(
                        self.get_grad(self.surrogate_model, adv_images,
                                      true_labels, target_labels))  # C,H,W
                    # compute the cosine similarity below
                    # alpha = torch.sum(true * prior) / torch.clamp(torch.sqrt(torch.sum(true * true) * torch.sum(prior * prior)), min=1e-12)  # this alpha is only used to check whether the gradient is right; it is updated later
                    # log.info("alpha = {:.3}".format(alpha))
                    prior = prior / torch.clamp(torch.sqrt(
                        torch.mean(torch.mul(prior, prior))),
                                                min=1e-12)
                if args.method == "biased":
                    start_iter = 3  # the gradient norm is only estimated when ite == start_iter (= 3)
                    if ite % 10 == 0 or ite == start_iter:
                        # Estimate norm of true gradient
                        s = 10
                        # pert shape = 10,C,H,W
                        pert = torch.randn(size=(s, adv_images.size(1),
                                                 adv_images.size(2),
                                                 adv_images.size(3)))
                        for i in range(s):
                            pert[i] = pert[i] / torch.clamp(torch.sqrt(
                                torch.mean(torch.mul(pert[i], pert[i]))),
                                                            min=1e-12)
                        pert = pert.cuda()
                        # pert = (10,C,H,W), adv_images = (1,C,H,W)
                        eval_points = adv_images + sigma * pert  # broadcast, because tensor shape doesn't match exactly
                        # eval_points shape = (10,C,H,W) reshape to (10*1, C, H, W)
                        eval_points = eval_points.view(-1, adv_images.size(1),
                                                       adv_images.size(2),
                                                       adv_images.size(3))
                        target_labels_s = None
                        if target_labels is not None:
                            target_labels_s = target_labels.repeat(s)
                        if ite % self.meta_predict_steps == 0:
                            logits_for_q1 = self.model(eval_points)
                            total_q += s
                            self.finetune_meta_model(self.meta_model_for_q1,
                                                     self.meta_optimizer_q1,
                                                     eval_points,
                                                     logits_for_q1)
                        else:
                            with torch.no_grad():
                                logits_for_q1 = self.meta_model_for_q1.forward(
                                    eval_points)

                        losses = self.xent_loss(
                            logits_for_q1, true_labels.repeat(s),
                            target_labels_s)  # shape = (10*B,)
                        norm_square = torch.mean(
                            ((losses - l) / sigma)**2)  # scalar
                    while True:
                        logits_for_prior_loss = self.model(
                            adv_images + sigma * prior)  # prior may be C,H,W
                        prior_loss = self.xent_loss(
                            logits_for_prior_loss, true_labels,
                            target_labels)  # shape = (batch_size,)
                        total_q += 1
                        diff_prior = (prior_loss - l)[0].item()
                        if diff_prior == 0:
                            sigma *= 2
                            log.info(
                                "sigma={:.4f}, multiply sigma by 2".format(
                                    sigma))
                        else:
                            break
                    est_alpha = diff_prior / sigma / torch.clamp(torch.sqrt(
                        torch.sum(torch.mul(prior, prior)) * norm_square),
                                                                 min=1e-12)
                    est_alpha = est_alpha.item()
                    log.info("Estimated alpha = {:.3f}".format(est_alpha))

                    if np.isnan(est_alpha):
                        # est_alpha = np.nan_to_num(est_alpha)
                        not_done.append(1)
                        queries.append(args.max_queries)
                        log.info("{}-th image failed because of nan".format(
                            batch_idx))
                        break

                    alpha = est_alpha  # alpha measures how useful the surrogate gradient is; larger alpha gives larger lambda, and lambda = 1 means fully trusting this prior
                    if alpha < 0:  # the angle exceeds 90 degrees, so the cosine is negative
                        prior = -prior  # v = -v, negate the transferred gradient
                        alpha = -alpha
                q = args.samples_per_draw
                n = self.image_height * self.image_width * self.in_channels
                d = 50 * 50 * self.in_channels
                gamma = 3.5
                A_square = d / n * gamma
                return_prior = False
                if args.method == 'biased':
                    if args.dataprior:
                        best_lambda = A_square * (
                            A_square - alpha**2 *
                            (d + 2 * q - 2)) / (A_square**2 + alpha**4 * d**2 -
                                                2 * A_square * alpha**2 *
                                                (q + d * q - 1))
                    else:
                        best_lambda = (1 - alpha**2) * (
                            1 - alpha**2 *
                            (n + 2 * q - 2)) / (alpha**4 * n *
                                                (n + 2 * q - 2) -
                                                2 * alpha**2 * n * q + 1)
                    log.info("best_lambda = {:.4f}".format(best_lambda))
                    if best_lambda < 1 and best_lambda > 0:
                        lmda = best_lambda
                    else:
                        if alpha**2 * (n + 2 * q - 2) < 1:
                            lmda = 0
                        else:
                            lmda = 1
                    if abs(alpha) >= 1:
                        lmda = 1
                    log.info("lambda = {:.3f}".format(lmda))
                    if lmda == 1:
                        return_prior = True  # lmda =1, we trust this prior as true gradient
                elif args.method == "fixed_biased":
                    lmda = 0.5
                if not return_prior:
                    if args.dataprior:
                        upsample = nn.UpsamplingNearest2d(
                            size=(
                                adv_images.size(-2),
                                adv_images.size(-1)))  # H, W of original image
                        pert = torch.randn(size=(q, self.in_channels, 50, 50))
                        pert = upsample(pert)
                    else:
                        pert = torch.randn(
                            size=(q, adv_images.size(-3), adv_images.size(-2),
                                  adv_images.size(-1)))  # q,C,H,W
                    pert = pert.cuda()
                    for i in range(q):
                        if args.method == 'biased' or args.method == 'fixed_biased':
                            angle_prior = torch.sum(pert[i] * prior) / \
                                          torch.clamp(torch.sqrt(torch.sum(pert[i] * pert[i]) * torch.sum(prior * prior)),min=1e-12)  # C,H,W x B,C,H,W
                            pert[i] = pert[
                                i] - angle_prior * prior  # prior = B,C,H,W so pert[i] = B,C,H,W  # FIXME: batch mode is not supported here
                            pert[i] = pert[i] / torch.clamp(torch.sqrt(
                                torch.mean(torch.mul(pert[i], pert[i]))),
                                                            min=1e-12)
                            # pert[i] is the rightmost factor of the second term in line 9 of Algorithm 1 in the paper
                            pert[i] = np.sqrt(1 - lmda) * pert[i] + np.sqrt(
                                lmda) * prior  # paper's Algorithm 1: line 9
                        else:
                            pert[i] = pert[i] / torch.clamp(torch.sqrt(
                                torch.mean(torch.mul(pert[i], pert[i]))),
                                                            min=1e-12)
                    while True:
                        eval_points = adv_images + sigma * pert  # (1,C,H,W)  pert=(q,C,H,W)

                        if ite % self.meta_predict_steps == 0:
                            logits_for_q2 = self.model(eval_points)
                            total_q += q
                            self.finetune_meta_model(self.meta_model_for_q2,
                                                     self.meta_optimizer_q2,
                                                     eval_points,
                                                     logits_for_q2)
                        else:
                            with torch.no_grad():
                                logits_for_q2 = self.meta_model_for_q2.forward(
                                    eval_points)

                        target_labels_q = None
                        if target_labels is not None:
                            target_labels_q = target_labels.repeat(q)
                        losses = self.xent_loss(
                            logits_for_q2, true_labels.repeat(q),
                            target_labels_q)  # shape = (q,)

                        grad = (losses - l).view(
                            -1, 1, 1, 1) * pert  # (q,1,1,1) * (q,C,H,W)
                        grad = torch.mean(grad, dim=0, keepdim=True)  # 1,C,H,W
                        norm_grad = torch.sqrt(
                            torch.mean(torch.mul(grad, grad)))
                        if norm_grad.item() == 0:
                            sigma *= 5
                            log.info(
                                "estimated grad == 0, multiply sigma by 5. Now sigma={:.4f}"
                                .format(sigma))
                        else:
                            break
                    grad = grad / torch.clamp(torch.sqrt(
                        torch.mean(torch.mul(grad, grad))),
                                              min=1e-12)

                    def print_loss(model, direction):
                        length = [1e-4, 1e-3]
                        les = []
                        for ss in length:
                            logits_p = model(adv_images + ss * direction)
                            loss_p = self.xent_loss(logits_p, true_labels,
                                                    target_labels)
                            les.append((loss_p - l)[0].item())
                        log.info("losses: ".format(les))

                    if args.show_loss:
                        if args.method == 'biased' or args.method == 'fixed_biased':
                            show_input = adv_images + lr * prior
                            logits_show = self.model(show_input)
                            lprior = self.xent_loss(logits_show, true_labels,
                                                    target_labels) - l
                            print_loss(self.model, prior)
                            show_input_2 = adv_images + lr * grad
                            logits_show2 = self.model(show_input_2)
                            lgrad = self.xent_loss(logits_show2, true_labels,
                                                   target_labels) - l
                            print_loss(self.model, grad)
                            log.info(lprior, lgrad)
                else:
                    grad = prior
                # log.info("angle = {:.4f}".format(torch.sum(true*grad) /
                #                                  torch.clamp(torch.sqrt(torch.sum(true*true) * torch.sum(grad*grad)),min=1e-12)))
                if args.norm == "l2":
                    adv_images = adv_images + lr * grad / torch.clamp(
                        torch.sqrt(torch.mean(torch.mul(grad, grad))),
                        min=1e-12)
                    adv_images = self.l2_proj_step(images, eps, adv_images)
                else:
                    if grad.dim() == 3:
                        grad = grad.unsqueeze(0)
                    adv_images = adv_images + lr * torch.sign(grad)
                    adv_images = torch.min(torch.max(adv_images, images - eps),
                                           images + eps)
                adv_images = torch.clamp(adv_images, self.clip_min,
                                         self.clip_max)
                adv_labels = self.get_pred(self.model, adv_images)
                logits_ = self.model(adv_images)
                l = self.xent_loss(logits_, true_labels, target_labels)
                # log.info('queries:', total_q, 'loss:', l, 'learning rate:', lr, 'sigma:', sigma, 'prediction:', adv_labels,
                #       'distortion:', torch.max(torch.abs(adv_images - images)).item(), torch.norm((adv_images - images).view(images.size(0),-1)).item())
                ite += 1

                if (self.targeted and adv_labels[0].item() == target_labels[0].item()) \
                        or (not self.targeted and adv_labels[0].item() != true_labels[0].item()):
                    log.info(
                        "Success in {}-th image, Stop at queries : {}".format(
                            batch_idx, total_q))
                    success += 1
                    not_done.append(0)
                    queries.append(total_q)
                    break
            else:
                not_done.append(1)
                queries.append(
                    args.max_queries)  # hence np.mean(queries) alone cannot be used to compute the average query count

        log.info(
            'Attack {} success rate: {:.3f} Queries_mean: {:.3f} Queries_median: {:.3f}'
            .format(arch, success / total, np.mean(queries),
                    np.median(queries)))
        correct_all = np.concatenate(correct_all, axis=0).astype(np.int32)
        query_all = np.array(queries).astype(np.int32)
        not_done_all = np.array(not_done).astype(np.int32)
        success = (1 - not_done_all) * correct_all
        success_query = success * query_all
        meta_info_dict = {
            "query_all": query_all.tolist(),
            "not_done_all": not_done_all.tolist(),
            "correct_all": correct_all.tolist(),
            "mean_query":
            np.mean(success_query[np.nonzero(success)[0]]).item(),
            "max_query": np.max(success_query[np.nonzero(success)[0]]).item(),
            "median_query":
            np.median(success_query[np.nonzero(success)[0]]).item(),
            "avg_not_done": np.mean(not_done_all.astype(np.float32)).item(),
            "args": vars(args)
        }
        with open(result_dump_path, "w") as result_file_obj:
            json.dump(meta_info_dict, result_file_obj, sort_keys=True)
        log.info("done, write stats info to {}".format(result_dump_path))
コード例 #59
def l1norm(X, dim, eps=1e-8):
    """L1-normalize columns of X
    """
    norm = torch.abs(X).sum(dim=dim, keepdim=True) + eps
    X = torch.div(X, norm)
    return X
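A quick check of l1norm: after normalization, the absolute values along the chosen dimension sum to roughly 1:

import torch

X = torch.randn(3, 5)
Xn = l1norm(X, dim=1)
print(Xn.abs().sum(dim=1))   # ~ tensor([1., 1., 1.])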
コード例 #60
def conditional_lognormal_loss(model,
                               x,
                               t,
                               e,
                               pdf_u,
                               pdf_c,
                               hr_loss=False,
                               imbalance_loss=False,
                               elbo=True,
                               risk=1):

    shape, scale, logits = model.forward(x)

    lossf = []
    losss = []

    k_ = shape
    b_ = scale
    loss_neg = 0
    for g in range(model.k):

        mu = k_[:, g]
        sigma = b_[:, g]

        f = -sigma - 0.5 * np.log(2 * np.pi)
        f = f - torch.div((torch.log(t) - mu)**2, 2. * torch.exp(2 * sigma))
        s = torch.div(torch.log(t) - mu, torch.exp(sigma) * np.sqrt(2))
        s = 0.5 - 0.5 * torch.erf(s)
        s = torch.log(s)

        lossf.append(f)
        losss.append(s)

        # log hazard (log f - log S), fed to the partial log-likelihood
        hr = f - s
        loss_neg += PartialLogLikelihood()(hr, e)

    losss = torch.stack(losss, dim=1)
    lossf = torch.stack(lossf, dim=1)

    if elbo:
        lossg = nn.Softmax(dim=1)(logits)
        losss = lossg * losss
        lossf = lossg * lossf

        losss = losss.sum(dim=1)
        lossf = lossf.sum(dim=1)
    else:
        lossg = nn.LogSoftmax(dim=1)(logits)
        losss = lossg + losss
        lossf = lossg + lossf

        losss = torch.logsumexp(losss, dim=1)
        lossf = torch.logsumexp(lossf, dim=1)

    # optionally reweight by the empirical event/censoring-time densities
    if imbalance_loss:
        try:
            idx_time = t.int().cpu().detach().numpy()
            idx_time[idx_time >= 10] = 9
            pdf_u_ = torch.tensor(pdf_u).cuda()
            pdf_c_ = torch.tensor(pdf_c).cuda()
            lossf = lossf * ((1 - pdf_u_[idx_time]).exp())
            losss = losss * ((1 - pdf_c_[idx_time]).exp())
        except:  # skip reweighting if a time index falls outside the pdf support
            pass

    uncens = np.where(e.cpu().data.numpy() == int(risk))[0]
    cens = np.where(e.cpu().data.numpy() != int(risk))[0]
    ll = lossf[uncens].sum() + model.discount * losss[cens].sum()

    if hr_loss and e.sum() > 0:
        return -ll / float(len(uncens) + len(cens)) + loss_neg * model.gamma
    else:
        return -ll / float(len(uncens) + len(cens))
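The elbo=False branch above evaluates log sum_g w_g f_g stably as logsumexp(log_softmax(logits) + log f). A quick numerical check with random stand-ins for the per-component log-densities:

import torch
import torch.nn as nn

logits = torch.randn(4, 3)
logf = torch.randn(4, 3)   # stand-in for per-component log-densities
mix = torch.logsumexp(nn.LogSoftmax(dim=1)(logits) + logf, dim=1)
ref = torch.log((nn.Softmax(dim=1)(logits) * logf.exp()).sum(dim=1))
assert torch.allclose(mix, ref, atol=1e-5)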