def forward(self, input, label):
        # --------------------------- cos(theta) & phi(theta) ---------------------------
        if self.device_id is None:
            cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        else:
            x = input
            sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0)
            temp_x = x.cuda(self.device_id[0])
            weight = sub_weights[0].cuda(self.device_id[0])
            cosine = F.linear(F.normalize(temp_x), F.normalize(weight))
            for i in range(1, len(self.device_id)):
                temp_x = x.cuda(self.device_id[i])
                weight = sub_weights[i].cuda(self.device_id[i])
                cosine = torch.cat((cosine, F.linear(F.normalize(temp_x), F.normalize(weight)).cuda(self.device_id[0])), dim=1) 
        sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # --------------------------- convert label to one-hot ---------------------------
        one_hot = torch.zeros(cosine.size())
        if self.device_id is not None:
            one_hot = one_hot.cuda(self.device_id[0])
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        # --------- torch.where(condition, x, y): out_i = x_i if condition_i else y_i ---------
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)  # you can use torch.where if your torch.__version__ >= 0.4
        output *= self.s

        return output
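A minimal standalone sketch of the same margin arithmetic (the helper name arcface_logits and its default s/m values are illustrative, not from the source): it computes cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m), scatters the labels into a one-hot mask, and applies the margin only to the target class.

import math
import torch
import torch.nn.functional as F

def arcface_logits(embeddings, weight, labels, s=64.0, m=0.5):
    # cos(theta) between L2-normalized embeddings and class weights
    cosine = F.linear(F.normalize(embeddings), F.normalize(weight))
    sine = torch.sqrt((1.0 - cosine.pow(2)).clamp(min=0.0))
    # cos(theta + m) = cos(theta) * cos(m) - sin(theta) * sin(m)
    phi = cosine * math.cos(m) - sine * math.sin(m)
    one_hot = torch.zeros_like(cosine).scatter_(1, labels.view(-1, 1), 1.0)
    # additive angular margin on the target class only, then scale
    return s * torch.where(one_hot > 0, phi, cosine)

emb, w = torch.randn(4, 128), torch.randn(10, 128)  # 4 samples, 10 classes
labels = torch.randint(0, 10, (4,))
logits = arcface_logits(emb, w, labels)              # shape (4, 10)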
Example #2
def bisect_demo():
    """ Bisect the LB/UB on specified columns.
        The key is to use scatter_() to convert indices into one-hot encodings.
    """
    t1t2 = torch.stack((torch.randn(5, 4), torch.randn(5, 4)), dim=-1)
    lb, _ = torch.min(t1t2, dim=-1)
    ub, _ = torch.max(t1t2, dim=-1)
    print('LB:', lb)
    print('UB:', ub)

    # random idxs for testing
    idxs = torch.randn_like(lb)
    _, idxs = idxs.max(dim=-1)  # <Batch>
    print('Split idxs:', idxs)

    idxs = idxs.unsqueeze(dim=-1)  # Batch x 1
    idxs = torch.zeros_like(lb).byte().scatter_(-1, idxs, 1)  # convert into one-hot encoding
    print('Reorg idxs:', idxs)

    mid = (lb + ub) / 2.0
    lefts_lb = lb
    lefts_ub = torch.where(idxs, mid, ub)  # use the one-hot encoding to call torch.where()
    rights_lb = torch.where(idxs, mid, lb)  # definitely faster than element-wise reassignment
    rights_ub = ub

    print('LEFT LB:', lefts_lb)
    print('LEFT UB:', lefts_ub)
    print('RIGHT LB:', rights_lb)
    print('RIGHT UB:', rights_ub)

    newlb = torch.cat((lefts_lb, rights_lb), dim=0)
    newub = torch.cat((lefts_ub, rights_ub), dim=0)
    return newlb, newub
    def forward(self, x, label):
        cosine = F.linear(F.normalize(x), F.normalize(self.weight))
        sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where((cosine - self.th) > 0, phi, cosine - self.mm)

        one_hot = torch.zeros(cosine.size(), device='cuda')
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        return output
Example #4
    def forward(self, scores, align, target):
        """
        Args:
            scores (FloatTensor): ``(batch_size*tgt_len)`` x dynamic vocab size
                whose sum along dim 1 is less than or equal to 1, i.e. cols
                softmaxed.
            align (LongTensor): ``(batch_size x tgt_len)``
            target (LongTensor): ``(batch_size x tgt_len)``
        """
        # probabilities assigned by the model to the gold targets
        vocab_probs = scores.gather(1, target.unsqueeze(1)).squeeze(1)

        # probability of tokens copied from source
        copy_ix = align.unsqueeze(1) + self.vocab_size
        copy_tok_probs = scores.gather(1, copy_ix).squeeze(1)
        # Set scores for unk to 0 and add eps
        copy_tok_probs[align == self.unk_index] = 0
        copy_tok_probs += self.eps  # to avoid -inf logs

        # find the indices in which you do not use the copy mechanism
        non_copy = align == self.unk_index
        if not self.force_copy:
            non_copy = non_copy | (target != self.unk_index)

        probs = torch.where(
            non_copy, copy_tok_probs + vocab_probs, copy_tok_probs
        )

        loss = -probs.log()  # just NLLLoss; can the module be incorporated?
        # Drop padding.
        loss[target == self.ignore_index] = 0
        return loss
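A tiny sketch of the gather step used above, with toy numbers (not from the source): gathering along dim 1 picks, for each row of scores, the probability assigned to that row's gold target.

import torch

scores = torch.softmax(torch.randn(3, 5), dim=1)                # 3 rows over a 5-token vocab
target = torch.tensor([2, 0, 4])
vocab_probs = scores.gather(1, target.unsqueeze(1)).squeeze(1)  # shape (3,)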
Example #5
def hingeembeddingloss_reference(input, target, margin=1.0, size_average=True, reduce=True):
    margin_clamp = (margin - input).clamp(min=0).type_as(input)
    output = torch.where(target == 1, input, margin_clamp)

    if reduce and size_average:
        return output.mean()
    elif reduce:
        return output.sum()
    return output
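A quick sanity-check sketch, assuming targets in {-1, +1}; with mean reduction the reference should match the built-in torch.nn.functional.hinge_embedding_loss.

import torch
import torch.nn.functional as F

x = torch.randn(8)
y = torch.randint(0, 2, (8,)) * 2 - 1        # targets in {-1, +1}
ref = hingeembeddingloss_reference(x, y, margin=1.0)
print(ref, F.hinge_embedding_loss(x, y, margin=1.0))  # values should agree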
    def forward(self, input, label):
        # --------------------------- cos(theta) & phi(theta) ---------------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # --------------------------- convert label to one-hot ---------------------------
        # one_hot = torch.zeros(cosine.size(), requires_grad=True, device='cuda')
        one_hot = torch.zeros(cosine.size(), device='cuda')
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        # --------- torch.where(condition, x, y): out_i = x_i if condition_i else y_i ---------
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)  # you can use torch.where if your torch.__version__ >= 0.4
        output *= self.s

        return output
Example #7
    def backward(ctx, grad_output):
        supp_size, output = ctx.saved_tensors
        dim = ctx.dim
        grad_input = grad_output.clone()
        grad_input[output == 0] = 0

        v_hat = grad_input.sum(dim=dim) / supp_size.to(output.dtype).squeeze()
        v_hat = v_hat.unsqueeze(dim)
        grad_input = torch.where(output != 0, grad_input - v_hat, grad_input)
        return grad_input, None
def smooth_l1_loss(input, target, beta=1. / 9, size_average=True):
    """
    very similar to the smooth_l1_loss from pytorch, but with
    the extra beta parameter
    """
    n = torch.abs(input - target)
    cond = n < beta
    loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)
    if size_average:
        return loss.mean()
    return loss.sum()
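A short usage sketch with toy tensors; the comparison assumes your torch build exposes the beta keyword on F.smooth_l1_loss (added in newer releases).

import torch
import torch.nn.functional as F

pred, gt = torch.randn(8, 4), torch.randn(8, 4)
loss = smooth_l1_loss(pred, gt, beta=1. / 9)
print(loss, F.smooth_l1_loss(pred, gt, beta=1. / 9))  # should be close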
Example #9
File: LID.py Project: naiqili/CL
def lid(Mxy, k):
    eps_mat = torch.where(Mxy > 1e-20, torch.zeros((1, 1)), torch.ones((1, 1)) * 1e-20).detach()
    Mxy = Mxy + eps_mat
    value, idx = Mxy.topk(k=k, largest=False)
    mask = torch.zeros(Mxy.size()).type(Mxy.type())
    mask.scatter_(1, idx, 1.0)
    r_max = value[:, -1].detach()

    # est = -1 / (1. / k * torch.sum(torch.log(Mxy + eps_mat) * mask, dim=-1) - torch.log(r_max))
    est = -1 / (torch.mean(torch.log(value), dim=-1) - torch.log(r_max))
    return est
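A small usage sketch, assuming a pairwise Euclidean distance matrix built with torch.cdist (the distance construction is not part of the original file).

import torch

x = torch.randn(32, 16)
Mxy = torch.cdist(x, x)    # (32, 32) pairwise distances
est = lid(Mxy, k=10)       # one LID estimate per row
print(est.shape)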
Example #10
def cosineembeddingloss_reference(input1, input2, target, margin=0, size_average=True, reduce=True):
    def _cos(a, b):
        cos = a.new(a.size(0))
        for i in range(0, a.size(0)):
            cos[i] = (a[i] * b[i]).sum() / ((((a[i] * a[i]).sum() + 1e-12) * ((b[i] * b[i]).sum() + 1e-12)) ** 0.5)
        return cos

    output = torch.where(target == 1, 1 - _cos(input1, input2), (_cos(input1, input2) - margin).clamp(min=0))

    if reduce and size_average:
        return output.mean()
    elif reduce:
        return output.sum()
    return output
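A quick check sketch with toy inputs; under mean reduction the reference should be close to torch.nn.functional.cosine_embedding_loss (small differences from the 1e-12 stabilizer are expected).

import torch
import torch.nn.functional as F

a, b = torch.randn(6, 10), torch.randn(6, 10)
t = torch.randint(0, 2, (6,)) * 2 - 1        # targets in {-1, +1}
ref = cosineembeddingloss_reference(a, b, t)
print(ref, F.cosine_embedding_loss(a, b, t, margin=0))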
Example #11
def hingeembeddingloss_reference(input, target, margin=1.0, size_average=True, reduce=True):
    # needed for legacy tests
    if not isinstance(input, Variable):
        input = Variable(input)
        target = Variable(target)

    margin_clamp = (margin - input).clamp(min=0).type_as(input)
    output = torch.where(target == 1, input, margin_clamp)

    if reduce and size_average:
        return output.mean()
    elif reduce:
        return output.sum()
    return output
Example #12
 def forward(self, cosine, target):
     
     sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
     phi = cosine * self.cos_m - sine * self.sin_m
     phi = torch.where(cosine > self.th, phi, cosine - self.mm)
     # --------------------------- convert label to one-hot ---------------------------
     # one_hot = torch.zeros(cosine.size(), requires_grad=True, device='cuda')
     one_hot = torch.zeros(cosine.size(), device='cuda')
     one_hot.scatter_(1, target.view(-1, 1).long(), 1)
     # --------- torch.where(condition, x, y): out_i = x_i if condition_i else y_i ---------
     output = (one_hot * phi) + ((1.0 - one_hot) * cosine)  # you can use torch.where if your torch.__version__ >= 0.4
     #output = output - one_hot * self.m_cos # cosine-margin
     output *= self.s
     
     loss = self.ce(output, target)
     return loss     
Example #13
    def forward(self, input, adj):
        h = torch.mm(input, self.W)
        N = h.size()[0]

        a_input = torch.cat([h.repeat(1, N).view(N * N, -1), h.repeat(N, 1)], dim=1).view(N, -1, 2 * self.out_features)
        e = self.leakyrelu(torch.matmul(a_input, self.a).squeeze(2))

        zero_vec = -9e15*torch.ones_like(e)
        attention = torch.where(adj > 0, e, zero_vec)
        attention = F.softmax(attention, dim=1)
        attention = F.dropout(attention, self.dropout, training=self.training)
        h_prime = torch.matmul(attention, h)

        if self.concat:
            return F.elu(h_prime)
        else:
            return h_prime
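The masking trick above in isolation, as a minimal sketch with toy tensors: entries where adj is zero receive a very large negative score, so softmax gives them (numerically) zero attention weight.

import torch
import torch.nn.functional as F

e = torch.randn(4, 4)                    # raw attention scores
adj = (torch.rand(4, 4) > 0.5).float()   # toy adjacency matrix
masked = torch.where(adj > 0, e, -9e15 * torch.ones_like(e))
attention = F.softmax(masked, dim=1)     # non-edges get ~0 weight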
Example #14
    def forward(self, X, Z=None, diag=False):
        variance = self.get_param("variance")

        if Z is None:
            Z = X
        X = self._slice_input(X)
        if diag:
            return variance * X.abs().squeeze(1)

        Z = self._slice_input(Z)
        if X.shape[1] != Z.shape[1]:
            raise ValueError("Inputs must have the same number of features.")

        Zt = Z.t()
        return torch.where(X.sign() == Zt.sign(),
                           variance * torch.min(X.abs(), Zt.abs()),
                           X.data.new_zeros(X.shape[0], Z.shape[0]))
Example #15
    def forward(ctx, input, target):
        """
        input (FloatTensor): ``(n, num_classes)``.
        target (LongTensor): ``(n,)``, the indices of the target classes
        """
        input_batch, classes = input.size()
        target_batch = target.size(0)
        aeq(input_batch, target_batch)

        z_k = input.gather(1, target.unsqueeze(1)).squeeze()
        tau_z, support_size = _threshold_and_support(input, dim=1)
        support = input > tau_z
        x = torch.where(
            support, input**2 - tau_z**2,
            torch.tensor(0.0, device=input.device)
        ).sum(dim=1)
        ctx.save_for_backward(input, target, tau_z)
        # clamping necessary because of numerical errors: loss should be lower
        # bounded by zero, but negative values near zero are possible without
        # the clamp
        return torch.clamp(x / 2 - z_k + 0.5, min=0.0)
def shapeOptimizer(input_antenna, numsteps=10000, load=False, model=None):
    target = torch.Tensor([60,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01]).to(device)
    target = target.expand(input_antenna.size(0), 153)
    # input_antenna = random
    test.draw_antenna(input_antenna[0].numpy(), 'input_antenna1.jpg')
    input_antenna = input_antenna.float().to(device)

    if model is None:
        model = DeepResField(Block, [2, 3, 1]).to(device)
        if load:
            checkpoint = torch.load('checkpoint.pt.tar')
            model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    for param in model.parameters():
        param.requires_grad = False

    criterion = nn.MSELoss()
    optimizer = optim.Adam([input_antenna.requires_grad_()], lr=0.0001, weight_decay=0.001)

    for step in range(numsteps):

        output = model(input_antenna)
        loss = criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        input_antenna.data.clamp_(0., 1.)  # input_antenna = torch.clamp(input_antenna, min=0., max=1.)

        if step % 500 == 0:
            print("Step: {}, loss: {}, input: {}".format(step, loss.item(), input_antenna))

    final_antenna = torch.where(input_antenna >= torch.Tensor([0.5]).to(device), torch.Tensor([1]).to(device),
                                torch.Tensor([0]).to(device))
    print(final_antenna)
    test.draw_antenna(final_antenna[0].numpy(), 'final_antenna1.jpg')
    torch.save(final_antenna, 'final_antenna.pt')
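The final binarization above, reduced to a standalone sketch with a toy tensor: values at or above 0.5 become 1, the rest 0.

import torch

x = torch.rand(2, 5)
binary = torch.where(x >= 0.5, torch.ones_like(x), torch.zeros_like(x))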
Example #17
    def update(self, max_iter, iteration):
        # Velocities update
        #self.chi=(self.chi_max-self.chi_min)*((max_iter-itera)/max_iter)
        R_p = torch.rand(size=(self.size, self.dim)).double().cuda()
        R_g = torch.rand(size=(self.size, self.dim)).double().cuda()
        lr1, lr2 = self.layer_dim[0], self.layer_dim[1]
        self.V = torch.where(self.V > 0.5, (-self.V_max) / 5, self.V)
        self.V = torch.where(self.V < -0.5, (-self.V_min) / 5, self.V)
        self.X = torch.where(self.X > 40.0, self.X_max, self.X)
        self.X = torch.where(self.X < -4.0, self.X_min, self.X)
        #self.layer_select()

        self.V[:,lr1:lr2] = self.chi * (self.V[:,lr1:lr2]) \
                + self.phi_p * R_p[:,lr1:lr2] * (self.P[:,lr1:lr2] - self.X[:,lr1:lr2]) \
                + self.phi_g * R_g[:,lr1:lr2] * (self.G[:,lr1:lr2] - self.X[:,lr1:lr2])

        # Positions update
        self.X[:, lr1:lr2] = self.X[:, lr1:lr2] + self.V[:, lr1:lr2]
        print(self.X.max().cpu().numpy(), self.V.max().cpu().numpy())
        # Best scores
        #print(self.X.size(),"X")

        scores = self.cost_func(self.X)
        better_scores_idx = scores < self.S
        #print(better_scores_idx)

        self.P = torch.where(better_scores_idx, self.X, self.P)
        #self.P[better_scores_idx] = self.X[better_scores_idx,:]
        self.S[better_scores_idx] = scores[better_scores_idx]

        self.g = self.P[self.S.argmin()]
        self.best_score = self.S.min()
        #print(self.X.dtype,self.P.dtype,self.V.dtype,self.G.dtype,better_scores_idx.size(),scores.size(),scores,better_scores_idx)
        idx = 0
        for i in range(self.size // self.neghburs):
            if (idx == 0 and idx < self.size):
                max_matrix = self.S[self.size -
                                    1]  #Pick last particle to complete ring
                #print(idx,idx+self.neghburs,i)
                max_matrix = np.append(
                    max_matrix, self.S[0:(self.neghburs +
                                          1)])  #Append the next 5 neighbours
                #print(max_matrix.shape)
                assert max_matrix.shape == (self.neghburs + 2,)
                #Add support t add into G local best if is at zero
                if (max_matrix.argmin() == 0):
                    #If gbest is last element
                    self.G[0:(idx + self.neghburs), :] = self.P[self.size - 1]
                else:
                    #else
                    self.G[0:(idx +
                              self.neghburs), :] = self.P[max_matrix.argmin() -
                                                          1]
                #print(self.P[max_matrix.argmin()].shape,self.G[0:(idx+self.neghburs),:].shape,"Here1")
                idx = idx + self.neghburs
            elif ((self.size - idx) <= self.neghburs and idx < self.size):
                max_matrix = self.S[idx - 1:]

                max_matrix = np.append(max_matrix, self.S[0])
                #print(idx+(max_matrix.argmin()-1))
                #print(max_matrix.shape,"Now")
                if (max_matrix.argmin() == self.neghburs + 1):
                    #First element is gbest
                    self.G[idx:, :] = self.P[0]
                    #print("here \too")
                    #print(self.P[0].shape,self.G[idx:,:].shape,"here2")
                else:
                    self.G[idx:, :] = self.P[idx + (max_matrix.argmin() - 1)]
                    #print(self.P[idx+(max_matrix.argmin()-1)].shape,self.G[idx:,:].shape,"here2")

                #print(idx,i)
                idx = idx + self.neghburs
            elif (idx < self.size and (self.size - idx) > self.neghburs):
                max_matrix = scores[idx - 1:idx + (self.neghburs + 1)]
                #print(max_matrix.shape,"here")
                assert max_matrix.shape == (self.neghburs + 2,)
                #add suppot to add to G if the global is from last position
                self.G[idx:idx +
                       self.neghburs, :] = self.P[idx +
                                                  (max_matrix.argmin() - 1)]
                #print(idx,i)
                #print(self.P[idx+(max_matrix.argmin()-1)].shape,self.G[idx:idx+self.neghburs,:].shape,"here3")
                idx = idx + self.neghburs

        assert self.X.shape == self.G.shape
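A reduced sketch of the where-based clamping at the top of update(), with toy values (V_max/V_min here are plain floats rather than the class attributes): out-of-range velocities are replaced elementwise.

import torch

V = torch.tensor([0.7, -0.8, 0.1])
V_max, V_min = 1.0, -1.0
V = torch.where(V > 0.5, torch.full_like(V, -V_max / 5), V)
V = torch.where(V < -0.5, torch.full_like(V, -V_min / 5), V)
# -> tensor([-0.2000,  0.2000,  0.1000])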
Example #18
    def search(self,
               start_predictions: torch.Tensor,
               start_state: StateType,
               step: StepFunctionType) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Given a starting state and a step function, apply beam search to find the
        most likely target sequences.

        Parameters
        ----------
        start_predictions : ``torch.Tensor``
            A tensor containing the initial predictions with shape ``(batch_size,)``.
            Usually the initial predictions are just the index of the "start" token
            in the target vocabulary.
        start_state : ``StateType``
            The initial state passed to the ``step`` function. Each value of the state dict
            should be a tensor of shape ``(batch_size, *)``, where ``*`` means any other
            number of dimensions.
        step : ``StepFunctionType``
            A function that is responsible for computing the next most likely tokens,
            given the current state and the predictions from the last time step.
            The function should accept two arguments. The first being a tensor
            of shape ``(group_size,)``, representing the index of the predicted
            tokens from the last time step, and the second being the current state.
            The ``group_size`` will be ``batch_size * beam_size``, except in the initial
            step, for which it will just be ``batch_size``.
            The function is expected to return a tuple, where the first element
            is a tensor of shape ``(group_size, target_vocab_size)`` containing
            the log probabilities of the tokens for the next step, and the second
            element is the updated state. The tensor in the state should have shape
            ``(group_size, *)``, where ``*`` means any other number of dimensions.

        Returns
        -------
        Tuple[torch.Tensor, torch.Tensor]
            Tuple of ``(predictions, log_probabilities)``, where ``predictions``
            has shape ``(batch_size, beam_size, max_steps)`` and ``log_probabilities``
            has shape ``(batch_size, beam_size)``.
        """
        batch_size = start_predictions.size()[0]

        # List of (batch_size, beam_size) tensors. One for each time step. Does not
        # include the start symbols, which are implicit.
        predictions: List[torch.Tensor] = []

        # List of (batch_size, beam_size) tensors. One for each time step. None for
        # the first.  Stores the index n for the parent prediction, i.e.
        # predictions[t-1][i][n], that it came from.
        backpointers: List[torch.Tensor] = []

        # Calculate the first timestep. This is done outside the main loop
        # because we are going from a single decoder input (the output from the
        # encoder) to the top `beam_size` decoder outputs. On the other hand,
        # within the main loop we are going from the `beam_size` elements of the
        # beam to `beam_size`^2 candidates from which we will select the top
        # `beam_size` elements for the next iteration.
        # shape: (batch_size, num_classes)
        start_class_log_probabilities, state = step(start_predictions, start_state)

        num_classes = start_class_log_probabilities.size()[1]

        # Make sure `per_node_beam_size` is not larger than `num_classes`.
        if self.per_node_beam_size > num_classes:
            raise ConfigurationError(f"Target vocab size ({num_classes:d}) too small "
                                     f"relative to per_node_beam_size ({self.per_node_beam_size:d}).\n"
                                     f"Please decrease beam_size or per_node_beam_size.")

        # shape: (batch_size, beam_size), (batch_size, beam_size)
        start_top_log_probabilities, start_predicted_classes = \
                start_class_log_probabilities.topk(self.beam_size)

        # The log probabilities for the last time step.
        # shape: (batch_size, beam_size)
        last_log_probabilities = start_top_log_probabilities

        # shape: [(batch_size, beam_size)]
        predictions.append(start_predicted_classes)

        # Log probability tensor that mandates that the end token is selected.
        # shape: (batch_size * beam_size, num_classes)
        log_probs_after_end = start_class_log_probabilities.new_full(
                (batch_size * self.beam_size, num_classes),
                float("-inf")
        )
        log_probs_after_end[:, self._end_index] = 0.

        # Set the same state for each element in the beam.
        for key, state_tensor in state.items():
            _, *last_dims = state_tensor.size()
            # shape: (batch_size * beam_size, *)
            state[key] = state_tensor.\
                    unsqueeze(1).\
                    expand(batch_size, self.beam_size, *last_dims).\
                    reshape(batch_size * self.beam_size, *last_dims)

        for timestep in range(self.max_steps - 1):
            # shape: (batch_size * beam_size,)
            last_predictions = predictions[-1].reshape(batch_size * self.beam_size)

            # If every predicted token from the last step is `self._end_index`,
            # then we can stop early.
            if (last_predictions == self._end_index).all():
                break

            # Take a step. This get the predicted log probs of the next classes
            # and updates the state.
            # shape: (batch_size * beam_size, num_classes)
            class_log_probabilities, state = step(last_predictions, state)

            # shape: (batch_size * beam_size, num_classes)
            last_predictions_expanded = last_predictions.unsqueeze(-1).expand(
                    batch_size * self.beam_size,
                    num_classes
            )

            # Here we are finding any beams where we predicted the end token in
            # the previous timestep and replacing the distribution with a
            # one-hot distribution, forcing the beam to predict the end token
            # this timestep as well.
            # shape: (batch_size * beam_size, num_classes)
            cleaned_log_probabilities = torch.where(
                    last_predictions_expanded == self._end_index,
                    log_probs_after_end,
                    class_log_probabilities
            )

            top_log_probabilities, predicted_classes = \
                cleaned_log_probabilities.topk(self.per_node_beam_size)
            # shape (both): (batch_size * beam_size, per_node_beam_size)

            # Here we expand the last log probabilities to (batch_size * beam_size, per_node_beam_size)
            # so that we can add them to the current log probs for this timestep.
            # This lets us maintain the log probability of each element on the beam.
            # shape: (batch_size * beam_size, per_node_beam_size)
            expanded_last_log_probabilities = last_log_probabilities.\
                    unsqueeze(2).\
                    expand(batch_size, self.beam_size, self.per_node_beam_size).\
                    reshape(batch_size * self.beam_size, self.per_node_beam_size)

            # shape: (batch_size * beam_size, per_node_beam_size)
            summed_top_log_probabilities = top_log_probabilities + expanded_last_log_probabilities

            # shape: (batch_size, beam_size * per_node_beam_size)
            reshaped_summed = summed_top_log_probabilities.\
                    reshape(batch_size, self.beam_size * self.per_node_beam_size)

            # shape: (batch_size, beam_size * per_node_beam_size)
            reshaped_predicted_classes = predicted_classes.\
                    reshape(batch_size, self.beam_size * self.per_node_beam_size)

            # Keep only the top `beam_size` beam indices.
            # shape: (batch_size, beam_size), (batch_size, beam_size)
            restricted_beam_log_probs, restricted_beam_indices = reshaped_summed.topk(self.beam_size)

            # Use the beam indices to extract the corresponding classes.
            # shape: (batch_size, beam_size)
            restricted_predicted_classes = reshaped_predicted_classes.gather(1, restricted_beam_indices)

            predictions.append(restricted_predicted_classes)

            # shape: (batch_size, beam_size)
            last_log_probabilities = restricted_beam_log_probs

            # The beam indices come from a `beam_size * per_node_beam_size` dimension where the
            # indices with a common ancestor are grouped together. Hence
            # dividing by per_node_beam_size gives the ancestor. (Note that this is integer
            # division as the tensor is a LongTensor.)
            # shape: (batch_size, beam_size)
            backpointer = restricted_beam_indices / self.per_node_beam_size

            backpointers.append(backpointer)

            # Keep only the pieces of the state tensors corresponding to the
            # ancestors created this iteration.
            for key, state_tensor in state.items():
                _, *last_dims = state_tensor.size()
                # shape: (batch_size, beam_size, *)
                expanded_backpointer = backpointer.\
                        view(batch_size, self.beam_size, *([1] * len(last_dims))).\
                        expand(batch_size, self.beam_size, *last_dims)

                # shape: (batch_size * beam_size, *)
                state[key] = state_tensor.\
                        reshape(batch_size, self.beam_size, *last_dims).\
                        gather(1, expanded_backpointer).\
                        reshape(batch_size * self.beam_size, *last_dims)

        # Reconstruct the sequences.
        # shape: [(batch_size, beam_size, 1)]
        reconstructed_predictions = [predictions[-1].unsqueeze(2)]

        # shape: (batch_size, beam_size)
        cur_backpointers = backpointers[-1]

        for timestep in range(len(predictions) - 2, 0, -1):
            # shape: (batch_size, beam_size, 1)
            cur_preds = predictions[timestep].gather(1, cur_backpointers).unsqueeze(2)

            reconstructed_predictions.append(cur_preds)

            # shape: (batch_size, beam_size)
            cur_backpointers = backpointers[timestep - 1].gather(1, cur_backpointers)

        # shape: (batch_size, beam_size, 1)
        final_preds = predictions[0].gather(1, cur_backpointers).unsqueeze(2)

        reconstructed_predictions.append(final_preds)

        # shape: (batch_size, beam_size, max_steps)
        all_predictions = torch.cat(list(reversed(reconstructed_predictions)), 2)

        return all_predictions, last_log_probabilities
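In isolation, a minimal sketch of the cleaned_log_probabilities trick above (toy shapes, hypothetical end index): beams whose last prediction is the end token are forced to keep emitting it by swapping in a one-hot-in-log-space distribution.

import torch

vocab_size, end_index = 5, 4
last_predictions = torch.tensor([1, 4, 2])   # the second beam already ended
step_log_probs = torch.log_softmax(torch.randn(3, vocab_size), dim=-1)

log_probs_after_end = torch.full((3, vocab_size), float("-inf"))
log_probs_after_end[:, end_index] = 0.0

cleaned = torch.where(
    last_predictions.unsqueeze(-1).expand(3, vocab_size) == end_index,
    log_probs_after_end,
    step_log_probs,
)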
Example #19
def bs_test(model_name='bert-large-uncased-whole-word-masking'
            ):  # distilbert-base-uncased
    sys.path.append('/Users/i350230/GITHUB/CTCDecoder/src/')
    import editdistance as ed
    from BKTree import BKTree
    from collections import defaultdict
    from transformers import AutoModelWithLMHead, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelWithLMHead.from_pretrained(model_name)

    with open('_Data_metadata_letters_wav2vec.pk', 'rb') as f:
        data = pk.load(f)

    with open('_ctc_exp3_predictions.pk', 'rb') as f:
        res = pk.load(f)

    idx_to_tokens = ['<blank>'] + data['idx_to_tokens'][3:]
    tokens_to_idx = {t: i for i, t in enumerate(idx_to_tokens)}

    greedy_preds = [
        np.array(p).argmax(-1).tolist() for p in res['predictions']
    ]
    target_sentences = [
        ''.join([idx_to_tokens[i] for i in t[:t.index(0) if 0 in t else None]])
        for t in res['targets']
    ]
    greedy_preds_sentences = [[i for i, _ in groupby(p)] for p in greedy_preds]
    greedy_preds_sentences = [
        ''.join([idx_to_tokens[i] for i in p if i != 0])
        for p in greedy_preds_sentences
    ]
    print(
        Data.compute_scores(targets=target_sentences,
                            predictions=greedy_preds_sentences,
                            rec=False))

    vocabs = list(
        set([
            w for s in data['ids_to_transcript_train'].values()
            for w in s.lower().split(' ')
        ]))
    vocabs += list(
        set([
            w for s in data['ids_to_transcript_test'].values()
            for w in s.lower().split(' ')
        ]))
    # bk_tree = BKTree(vocabs)
    vocabs_set = set(vocabs)
    print(f'Vocab size = {len(vocabs_set)}')

    # for t, p in zip(target_sentences, greedy_preds_sentences):
    #   if t != p:
    #     for tw, pw in zip(t.split(' '), p.split(' ')):
    #       if tw != pw and pw not in vocabs_set:

    #         candidats = defaultdict(list)
    #         best_d = 100
    #         for w in vocabs_set:
    #           d = ed.eval(w, pw)
    #           if d == best_d:
    #             candidats[d].append(w)
    #           elif d < best_d:
    #             candidats = defaultdict(list)
    #             candidats[d].append(w)
    #             best_d = d
    #           else:
    #             continue
    #         print(f'target = {tw} | pred = {pw}')
    #         input(candidats)

    # resp = bk_tree.query(pw, 2)
    # input(f'{tw} | {pw}\n{resp}')

    lm_preds = []
    for t, p in tqdm(zip(target_sentences, greedy_preds_sentences),
                     total=len(target_sentences)):
        new_source = p
        pw = p.split(' ')
        if any([w not in vocabs_set for w in pw]):
            source = ' '.join([
                tokenizer.mask_token if w not in vocabs_set else w for w in pw
            ])
            enc_source = tokenizer.encode(source, return_tensors='pt')
            mask_token_index = torch.where(
                enc_source == tokenizer.mask_token_id)[1]

            token_logits = model(enc_source)[0]
            mask_token_logits = token_logits[0, mask_token_index, :]

            top_5_tokens = torch.topk(mask_token_logits, 5,
                                      dim=1).indices[0].tolist()

            for token in top_5_tokens:
                if tokenizer.decode([token]) in vocabs_set:
                    new_source = source.replace(tokenizer.mask_token,
                                                tokenizer.decode([token]))
                    break
            # new_source = source.replace(tokenizer.mask_token, tokenizer.decode([top_5_tokens[0]]))
        lm_preds.append(new_source)

    print(
        Data.compute_scores(targets=target_sentences,
                            predictions=lm_preds,
                            rec=False))
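A minimal sketch of the mask-index lookup used above (toy token ids; 103 is a hypothetical mask id): single-argument torch.where returns the coordinates of True entries, and index [1] keeps the position within the sequence.

import torch

mask_token_id = 103
enc_source = torch.tensor([[101, 7592, 103, 2088, 102]])
mask_token_index = torch.where(enc_source == mask_token_id)[1]  # tensor([2])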
    def trainWithMinibootstrap(self, negatives, positives, output_dir=None):
        caches = []
        model = []
        t = time.time()
        for i in range(self.num_classes - 1):
            if (len(positives[i]) != 0) & (len(negatives[i]) != 0):
                print(
                    '---------------------- Training Class number {} ----------------------'
                    .format(i))
                first_time = True
                for j in range(len(negatives[i])):
                    t_iter = time.time()
                    if first_time:
                        dataset = {}
                        dataset['pos'] = positives[i]
                        dataset['neg'] = negatives[i][j]
                        caches.append(dataset)
                        model.append(None)
                        first_time = False
                    else:
                        t_hard = time.time()
                        neg_pred = self.classifier.predict(
                            model[i], negatives[i][j])
                        hard_idx = torch.where(neg_pred > self.hard_tresh)[0]
                        caches[i]['neg'] = torch.cat(
                            (caches[i]['neg'], negatives[i][j][hard_idx]), 0)
                        print('Hard negatives selected in {} seconds'.format(
                            time.time() - t_hard))
                        print('Chosen {} hard negatives from the {}th batch'.
                              format(len(hard_idx), j))

                    print('Training with {} positives and {} negatives'.format(
                        len(caches[i]['pos']), len(caches[i]['neg'])))
                    t_update = time.time()
                    model[i] = self.updateModel(caches[i])
                    print('Model updated in {} seconds'.format(time.time() -
                                                               t_update))

                    t_easy = time.time()
                    if len(caches[i]['neg']) != 0 and not j == len(
                            negatives[i]) - 1:
                        neg_pred = self.classifier.predict(
                            model[i], caches[i]['neg'])
                        keep_idx = torch.where(neg_pred >= self.easy_tresh)[0]
                        easy_idx = len(caches[i]['neg']) - len(keep_idx)
                        caches[i]['neg'] = caches[i]['neg'][keep_idx]
                        print('Easy negatives selected in {} seconds'.format(
                            time.time() - t_easy))
                        print('Removed {} easy negatives. {} Remaining'.format(
                            easy_idx, len(caches[i]['neg'])))
                        print('Iteration {}th done in {} seconds'.format(
                            j,
                            time.time() - t_iter))
                    # Delete cache of the i-th classifier if it is the last iteration to free memory
                    if j == len(negatives[i]) - 1 and not self.return_caches:
                        caches[i] = None
                        torch.cuda.empty_cache()
            else:
                model.append(None)
                dataset = {}
                caches.append(dataset)

        training_time = time.time() - t
        print('Online Classifier trained in {} seconds'.format(training_time))
        if output_dir and self.is_rpn:
            with open(os.path.join(output_dir, "result.txt"), "a") as fid:
                fid.write(
                    "RPN's Online Classifier training time: {}min:{}s \n".
                    format(int(training_time / 60), round(training_time % 60)))
        elif output_dir and self.is_segmentation:
            with open(os.path.join(output_dir, "result.txt"), "a") as fid:
                fid.write(
                    "Online Segmentation training time: {}min:{}s \n".format(
                        int(training_time / 60), round(training_time % 60)))
        elif output_dir and not self.is_rpn and not self.is_segmentation:
            with open(os.path.join(output_dir, "result.txt"), "a") as fid:
                fid.write(
                    "Detector's Online Classifier training time: {}min:{}s \n".
                    format(int(training_time / 60), round(training_time % 60)))
        if self.return_caches:
            self.caches = caches
        return model
    def _process_feature_extraction(self,
                                    output,
                                    im_scales,
                                    im_infos,
                                    feature_name="fc6",
                                    conf_thresh=0):
        batch_size = len(output[0]["proposals"])
        n_boxes_per_image = [len(boxes) for boxes in output[0]["proposals"]]
        score_list = output[0]["scores"].split(n_boxes_per_image)
        score_list = [torch.nn.functional.softmax(x, -1) for x in score_list]
        feats = output[0][feature_name].split(n_boxes_per_image)
        cur_device = score_list[0].device

        feat_list = []
        info_list = []

        for i in range(batch_size):
            dets = output[0]["proposals"][i].bbox / im_scales[i]
            scores = score_list[i]
            max_conf = torch.zeros(scores.shape[0]).to(cur_device)
            conf_thresh_tensor = torch.full_like(max_conf, conf_thresh)
            start_index = 1
            # Column 0 of the scores matrix is for the background class
            if self.args.background:
                start_index = 0
            for cls_ind in range(start_index, scores.shape[1]):
                cls_scores = scores[:, cls_ind]
                keep = nms(dets, cls_scores, 0.5)
                max_conf[keep] = torch.where(
                    # Better than max one till now and minimally greater
                    # than conf_thresh
                    (cls_scores[keep] > max_conf[keep])
                    & (cls_scores[keep] > conf_thresh_tensor[keep]),
                    cls_scores[keep],
                    max_conf[keep],
                )

            sorted_scores, sorted_indices = torch.sort(max_conf,
                                                       descending=True)
            num_boxes = (sorted_scores[:self.args.num_features] != 0).sum()
            keep_boxes = sorted_indices[:self.args.num_features]
            feat_list.append(feats[i][keep_boxes])
            bbox = output[0]["proposals"][i][keep_boxes].bbox / im_scales[i]
            # Predict the class label using the scores
            objects = torch.argmax(scores[keep_boxes][:, start_index:], dim=1)

            info_list.append({
                "bbox":
                bbox.cpu().numpy(),
                "num_boxes":
                num_boxes.item(),
                "objects":
                objects.cpu().numpy(),
                "cls_prob":
                scores[keep_boxes][:, start_index:].cpu().numpy(),
                "image_width":
                im_infos[i]["width"],
                "image_height":
                im_infos[i]["height"],
            })

        return feat_list, info_list
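A reduced sketch of the max_conf update above, with toy scores: torch.where keeps a class score only where it beats both the running maximum and the confidence threshold.

import torch

max_conf = torch.tensor([0.2, 0.6, 0.0])
cls_scores = torch.tensor([0.5, 0.4, 0.1])
conf_thresh = torch.full_like(max_conf, 0.3)
max_conf = torch.where((cls_scores > max_conf) & (cls_scores > conf_thresh),
                       cls_scores, max_conf)
# -> tensor([0.5000, 0.6000, 0.0000])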
Example #22
def main():
    coeff = []
    ang_sb = []
    ang_np = []
    p_angles = []
    inputs_train, targets_train, inputs_test, targets_test = data.generate_data(
        args)
    results = {
        'inputs_train': inputs_train,
        'targets_train': targets_train,
        'inputs_test': inputs_test,
        'targets_test': targets_test
    }

    # Actual Model that is being observed
    mdl = model.create_model(args, inputs_train, targets_train)
    print(
        "\n===============================================================================================\n"
    )

    start_params = mdl.params_flat

    # NOTE Pickling Initial Weights
    with open('outfile', 'wb') as sp:
        pickle.dump(mdl.params_flat, sp)

    new_params = train_model(args, mdl, results)

    with open('outfile', 'rb') as sp:
        start_params = pickle.load(sp)

    # NOTE Lottery Ticket Pruning Loop
    per = args.per
    nonzer = (np.count_nonzero(mdl.params_flat))
    zer = len(mdl.params_flat) - nonzer
    x1 = nonzer - zer
    z1 = int(((x1 / 100.) * per))
    zer = z1 + zer
    print(" {} + {} = {}".format(0, nonzer, len(mdl.params_flat)))

    new_params, inputs, outputs = train_model(args, mdl, results)
    hess = mdl.hessian(mdl.params_flat)  # Calculating Hessian
    # Converting the Hessian to Tensor
    hess = torch.tensor(hess).float()
    eigenvalues, eigenvec = torch.symeig(hess, eigenvectors=True)

    hess, eigenvalues, eigenvec, coeff, ang_np, ang_sb, p_angles, top_vec = invar(
        mdl, args, inputs_train, targets_train, hess, eigenvalues, eigenvec,
        coeff, ang_np, ang_sb, p_angles)

    # NOTE Pruning Loop

    print(
        "===============================================================================================\n"
    )
    for i in tqdm(range(0, args.prune_iter),
                  desc="Pruning Progress",
                  dynamic_ncols=True):
        print("\n{} +".format(zer)),

        pruned_params_flat, zer, nonzer = prune_function(mdl, zer)
        print("{} = {}".format(nonzer, len(mdl.params_flat)))
        x1 = nonzer - zer
        z1 = int((x1 / 100.) * per)
        zer = z1 + zer

        for p in range(0, len(start_params)):
            if (pruned_params_flat[p] != 0.):
                pruned_params_flat[p] = start_params[p]

        mdl.params_flat = pruned_params_flat
        new_params, coeff = train_pruned_model(args, mdl, results, top_vec,
                                               coeff)

    coeff = torch.tensor(coeff)
    for i in range(coeff.shape[0]):
        a = torch.zeros(coeff[i].shape[0]).long()
        b = torch.arange(0, coeff[i].shape[0])
        c = torch.where(((coeff[i] > -0.1) & (coeff[i] < 0.1)), b, a)
        z = torch.zeros(coeff[i].shape[0]).fill_(0)
        z[torch.nonzero(c)] = coeff[i][torch.nonzero(c)]
        z = np.array(z)
        plt.plot(z)
    plt.xlabel('Dimension', fontsize=14)
    plt.ylabel('Coefficient', fontsize=14)
    pnpy = args.results_folder + '/plot1.png'
    plt.savefig(pnpy, format='png', pad_inches=5)

    args.suffix = args.results_folder + '/coeff.npy'
    np.save(args.suffix, coeff)
    args.suffix = args.results_folder + '/ang_sb.npy'
    np.save(args.suffix, ang_sb)
    args.suffix = args.results_folder + '/ang_np.npy'
    np.save(args.suffix, ang_np)
    args.suffix = args.results_folder + '/p_angles.npy'
    np.save(args.suffix, p_angles)

    return args.results_folder
Example #23
 def forward(self, x):
     output = torch.where(x >= 0, torch.ones_like(x), -torch.ones_like(x))
     return output
    rc("text", usetex=True)

    X_0 = X_0.cpu()
    Y_0 = Y_0.cpu()

    # +++ ground truth +++
    fig, ax = plt.subplots(clear=True, figsize=(2.5, 2.5), dpi=200)
    im = _implot(ax, X_0)

    # method-wise plots
    for (idx, method) in methods.iterrows():

        # +++ reconstructions per noise level +++
        for idx_noise in range(len(noise_rel_show)):

            idx_noise_cur = torch.where(
                noise_rel == noise_rel_show[idx_noise])[0]
            X_cur = results.loc[idx].X[idx_noise_cur, ...].squeeze(0)

            fig, ax = plt.subplots(
                1,
                2,
                clear=True,
                figsize=(5, 2.5),
                dpi=200,
                gridspec_kw={"wspace": 0.02},
            )

            im = _implot(ax[0], X_cur)

            im = _implot(ax[1], (X_cur - X_0[0:1, ...]).abs(), vmax=0.6)
Example #25
    def forward(self, classifications, regressions, anchors, annotations, **kwargs):
        alpha = 0.25
        gamma = 2.0
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        anchor = anchors[0, :, :]  # assuming all image sizes are the same, which they are
        dtype = anchors.dtype

        anchor_widths = anchor[:, 3] - anchor[:, 1]
        anchor_heights = anchor[:, 2] - anchor[:, 0]
        anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

        for j in range(batch_size):

            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            bbox_annotation = annotations[j]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            if bbox_annotation.shape[0] == 0:
                if torch.cuda.is_available():

                    alpha_factor = torch.ones_like(classification) * alpha
                    alpha_factor = alpha_factor.cuda()
                    alpha_factor = 1. - alpha_factor
                    focal_weight = classification
                    focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

                    bce = -(torch.log(1.0 - classification))

                    cls_loss = focal_weight * bce

                    regression_losses.append(torch.tensor(0).to(dtype).cuda())
                    classification_losses.append(cls_loss.sum())
                else:

                    alpha_factor = torch.ones_like(classification) * alpha
                    alpha_factor = 1. - alpha_factor
                    focal_weight = classification
                    focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

                    bce = -(torch.log(1.0 - classification))

                    cls_loss = focal_weight * bce

                    regression_losses.append(torch.tensor(0).to(dtype))
                    classification_losses.append(cls_loss.sum())

                continue

            IoU = calc_iou(anchor[:, :], bbox_annotation[:, :4])

            IoU_max, IoU_argmax = torch.max(IoU, dim=1)

            # compute the loss for classification
            targets = torch.ones_like(classification) * -1
            if torch.cuda.is_available():
                targets = targets.cuda()

            targets[torch.lt(IoU_max, 0.4), :] = 0

            positive_indices = torch.ge(IoU_max, 0.5)

            num_positive_anchors = positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1

            alpha_factor = torch.ones_like(targets) * alpha
            if torch.cuda.is_available():
                alpha_factor = alpha_factor.cuda()

            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))

            cls_loss = focal_weight * bce

            zeros = torch.zeros_like(cls_loss)
            if torch.cuda.is_available():
                zeros = zeros.cuda()
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, zeros)

            classification_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.to(dtype), min=1.0))

            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
                gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # efficientdet style
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack((targets_dy, targets_dx, targets_dh, targets_dw))
                targets = targets.t()

                regression_diff = torch.abs(targets - regression[positive_indices, :])

                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0
                )
                regression_losses.append(regression_loss.mean())
            else:
                if torch.cuda.is_available():
                    regression_losses.append(torch.tensor(0).to(dtype).cuda())
                else:
                    regression_losses.append(torch.tensor(0).to(dtype))

        # debug
        imgs = kwargs.get('imgs', None)
        if imgs is not None:
            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()
            obj_list = kwargs.get('obj_list', None)
            out = postprocess(imgs.detach(),
                              torch.stack([anchors[0]] * imgs.shape[0], 0).detach(), regressions.detach(),
                              classifications.detach(),
                              regressBoxes, clipBoxes,
                              0.5, 0.3)
            imgs = imgs.permute(0, 2, 3, 1).cpu().numpy()
            imgs = ((imgs * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255).astype(np.uint8)
            imgs = [cv2.cvtColor(img, cv2.COLOR_RGB2BGR) for img in imgs]
            display(out, imgs, obj_list, imshow=False, imwrite=True)

        return [torch.stack(classification_losses).mean(dim=0), \
                torch.stack(regression_losses).mean(
                    dim=0) * 50]  # https://github.com/google/automl/blob/6fdd1de778408625c1faf368a327fe36ecd41bf7/efficientdet/hparams_config.py#L233
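The focal-loss weighting above, reduced to a minimal sketch with binary toy targets: torch.where selects alpha for positives and 1 - alpha for negatives, and the modulating factor uses 1 - p for positives and p for negatives.

import torch

targets = torch.tensor([[1., 0., 0.]])   # binary targets
p = torch.tensor([[0.8, 0.3, 0.5]])      # predicted probabilities
alpha, gamma = 0.25, 2.0

alpha_factor = torch.where(targets == 1., torch.full_like(p, alpha),
                           torch.full_like(p, 1. - alpha))
focal_weight = alpha_factor * torch.where(targets == 1., 1. - p, p).pow(gamma)
bce = -(targets * torch.log(p) + (1. - targets) * torch.log(1. - p))
cls_loss = focal_weight * bce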
Example #26
def train_class_one_t(dhandler_class, dhandlers_rp, dec, d_hnet, net, 
                                                    device, config, writer, t):
    """Train continual learning experiments on MNIST dataset for one task.
    In this function the main training logic is implemented. 
    After setting the optimizers for the network and hypernetwork if 
    applicable, the training is structured as follows: 
    First, we get a training batch of the current task. Depending on 
    the learning scenario, we choose output heads and build targets 
    accordingly. 
    Second, if ``t`` is greater than 1, we add a loss term concerning 
    predictions of replayed data. See :func:`get_fake_data_loss` for 
    details. Third, to protect the hypernetwork from forgetting, we add an 
    additional L2 loss term namely the difference between its current output 
    given an embedding and checkpointed targets.
    Finally, we track some training statistics.

    Args:
        (....): See docstring of function :func:`train_tasks`.
        t: Task id.
    """

    # if doing CL with task inference, the classifier is empowered with a hnet
    if config.training_with_hnet:
        net_hnet = net[1]
        net = net[0]
        net.train()
        net_hnet.train()
        params_to_regularize = list(net_hnet.theta)
        optimizer = optim.Adam(params_to_regularize,
            lr=config.class_lr, betas=(0.9, 0.999))

        c_emb_optimizer = optim.Adam([net_hnet.get_task_emb(t)], 
               lr=config.class_lr_emb, betas=(0.9, 0.999))
    else:
        net.train()
        net_hnet = None
        optimizer = optim.Adam(net.parameters(),
                lr=config.class_lr, betas=(0.9, 0.999))

    # don't train the replay model if available
    if dec is not None:
        dec.eval()
    if d_hnet is not None:
        d_hnet.eval()

    # compute targets if classifier is trained with hnet
    if t > 0 and config.training_with_hnet:
        if config.online_target_computation:
            # Compute targets for the regularizer whenever they are needed.
            # -> Computationally expensive.
            targets_C = None
            prev_theta = [p.detach().clone() for p in net_hnet.theta]
            prev_task_embs = [p.detach().clone() for p in \
                                                      net_hnet.get_task_embs()]
        else:
            # Compute targets for the regularizer once and keep them all in
            # memory -> Memory expensive.
            targets_C = hreg.get_current_targets(t, net_hnet)
            prev_theta = None
            prev_task_embs = None


    dhandler_class.reset_batch_generator()

    # make copy of network
    if t >= 1:
        net_copy = copy.deepcopy(net)

    # set training_iterations if epochs are set
    if config.epochs == -1:
        training_iterations = config.n_iter
    else:
        assert(config.epochs > 0)
        training_iterations = config.epochs * \
        int(np.ceil(dhandler_class.num_train_samples / config.batch_size))

    if config.class_incremental:
        training_iterations = int(training_iterations/config.out_dim)

    # Whether we will calculate the regularizer.
    calc_reg = t > 0 and config.class_beta > 0 and config.training_with_hnet

    # set if we want the reg only computed for a subset of the  previous tasks
    if config.hnet_reg_batch_size != -1:
        hnet_reg_batch_size = config.hnet_reg_batch_size
    else:
        hnet_reg_batch_size = None
    
    for i in range(training_iterations):

        # set optimizer to zero
        optimizer.zero_grad()
        if net_hnet is not None:
            c_emb_optimizer.zero_grad()

        # Get real data
        real_batch = dhandler_class.next_train_batch(config.batch_size)
        X_real = dhandler_class.input_to_torch_tensor(real_batch[0], device, 
                                                                mode='train')
        T_real = dhandler_class.output_to_torch_tensor(real_batch[1],device, 
                                                                mode='train')
        
        if i % 100 == 0 and config.show_plots:
            fig_real = _plotImages(X_real, config)
            writer.add_figure('train_class_' + str(t) + '_real', 
                                                    fig_real, global_step=i)
        
        #################################################
        # Choosing output heads and constructing targets
        ################################################# 

        # If we train a task inference net or do class-incremental learning,
        # we construct a target for every single class/task
        if config.class_incremental or config.training_task_infer:
            # at the beginning of training, we look at two output neurons
            task_out = [0, t+1]
            T_real = torch.zeros((config.batch_size, task_out[1])).to(device)
            T_real[:, task_out[1] - 1] = 1

        elif config.cl_scenario == 1 or config.cl_scenario == 2:
            if config.cl_scenario == 1:
                # take the task specific output neuron
                task_out = [t*config.out_dim, t*config.out_dim + config.out_dim]

            else:
                # always all output neurons, only one head is used
                task_out = [0, config.out_dim]
        else:
            # The number of output neurons is generic and can grow i.e. we
            # do not have to know the number of tasks before we start 
            # learning.
            if not config.infer_output_head:
                task_out = [0,(t+1)*config.out_dim]
                T_real = torch.cat((torch.zeros((config.batch_size, 
                        t * config.out_dim)).to(device), 
                        T_real), dim=1)
            # this is a special case where we will infer the task id with another
            # neural network, so we can train on the correct output head directly
            # and use the inferred output head to compute the prediction
            else:
                task_out = [t*config.out_dim, t*config.out_dim + config.out_dim]
        
        # compute loss of current data
        if config.training_with_hnet:
            weights_c = net_hnet.forward(t)
        else:
            weights_c = None

        # print('weight_c: ',weights_c)
        Y_hat_logits = net.forward(X_real, weights_c)
        Y_hat_logits = Y_hat_logits[:, task_out[0]:task_out[1]]

        if config.soft_targets:
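            # Descriptive note (added): this implements label smoothing -- the true
            # class gets probability soft_label and the remaining (1 - soft_label)
            # mass is spread uniformly over the other num_classes - 1 classes.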
            soft_label = 0.95
            num_classes = T_real.shape[1]
            soft_targets = torch.where(T_real == 1,
                torch.Tensor([soft_label]).to(device),
                torch.Tensor([(1 - soft_label) / (num_classes-1)]).to(device))
            soft_targets = soft_targets.to(device)
            loss_task = Classifier.softmax_and_cross_entropy(Y_hat_logits,
                                                        soft_targets)
        else:

            # print('Y_hat_logits: ',Y_hat_logits.size())
            # print('T_real: ',T_real.size())
            # print('task_out: ',task_out)

            loss_task = Classifier.softmax_and_cross_entropy(Y_hat_logits, T_real)


        ############################
        # compute loss for fake data
        ############################

        # Get fake data (of all tasks up until now and merge into list)
        if t >= 1 and not config.training_with_hnet:
            fake_loss = get_fake_data_loss(dhandlers_rp, net, dec,d_hnet,device, 
                                        config, writer, t, i, net_copy)
            loss_task = (1-config.l_rew)*loss_task + config.l_rew*fake_loss

        
        loss_task.backward(retain_graph=calc_reg, create_graph=calc_reg and \
                           config.backprop_dt)
     
        # compute hypernet loss and fix embedding -> change current embs
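        # Descriptive note (added): calc_fix_target_reg penalizes changes of the
        # hypernetwork outputs for previous task embeddings (optionally using the
        # lookahead step dTheta), which protects earlier tasks from forgetting.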
        if calc_reg:
            if config.no_lookahead:
                dTheta = None
            else:
                dTheta = opstep.calc_delta_theta(optimizer,
                    config.use_sgd_change, lr=config.class_lr,
                    detach_dt=not config.backprop_dt)          
            loss_reg = config.class_beta*hreg.calc_fix_target_reg(net_hnet, t,
                        targets=targets_C, mnet=net, dTheta=dTheta, dTembs=None,
                        prev_theta=prev_theta, prev_task_embs=prev_task_embs,
                        batch_size=hnet_reg_batch_size)
            loss_reg.backward()

        # apply the optimizer step (the backward pass was computed above)
        if not config.dont_train_main_model:
            optimizer.step()

        if net_hnet is not None and config.train_class_embeddings:
            c_emb_optimizer.step()

        # save some training stats
        if i % 50 == 0:
            # compute accuracies for tracking
            Y_hat_logits = net.forward(X_real, weights_c)
            Y_hat_logits = Y_hat_logits[:, task_out[0]:task_out[1]]
            Y_hat = F.softmax(Y_hat_logits, dim=1)
            classifier_accuracy = Classifier.accuracy(Y_hat, T_real) * 100.0
            writer.add_scalar('train/task_%d/class_accuracy' % t,
                                                    classifier_accuracy, i)
            writer.add_scalar('train/task_%d/loss_task' % t,
                                                    loss_task, i)
            if t >= 1 and not config.training_with_hnet:
                writer.add_scalar('train/task_%d/fake_loss' % t,
                                                    fake_loss, i)

        # plot some gradient statistics
        if i % 200 == 0:
            if not config.dont_train_main_model:
                total_norm = 0
                if config.training_with_hnet:
                    params = net_hnet.theta
                else:
                    params = net.parameters()

                for p in params:
                    param_norm = p.grad.data.norm(2)
                    total_norm += param_norm.item() ** 2
                total_norm = total_norm ** (1. / 2)
                # TODO write gradient histograms?
                writer.add_scalar('train/task_%d/main_params_grad_norms' % t,
                                                    total_norm, i)

            if net_hnet is not None and config.train_class_embeddings:
                    total_norm = 0
                    for p in [net_hnet.get_task_emb(t)]:
                        param_norm = p.grad.data.norm(2)
                        total_norm += param_norm.item() ** 2
                    total_norm = total_norm ** (1. / 2)
                    writer.add_scalar('train/task_%d/hnet_emb_grad_norms' % t,
                                                    total_norm, i)
                                                                                                 
        if i % 200 == 0:
            msg = 'Training step {}: Classifier Accuracy: {:.3f} ' + \
                '(on current training batch).'
            print(msg.format(i, classifier_accuracy))
Example #27
0
def sample_fn(model):
    sample = torch.sigmoid(model.sample(n_samples=16))
    return torch.where(sample < 0.5, torch.zeros_like(sample),
                       torch.ones_like(sample))
Example #28
0
    def reset_state(self, h, d):
        return torch.where(d.unsqueeze(-1), torch.zeros_like(h), h)
Example #29
0
    def entropy(self):
        if len(self.masks) == 0:
            return super(CategoricalMasked, self).entropy()
        p_log_p = self.logits * self.probs
        p_log_p = torch.where(self.masks, p_log_p, torch.tensor(0.).to(device))
        return -p_log_p.sum(-1)
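The masking trick above can be checked in isolation; a minimal standalone sketch with toy logits and a toy mask (assumed data, not the original CategoricalMasked class):

import torch

logits = torch.tensor([[1.0, 2.0, -1e8]])                 # third action is invalid
masks = torch.tensor([[True, True, False]])
probs = torch.softmax(logits, dim=-1)
p_log_p = torch.log_softmax(logits, dim=-1) * probs       # p * log p per action
p_log_p = torch.where(masks, p_log_p, torch.tensor(0.))   # zero out masked actions
print(-p_log_p.sum(-1))                                    # entropy over valid actions only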
Example #30
0
def eval_single_comparison_with_SVM_hinge_loss(o_p_ref, comparisons, hrjf, o_ori_img):
    # print("o_p_ref",o_p_ref.shape)
    # o_p_ref torch.Size([3, 3, 384, 512])

    # c refl_img: mean of one predicted reflectance
    # refl_img=torch.mean(o_p_ref,dim=0,keepdim=True).squeeze()
    # print("m_o_p_ref",m_o_p_ref.shape)
    # m_o_p_ref torch.Size([341, 512])

    refl_img=o_p_ref.squeeze()

    # --------------------------------------------------
    rows=o_ori_img.shape[0]
    cols=o_ori_img.shape[1]

    # --------------------------------------------------
    # c error_sum: sum all errors from all comparisons in one image
    error_sum=0.0
    # c weight_sum: sum all weights from all comparisons in one image
    weight_sum=0.0
    num_comp=float(len(comparisons))

    # --------------------------------------------------
    # JSON GT for 1 image, 
    # containing all relative reflectance comparisons information
    # c c: one comparison from 1 image's all comparisons
    for c in comparisons:
        # c n_po1: number of point1
        n_po1=c['point1']
        # c point1: Point1 from one comparison
        point1 = hrjf.id_to_points[n_po1]

        n_po2=c['point2']
        # c point2: Point2 from one comparison
        point2=hrjf.id_to_points[n_po2]

        # c darker: Darker information from one comparison
        darker=c['darker']
        
        # Weight information from one comparison
        weight=c['darker_score'] # 1.14812035203497
        # print("weight",weight)

        # --------------------------------------------------
        # Check exception
        if not point1['opaque'] or not point2['opaque']:
            # Pass this judgement
            continue
        # weight is None or weight < 0 -> invalid darker_score, so raise
        if weight is None or weight < 0:
            raise ValueError("Invalid darker_score: %s" % weight)
        if darker not in ('1', '2', 'E'):
            raise ValueError("Invalid darker: %s" % darker)
        
        # --------------------------------------------------
        x1,y1,x2,y2,darker=int(point1['x']*cols),\
                           int(point1['y']*rows),\
                           int(point2['x']*cols),\
                           int(point2['y']*rows),\
                           darker

        # --------------------------------------------------        
        # c R1: scalar intensity value of point1 from predicted intensity image
        R1=refl_img[y1,x1]
        R2=refl_img[y2,x2]
        R2=torch.where(
            torch.abs(R2)<1e-4,torch.Tensor([1e-4]).squeeze().cuda(),R2)
        
        # --------------------------------------------------
        div_R1_R2=torch.div(R1,R2)
        
        # c dx_inv: 1+delta+xi inverse
        dx_inv=(1.0/(1.0+delta+xi))
        # c dx: 1+delta+xi
        dx=(1.0+delta+xi)
        # c dx_m_inv: 1+delta-xi inverse
        dx_m_inv=(1.0/(1.0+delta-xi))
        # c dx_m: 1+delta-xi
        dx_m=(1.0+delta-xi)

        # --------------------------------------------------
        if darker=='1':
            # c ersp: error of single pair
            ersp=torch.max(torch.Tensor([0.0]).cuda(),div_R1_R2-dx_inv)
            error_sum+=ersp
            weight_sum+=weight
        elif darker=='2':
            ersp=torch.max(torch.Tensor([0.0]).cuda(),dx-div_R1_R2)
            error_sum+=ersp
            weight_sum+=weight
        elif darker=='E':
            if xi<=delta:
                ersp=torch.max(torch.Tensor([0.0]).cuda(),dx_m_inv-div_R1_R2)
                error_sum+=ersp
                weight_sum+=weight
            else:
                ersp=torch.max(torch.Tensor([0.0]).cuda(),div_R1_R2-dx_m)
                error_sum+=ersp
                weight_sum+=weight

    # Now you have processed all comparisons in one image
    # If weight_sum exists (is non-zero)
    if weight_sum:
        # c whdr: calculated WHDR of one image
        whdr = error_sum / weight_sum

    # If weight_sum == 0, there are no valid comparisons;
    # in that case, assign 0 to whdr
    else:
        whdr = 0.0

    # Return whdr score of one image
    return whdr/num_comp
Example #31
0
def beta_smooth_l1_loss(input: Tensor, target: Tensor, beta: float) -> Tensor:
    diff = torch.abs(input - target)  # absolute difference of the (box) coordinates
    loss = torch.where(diff < beta, 0.5 * diff**2 / beta, diff - 0.5 * beta)
    loss = loss.sum() / (input.numel() + 1e-8)  # 1e-8 for numerical stability
    return loss
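A quick usage sketch for the smooth-L1 variant above (assumed example values; the imports are what the snippet itself needs):

import torch
from torch import Tensor

pred = torch.tensor([0.0, 0.0])
gt = torch.tensor([0.05, 2.0])
# |diff| < beta -> quadratic branch, otherwise linear branch
print(beta_smooth_l1_loss(pred, gt, beta=0.1))  # ~ mean of [0.5*0.05**2/0.1, 2.0 - 0.05]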
Example #32
0
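# (Note: x below is assumed to be a 2-D tensor created earlier in this example,
#  e.g. a batch x features matrix; its definition is not part of this fragment.)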
print(x[0].shape)  # x[0, :]

print(x[:, 0].shape)

print(x[2, 0:10].shape)  # 0:10 --> [0, 1, 2, ... 9]

print(x[0, 0])

# Fancy indexing
x = torch.arange(10)
indices = [2, 5, 8]

print(x[indices])

x = torch.rand((3, 5))
rows = torch.tensor([1, 0])
cols = torch.tensor([4, 0])
print(x[rows, cols])

# More advanced indexing
x = torch.arange(10)
print(x[(x < 2) | (x > 8)])
print(x[x.remainder(2) == 0])

# Other stuff
print(torch.where(x > 5, x, x * 2))
print(torch.tensor([0, 0, 1, 2, 2, 3, 4]).unique())
print(x.ndimension())  # 5x5x5 ==> 3
print(x.numel())  # number of elements
Example #33
0
    def train(self, train_examples, task_name, output_mode, eval_labels,
              num_labels, train_dataloader, eval_dataloader, eval_examples,
              tokenizer, mm_eval_labels, mm_eval_dataloader):
        """ quant-aware pretraining + KD """

        # Prepare loss functions
        loss_mse = MSELoss()

        self.teacher_model.eval()
        teacher_results = self._do_eval(self.teacher_model, task_name,
                                        eval_dataloader, output_mode,
                                        eval_labels, num_labels)
        logging.info("Teacher network evaluation")
        for key in sorted(teacher_results.keys()):
            logging.info("  %s = %s", key, str(teacher_results[key]))

        self.teacher_model.train()  # switch to train mode to supervise students

        # Train and evaluate
        # num_layers = self.student_model.config.num_hidden_layers + 1
        global_step = self.prev_global_step
        best_dev_acc = 0.0
        output_eval_file = os.path.join(self.args.output_dir,
                                        "eval_results.txt")

        logging.info("***** Running training, Task: %s, Job id: %s*****" %
                     (self.args.task_name, self.args.job_id))
        logging.info(" Distill rep attn: %d, Distill logit: %d" %
                     (self.args.distill_rep_attn, self.args.distill_logit))
        logging.info("  Num examples = %d", len(train_examples))
        logging.info("  Batch size = %d", self.args.batch_size)
        logging.info("  Num steps = %d", self.num_train_optimization_steps)

        global_tr_loss = 0  # record global average training loss to plot

        for epoch_ in range(self.args.num_train_epochs):

            tr_loss = 0.
            tr_att_loss = 0.
            tr_rep_loss = 0.
            tr_cls_loss = 0.

            nb_tr_examples, nb_tr_steps = 0, 0

            for step, batch in enumerate(train_dataloader):

                self.student_model.train()

                batch = tuple(t.to(self.device) for t in batch)

                input_ids, input_mask, segment_ids, label_ids, seq_lengths = batch

                att_loss = 0.
                rep_loss = 0.
                cls_loss = 0.
                rep_loss_layerwise = []
                att_loss_layerwise = []

                student_logits, student_atts, student_reps = self.student_model(
                    input_ids, segment_ids, input_mask)

                if self.args.distill_logit or self.args.distill_rep_attn:
                    # use distillation

                    with torch.no_grad():
                        teacher_logits, teacher_atts, teacher_reps = self.teacher_model(
                            input_ids, segment_ids, input_mask)

                    # NOTE: config loss according to stage
                    loss = 0.
                    if self.args.distill_logit:
                        cls_loss = soft_cross_entropy(
                            student_logits / self.args.temperature,
                            teacher_logits / self.args.temperature)
                        loss += cls_loss
                        tr_cls_loss += cls_loss.item()

                    if self.args.distill_rep_attn:
                        for student_att, teacher_att in zip(
                                student_atts, teacher_atts):
                            student_att = torch.where(
                                student_att <= -1e2,
                                torch.zeros_like(student_att).to(self.device),
                                student_att)
                            teacher_att = torch.where(
                                teacher_att <= -1e2,
                                torch.zeros_like(teacher_att).to(self.device),
                                teacher_att)

                            tmp_loss = loss_mse(student_att, teacher_att)
                            att_loss += tmp_loss
                            att_loss_layerwise.append(tmp_loss.item())

                        for student_rep, teacher_rep in zip(
                                student_reps, teacher_reps):
                            tmp_loss = loss_mse(student_rep, teacher_rep)
                            rep_loss += tmp_loss
                            rep_loss_layerwise.append(tmp_loss.item())

                        tr_att_loss += att_loss.item()
                        tr_rep_loss += rep_loss.item()

                        loss += rep_loss + att_loss

                else:
                    if output_mode == "classification":
                        loss_fct = CrossEntropyLoss()
                        loss = loss_fct(student_logits, label_ids.view(-1))
                    elif output_mode == "regression":
                        loss_mse = MSELoss()
                        loss = loss_mse(student_logits.view(-1),
                                        label_ids.view(-1))

                if self.n_gpu > 1:
                    loss = loss.mean()  # mean() to average on multi-gpu.
                if self.args.gradient_accumulation_steps > 1:
                    loss = loss / self.args.gradient_accumulation_steps

                loss.backward()

                tr_loss += loss.item()
                global_tr_loss += loss.item()
                nb_tr_examples += label_ids.size(0)
                nb_tr_steps += 1

                # evaluation and save model
                if global_step % self.args.eval_step == 0 or \
                        global_step == len(train_dataloader)-1:

                    # logging.info("***** KDLearner %s Running evaluation, Task: %s, Job_id: %s *****" % (stage, self.args.task_name, self.args.job_id))
                    logging.info("  Epoch = {} iter {} step".format(
                        epoch_, global_step))
                    logging.info("  Num examples = %d", len(eval_examples))
                    logging.info(f"  Previous best = {best_dev_acc}")

                    loss = tr_loss / (step + 1)
                    global_avg_loss = global_tr_loss / (global_step + 1)
                    cls_loss = tr_cls_loss / (step + 1)
                    att_loss = tr_att_loss / (step + 1)
                    rep_loss = tr_rep_loss / (step + 1)

                    self.student_model.eval()
                    result = self._do_eval(self.student_model, task_name,
                                           eval_dataloader, output_mode,
                                           eval_labels, num_labels)
                    result['global_step'] = global_step
                    result['cls_loss'] = cls_loss
                    result['att_loss'] = att_loss
                    result['rep_loss'] = rep_loss
                    result['loss'] = loss
                    result['global_loss'] = global_avg_loss

                    preds = student_logits.detach().cpu().numpy()
                    train_label = label_ids.cpu().numpy()
                    if output_mode == "classification":
                        preds = np.argmax(preds, axis=1)
                    elif output_mode == "regression":
                        preds = np.squeeze(preds)
                    result['train_batch_acc'] = list(
                        compute_metrics(task_name, preds,
                                        train_label).values())[0]

                    if self.args.distill_rep_attn:
                        logging.info("embedding layer rep_loss: %.8f" %
                                     (rep_loss_layerwise[0]))
                        rep_loss_layerwise = rep_loss_layerwise[1:]
                        for lid in range(len(rep_loss_layerwise)):
                            logging.info("layer %d rep_loss: %.8f" %
                                         (lid + 1, rep_loss_layerwise[lid]))
                            logging.info("layer %d att_loss: %.8f" %
                                         (lid + 1, att_loss_layerwise[lid]))

                    result_to_file(result, output_eval_file)

                    save_model = False

                    if task_name in acc_tasks and result['acc'] > best_dev_acc:
                        best_dev_acc = result['acc']
                        save_model = True

                    if task_name in corr_tasks and result[
                            'corr'] > best_dev_acc:
                        best_dev_acc = result['corr']
                        save_model = True

                    if task_name in mcc_tasks and result['mcc'] > best_dev_acc:
                        best_dev_acc = result['mcc']
                        save_model = True

                    if save_model:
                        self._save()

                        if task_name == "mnli":
                            logging.info('MNLI-mm Evaluation')
                            result = self._do_eval(self.student_model,
                                                   'mnli-mm',
                                                   mm_eval_dataloader,
                                                   output_mode, mm_eval_labels,
                                                   num_labels)
                            result['global_step'] = global_step
                            tmp_output_eval_file = os.path.join(
                                self.output_dir + '-MM', "eval_results.txt")
                            result_to_file(result, tmp_output_eval_file)

                # if self.args.quantize_weight:
                # self.quanter.restore()

                if (step + 1) % self.args.gradient_accumulation_steps == 0:
                    self.optimizer.step()
                    self.optimizer.zero_grad()
                    global_step += 1
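soft_cross_entropy is called above but not defined in this snippet; a common soft-target (knowledge-distillation) cross-entropy looks like the sketch below. This is an assumption about the helper, not necessarily the exact implementation used here:

import torch.nn.functional as F

def soft_cross_entropy(predicts, targets):
    # Cross entropy between the student distribution and the (soft) teacher distribution.
    student_log_probs = F.log_softmax(predicts, dim=-1)
    teacher_probs = F.softmax(targets, dim=-1)
    return (-teacher_probs * student_log_probs).mean()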
Example #34
0
def get_inpainting_mask(x):
    mask = torch.ones(x.shape, device=x.device)
    bs, x, y = torch.where(x.sum(dim=1) == -3)
    mask[bs, :, x, y] = 0
    return mask
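A small usage sketch for the mask helper above. It relies on masked pixels having been filled with -1 in each of the 3 channels, so their channel sum equals -3; the toy tensor below is an assumption:

import torch

img = torch.zeros(1, 3, 4, 4)      # batch of one 3-channel image
img[0, :, 1, 2] = -1.0             # mark one pixel as "to be inpainted"
mask = get_inpainting_mask(img)
print(mask[0, 0])                  # 0 at position (1, 2), 1 everywhere else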
Example #35
0
    def forward(self, images, targets=None):
        # type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]
        """
        Arguments:
            images (list[Tensor]): images to be processed
            targets (list[Dict[Tensor]]): ground-truth boxes present in the image (optional)
        Returns:
            result (list[BoxList] or dict[Tensor]): the output from the model.
                During training, it returns a dict[Tensor] which contains the losses.
                During testing, it returns list[BoxList] contains additional fields
                like `scores`, `labels` and `mask` (for Mask R-CNN models).
        """
        if self.training and targets is None:
            raise ValueError("In training mode, targets should be passed")

        if self.training:
            assert targets is not None
            for target in targets:
                boxes = target["boxes"]
                if isinstance(boxes, torch.Tensor):
                    if len(boxes.shape) != 2 or boxes.shape[-1] != 4:
                        raise ValueError("Expected target boxes to be a tensor"
                                         "of shape [N, 4], got {:}.".format(
                                             boxes.shape))
                else:
                    raise ValueError("Expected target boxes to be of type "
                                     "Tensor, got {:}.".format(type(boxes)))

        # get the original image sizes
        original_image_sizes = torch.jit.annotate(List[Tuple[int, int]], [])
        for img in images:
            val = img.shape[-2:]
            assert len(val) == 2
            original_image_sizes.append((val[0], val[1]))

        # transform the input
        images, targets = self.transform(images, targets)

        # Check for degenerate boxes
        # TODO: Move this to a function
        if targets is not None:
            for target_idx, target in enumerate(targets):
                boxes = target["boxes"]
                degenerate_boxes = boxes[:, 2:] <= boxes[:, :2]
                if degenerate_boxes.any():
                    # print the first degenerate box
                    bb_idx = torch.where(degenerate_boxes.any(dim=1))[0][0]
                    degen_bb: List[float] = boxes[bb_idx].tolist()
                    raise ValueError(
                        "All bounding boxes should have positive height and width."
                        " Found invalid box {} for target at index {}.".format(
                            degen_bb, target_idx))

        # get the features from the backbone
        features = self.backbone(images.tensors)
        if isinstance(features, torch.Tensor):
            features = OrderedDict([('0', features)])

        # TODO: Do we want a list or a dict?
        features = list(features.values())

        # compute the retinanet heads outputs using the features
        head_outputs = self.head(features)

        # create the set of anchors
        anchors = self.anchor_generator(images, features)

        losses = {}
        detections = torch.jit.annotate(List[Dict[str, Tensor]], [])
        if self.training:
            assert targets is not None

            # compute the losses
            losses = self.compute_loss(targets, head_outputs, anchors)
        else:
            # compute the detections
            # print(self.ssm)
            if self.ssm:
                detections = self.ssm_postprocess_detections(
                    head_outputs, anchors, images.image_sizes)
            else:
                detections = self.postprocess_detections(
                    head_outputs, anchors, images.image_sizes)
            detections = self.transform.postprocess(detections,
                                                    images.image_sizes,
                                                    original_image_sizes)

        if torch.jit.is_scripting():
            if not self._has_warned:
                warnings.warn(
                    "RetinaNet always returns a (Losses, Detections) tuple in scripting"
                )
                self._has_warned = True
            return (losses, detections)
        return self.eager_outputs(losses, detections)
Example #36
0
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
            Labels for computing the token classification loss. Indices should be in ``[0, ..., config.num_labels -
            1]``.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.transformer(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        sequence_output = outputs[0]

        sequence_output = self.dropout(sequence_output)
        logits = self.classifier(sequence_output)

        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            # Only keep active parts of the loss
            if attention_mask is not None:
                active_loss = attention_mask.view(-1) == 1
                active_logits = logits.view(-1, self.num_labels)
                active_labels = torch.where(
                    active_loss, labels.view(-1),
                    torch.tensor(loss_fct.ignore_index).type_as(labels))
                loss = loss_fct(active_logits, active_labels)
            else:
                loss = loss_fct(logits.view(-1, self.num_labels),
                                labels.view(-1))

        if not return_dict:
            output = (logits, ) + outputs[2:]
            return ((loss, ) + output) if loss is not None else output

        return TokenClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
Example #37
0
def compute_vertex_normal(vertices: torch.Tensor,
                          indices: torch.Tensor):
    """
        Compute vertex normal by weighted average of nearby face normals using Nelson Max's algorithm.
        See `Weights for Computing Vertex Normals from Facet Vectors <https://escholarship.org/content/qt7657d8h3/qt7657d8h3.pdf?t=ptt283>`_.

        Args
        ====
        vertices: torch.Tensor
            3D position of vertices
            float32 tensor with size num_vertices x 3
        indices: torch.Tensor
            vertex indices of triangle faces.
            int32 tensor with size num_triangles x 3

        Returns
        =======
        torch.Tensor
            float32 Tensor with size num_vertices x 3 representing vertex normal
    """

    def dot(v1, v2):
        return torch.sum(v1 * v2, dim = 1)
    def squared_length(v):
        return torch.sum(v * v, dim = 1)
    def length(v):
        return torch.sqrt(squared_length(v))
    def safe_asin(v):
        # Hack: the derivative of asin at 1 is infinite, so we clamp the input to avoid it
        return torch.asin(v.clamp(0, 1-1e-6))

    normals = torch.zeros(vertices.shape, dtype = torch.float32, device = vertices.device)
    v = [vertices[indices[:, 0].long(), :],
         vertices[indices[:, 1].long(), :],
         vertices[indices[:, 2].long(), :]]
    for i in range(3):
        v0 = v[i]
        v1 = v[(i + 1) % 3]
        v2 = v[(i + 2) % 3]
        e1 = v1 - v0
        e2 = v2 - v0
        e1_len = length(e1)
        e2_len = length(e2)
        side_a = e1 / torch.reshape(e1_len, [-1, 1])
        side_b = e2 / torch.reshape(e2_len, [-1, 1])
        if i == 0:
            n = torch.cross(side_a, side_b)
            n = torch.where(length(n).reshape(-1, 1).expand(-1, 3) > 0,
                n / torch.reshape(length(n), [-1, 1]),
                torch.zeros(n.shape, dtype=n.dtype, device=n.device))
        angle = torch.where(dot(side_a, side_b) < 0, 
            math.pi - 2.0 * safe_asin(0.5 * length(side_a + side_b)),
            2.0 * safe_asin(0.5 * length(side_b - side_a)))
        sin_angle = torch.sin(angle)
        
        # XXX: Inefficient but it's PyTorch's limitation
        e1e2 = e1_len * e2_len
        # contrib is 0 when e1e2 is 0
        contrib = torch.where(e1e2.reshape(-1, 1).expand(-1, 3) > 0,
            n * (sin_angle / e1e2).reshape(-1, 1).expand(-1, 3),
            torch.zeros(n.shape, dtype = torch.float32, device = vertices.device))
        index = indices[:, i].long().reshape(-1, 1).expand(-1, 3)
        normals.scatter_add_(0, index, contrib)

    # Assign 0, 0, 1 to degenerate faces
    degenerate_normals = torch.zeros(normals.shape, dtype = torch.float32, device = vertices.device)
    degenerate_normals[:, 2] = 1.0
    normals = torch.where(length(normals).reshape(-1, 1).expand(-1, 3) > 0,
        normals / torch.reshape(length(normals), [-1, 1]),
        degenerate_normals)
    assert(torch.isfinite(normals).all())
    return normals.contiguous()
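A minimal sanity check for the routine above, using a single triangle in the z = 0 plane (toy data, not from the original source):

import math
import torch

vertices = torch.tensor([[0., 0., 0.], [1., 0., 0.], [0., 1., 0.]], dtype=torch.float32)
indices = torch.tensor([[0, 1, 2]], dtype=torch.int32)
print(compute_vertex_normal(vertices, indices))   # every vertex normal is (0, 0, 1)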
Example #38
0
def nstep_target(idx,
                 policy_net,
                 target_net,
                 memory,
                 steps=20,
                 device='cpu',
                 BATCH_SIZE=32,
                 GAMMA=0.99,
                 double_dqn=False):
    range_ = np.arange(0, steps + 1)

    idx_nReward = idx.reshape(-1, 1) + range_

    _batch, _ = memory.sample(idx=idx_nReward.ravel())
    n_batch = Transition(*zip(*_batch))
    non_final_mask_rewards = torch.tensor(
        tuple(map(lambda s: s is not None, n_batch.next_state)),
        device=device,
        dtype=torch.bool).view(idx_nReward.shape)

    non_final_mask = torch.prod(non_final_mask_rewards[:, :-1], 1).bool()

    non_final_mask_r = non_final_mask_rewards[:, :-1]

    #####
    r23 = non_final_mask_r[:, :-1]
    r23 = r23.t().view(r23.shape[1], r23.shape[0],
                       1).expand(r23.shape[1], r23.shape[0], r23.shape[1])
    r12 = non_final_mask_r[:, 1:]
    r = torch.prod(r23, 0) * r12.long()
    r_mask = torch.cat([non_final_mask_rewards[:, 0].view(-1, 1).long(), r], 1)
    #####

    rewards = tuple((map(lambda r: torch.tensor([r], device=device),
                         n_batch.reward)))
    n_rewards = torch.cat(rewards).view(
        idx_nReward.shape)[:, 1:] * r_mask.float()

    gamma_n = np.geomspace(1, GAMMA**(steps - 1), steps)

    discounted_rewards = n_rewards * torch.from_numpy(gamma_n).float().to(
        device)
    discounted_rewards = torch.sum(discounted_rewards, axis=1).to(device)

    batch_future, _ = memory.sample(idx + steps - 1)
    batch_ = Transition(*zip(*batch_future))

    # non_final_next_states = torch.cat([s for s in batch_.next_state if s is not None]).to(device)

    next_states_ = [s for s in batch_.next_state]
    non_final_next_states_mask = torch.tensor(tuple(
        map(lambda s: s is not None, batch_.next_state)),
                                              device=device,
                                              dtype=torch.bool)

    non_final_mask = non_final_next_states_mask * non_final_mask

    non_final_next_states = torch.cat(
        itemgetter(*list(torch.where(
            non_final_mask == 1)[0]))(next_states_)).to(device)

    next_state_values = torch.zeros(BATCH_SIZE, device=device)

    if double_dqn:
        max_action = policy_net(non_final_next_states).max(
            1, keepdim=True)[1].detach()
        next_state_values[non_final_mask] = target_net(
            non_final_next_states).gather(1, max_action).detach().squeeze(1)

    else:
        next_state_values[non_final_mask] = target_net(
            non_final_next_states, double_dqn=double_dqn).max(1)[0].detach()
    # next_state_values[non_final_mask] = target_net(non_final_next_states).max(1)[0].detach()
    expected_state_action_values = (next_state_values *
                                    (GAMMA**steps)) + discounted_rewards

    return expected_state_action_values
Example #39
0
    def _sym_normalize_adj(self, adj):
        deg = torch.sum(adj, dim=0)  # .squeeze()
        deg_inv = torch.where(deg > 0, 1. / torch.sqrt(deg), torch.zeros(deg.size()))
        deg_inv = torch.diag(deg_inv)
        return torch.mm(deg_inv, torch.mm(adj, deg_inv))
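The method above computes the symmetric normalization D^{-1/2} A D^{-1/2}; a standalone sketch of the same steps on a toy adjacency matrix (assumed data):

import torch

adj = torch.tensor([[0., 1., 1.],
                    [1., 0., 0.],
                    [1., 0., 0.]])
deg = torch.sum(adj, dim=0)
deg_inv = torch.where(deg > 0, 1. / torch.sqrt(deg), torch.zeros(deg.size()))
print(torch.mm(torch.diag(deg_inv), torch.mm(adj, torch.diag(deg_inv))))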
Example #40
0
def _project(x, c):
    norm = torch.clamp_min(x.norm(dim=-1, keepdim=True, p=2), 1e-5)
    maxnorm = (1 - 1e-3) / (c ** 0.5)
    cond = norm > maxnorm
    projected = x / norm * maxnorm
    return torch.where(cond, projected, x)
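A quick check of the projection above, assuming c is a positive curvature and x holds points that may lie outside the Poincaré ball (toy data):

import torch

x = 10.0 * torch.randn(4, 3)          # points far outside the unit ball
c = 1.0
x_proj = _project(x, c)
print(x_proj.norm(dim=-1))            # every norm is <= (1 - 1e-3) / sqrt(c)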
Example #41
0
def test_pretrain(args, all_data):
    data_len = len(all_data)
    #
    # train_dataloader_list, valid_dataloader_list, test_dataloader_list, ent_emb_list, rel_update_weights, g_list \
    #     = get_all_clients(all_data, args)
    #
    # total_test_data_size = sum([len(test_dataloader_list[i].dataset) for i in range(data_len)])
    # eval_weights = [len(test_dataloader_list[i].dataset) / total_test_data_size for i in range(data_len)]

    embedding_range = torch.Tensor([(args.gamma + args.epsilon) / args.hidden_dim])
    kge_model = KGEModel(args, model_name=args.model)

    # rel_result = ddict(list)
    # rel_result_bydata = ddict(lambda : ddict(list))
    results = ddict(float)
    for i, data in enumerate(all_data):
        one_results = ddict(float)
        state = torch.load('../LTLE/fed_state/fb15k237_fed10_client_{}.best'.format(i), map_location=args.gpu)
        rel_embed = state['rel_emb'].detach()
        ent_embed = state['ent_emb'].detach()

        train_dataset, valid_dataset, test_dataset, nrelation, nentity = get_task_dataset(data, args)
        test_dataloader_tail = DataLoader(
            test_dataset,
            batch_size=args.test_batch_size,
            # num_workers=max(1, args.num_cpu),
            collate_fn=TestDataset.collate_fn
        )

        client_res = ddict(float)
        for batch in test_dataloader_tail:
            triplets, labels, mode = batch
            # triplets, labels, mode = next(test_dataloader_list[i].__iter__())
            triplets, labels = triplets.to(args.gpu), labels.to(args.gpu)
            head_idx, rel_idx, tail_idx = triplets[:, 0], triplets[:, 1], triplets[:, 2]
            pred = kge_model((triplets, None),
                              rel_embed,
                              ent_embed,
                              mode=mode)
            b_range = torch.arange(pred.size()[0], device=args.gpu)
            target_pred = pred[b_range, tail_idx]
            pred = torch.where(labels.byte(), -torch.ones_like(pred) * 10000000, pred)
            pred[b_range, tail_idx] = target_pred

            ranks = 1 + torch.argsort(torch.argsort(pred, dim=1, descending=True),
                                      dim=1, descending=False)[b_range, tail_idx]

            ranks = ranks.float()
            count = torch.numel(ranks)

            results['count'] += count
            results['mr'] += torch.sum(ranks).item()
            results['mrr'] += torch.sum(1.0 / ranks).item()

            one_results['count'] += count
            one_results['mr'] += torch.sum(ranks).item()
            one_results['mrr'] += torch.sum(1.0 / ranks).item()

            for k in [1, 5, 10]:
                results['hits@{}'.format(k)] += torch.numel(ranks[ranks <= k])
                one_results['hits@{}'.format(k)] += torch.numel(ranks[ranks <= k])

        for k, v in one_results.items():
            if k != 'count':
                one_results[k] = v / one_results['count']

        logging.info('mrr: {:.4f}, hits@1: {:.4f}, hits@5: {:.4f}, hits@10: {:.4f}'.format(
            one_results['mrr'], one_results['hits@1'],
            one_results['hits@5'], one_results['hits@10']))

    for k, v in results.items():
        if k != 'count':
            results[k] = v / results['count']

    logging.info('mrr: {:.4f}, hits@1: {:.4f}, hits@5: {:.4f}, hits@10: {:.4f}'.format(
        results['mrr'], results['hits@1'],
        results['hits@5'], results['hits@10']))

    return results
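The torch.where call in the loop above implements the standard filtered-ranking trick: every known-true tail except the one being evaluated is pushed to a very low score before ranking. A standalone sketch with toy scores (assumed data):

import torch

pred = torch.tensor([[0.9, 0.8, 0.7, 0.1]])    # scores for 4 candidate tails
labels = torch.tensor([[0, 1, 0, 0]])           # other tails known to be true
tail_idx = torch.tensor([2])                    # the tail we are evaluating
b_range = torch.arange(pred.size(0))

target_pred = pred[b_range, tail_idx]
pred = torch.where(labels.bool(), -torch.ones_like(pred) * 10000000, pred)
pred[b_range, tail_idx] = target_pred

ranks = 1 + torch.argsort(torch.argsort(pred, dim=1, descending=True),
                          dim=1, descending=False)[b_range, tail_idx]
print(ranks)   # tensor([2]): only the 0.9 candidate still outranks the target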
Example #42
0
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`, defaults to :obj:`None`):
            Labels for computing the token classification loss.
            Indices should be in ``[0, ..., config.num_labels - 1]``.

    Returns:
        :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.RobertaConfig`) and inputs:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when ``labels`` is provided) :
            Classification loss.
        scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.num_labels)`)
            Classification scores (before SoftMax).
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer)
            of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.

    Examples::

        from transformers import RobertaTokenizer, RobertaForTokenClassification
        import torch

        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
        model = RobertaForTokenClassification.from_pretrained('roberta-base')
        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
        labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0)  # Batch size 1
        outputs = model(input_ids, labels=labels)
        loss, scores = outputs[:2]

        """

        outputs = self.roberta(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
        )

        sequence_output = outputs[0]

        sequence_output = self.dropout(sequence_output)
        logits = self.classifier(sequence_output)

        outputs = (logits, ) + outputs[
            2:]  # add hidden states and attention if they are here

        if labels is not None:
            loss_fct = CrossEntropyLoss()
            # Only keep active parts of the loss
            if attention_mask is not None:
                active_loss = attention_mask.view(-1) == 1
                active_logits = logits.view(-1, self.num_labels)
                active_labels = torch.where(
                    active_loss, labels.view(-1),
                    torch.tensor(loss_fct.ignore_index).type_as(labels))
                loss = loss_fct(active_logits, active_labels)
            else:
                loss = loss_fct(logits.view(-1, self.num_labels),
                                labels.view(-1))
            outputs = (loss, ) + outputs

        return outputs  # (loss), scores, (hidden_states), (attentions)
Example #43
0
def train(epoch):
    print("Training epoch {}".format(epoch))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []

    regr_rpn_loss = 0
    class_rpn_loss = 0
    total_rpn_loss = 0

    regr_class_loss = 0
    class_class_loss = 0
    total_class_loss = 0

    count_rpn = 0
    count_class = 0

    for i, (image, boxes, labels, temp, num_pos) in enumerate(train_loader):
        count_rpn += 1

        y_is_box_label = temp[0].to(device=device)
        y_rpn_regr = temp[1].to(device=device)
        image = Variable(image).to(device=device)
        boxes = boxes

        base_x, cls_k, reg_k = model_rpn(image)

        l1 = rpn_loss_regr(y_true=y_rpn_regr,
                           y_pred=reg_k,
                           y_is_box_label=y_is_box_label,
                           lambda_rpn_regr=args.lambda_rpn_regr,
                           device=device)
        l2 = rpn_loss_cls_fixed_num(y_pred=cls_k,
                                    y_is_box_label=y_is_box_label,
                                    lambda_rpn_class=args.lambda_rpn_class)

        regr_rpn_loss += l1.item()
        class_rpn_loss += l2.item()
        loss = l1 + l2
        total_rpn_loss += loss.item()

        optimizer_model_rpn.zero_grad()
        loss.backward()
        optimizer_model_rpn.step()
        with torch.no_grad():
            base_x, cls_k, reg_k = model_rpn(image)

        for b in range(args.train_batch):
            img_data = {}
            with torch.no_grad():
                # Convert rpn layer to roi bboxes
                # cls_k.shape : b, h, w, 9
                # reg_k : b, h, w, 36
                rpn_rois = rpn_to_roi(
                    cls_k[b, :],
                    reg_k[b, :],
                    no_anchors=num_anchors,
                    all_possible_anchor_boxes=all_possible_anchor_boxes_tensor.
                    clone())
                rpn_rois = rpn_rois.to(device=device)
                # can't concatenate batch
                # no of boxes may vary across the batch
                img_data["boxes"] = boxes[b].to(device=device) // downscale
                img_data['labels'] = labels[b]
                # X2 are qualified anchor boxes from model_rpn (converted anchors)
                # Y1 are the labels; Y1[-1] is the background bounding box (negative bounding box); ambiguous (neutral) boxes are eliminated (< min overlap threshold)
                # Y2 is the concat of (1, tx, ty, tw, th) and (0, tx, ty, tw, th)
                X2, Y1, Y2, _ = calc_iou(rpn_rois,
                                         img_data,
                                         class_mapping=config.label_map)

                X2 = X2.to(device=device)
                Y1 = Y1.to(device=device)
                Y2 = Y2.to(device=device)

                # If X2 is None, there are no matching bboxes
                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue
                neg_samples = torch.where(Y1[:, -1] == 1)[0]
                pos_samples = torch.where(Y1[:, -1] == 0)[0]
                rpn_accuracy_rpn_monitor.append(pos_samples.size(0))
                rpn_accuracy_for_epoch.append(pos_samples.size(0))

            db = Dataset_roi(pos=pos_samples.cpu(), neg=neg_samples.cpu())
            roi_loader = DataLoader(db,
                                    shuffle=True,
                                    batch_size=args.n_roi // 2,
                                    num_workers=args.workers,
                                    pin_memory=pin_memory,
                                    drop_last=False)
            # list(roi_loader)
            for j, potential_roi in enumerate(roi_loader):
                pos = potential_roi[0]
                neg = potential_roi[1]
                if type(pos) == list:
                    rois = X2[neg]
                    rpn_base = base_x[b].unsqueeze(0)
                    Y11 = Y1[neg]
                    Y22 = Y2[neg]
                    # out_class : args.n_roi // 2 , # no of class
                elif type(neg) == list:
                    rois = X2[pos]
                    rpn_base = base_x[b].unsqueeze(0)
                    #out_class :  args.n_roi // 2 , # no of class
                    Y11 = Y1[pos]
                    Y22 = Y2[pos]
                else:
                    ind = torch.cat([pos, neg])
                    rois = X2[ind]
                    rpn_base = base_x[b].unsqueeze(0)
                    #out_class:  args.n_roi , # no of class
                    Y11 = Y1[ind]
                    Y22 = Y2[ind]
                count_class += 1
                rois = Variable(rois).to(device=device)
                out_class, out_regr = model_classifier(base_x=rpn_base,
                                                       rois=rois)

                l3 = class_loss_cls(y_true=Y11,
                                    y_pred=out_class,
                                    lambda_cls_class=args.lambda_cls_class)
                l4 = class_loss_regr(y_true=Y22,
                                     y_pred=out_regr,
                                     lambda_cls_regr=args.lambda_cls_regr)

                regr_class_loss += l4.item()
                class_class_loss += l3.item()

                loss = l3 + l4
                total_class_loss += loss.item()

                optimizer_classifier.zero_grad()
                loss.backward()
                optimizer_classifier.step()

                if count_class % args.display_class == 0:
                    if count_class == 0:
                        print(
                            '[Classifier] RPN Ex: {}-th ,Batch : {}, Anchor Box: {}-th, Classifier Model Classification loss: {} Regression loss: {} Total Loss: {}'
                            .format(i, b, j, 0, 0, 0))
                    else:
                        print(
                            '[Classifier] RPN Ex: {}-th ,Batch : {}, Anchor Box: {}-th, Classifier Model Classification loss: {} Regression loss: {} Total Loss: {} '
                            .format(i, b, j, class_class_loss / count_class,
                                    regr_class_loss / count_class,
                                    total_class_loss / count_class))

        if i % args.display_rpn == 0:
            if len(rpn_accuracy_rpn_monitor) == 0:
                print(
                    '[RPN] RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.'
                )
            else:
                mean_overlapping_bboxes = float(sum(
                    rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
                print(
                    '[RPN] Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                    .format(mean_overlapping_bboxes))
            print(
                '[RPN] RPN Ex: {}-th RPN Model Classification loss: {} Regression loss: {} Total Loss: {} '
                .format(i, class_rpn_loss / count_rpn,
                        regr_rpn_loss / count_rpn, total_rpn_loss / count_rpn))

    print("-- END OF EPOCH -- {}".format(epoch))
    print("------------------------------")
    print(
        '[RPN] RPN Ex: {}-th RPN Model Classification loss: {} Regression loss: {} Total Loss: {} '
        .format(i, class_rpn_loss / count_rpn, regr_rpn_loss / count_rpn,
                total_rpn_loss / count_rpn))
    if count_class == 0:
        print(
            '[Classifier] RPN Ex: {}-th ,Batch : {}, Anchor Box: {}-th, Classifier Model Classification loss: {} Regression loss: {} Total Loss: {}'
            .format(i, b, j, 0, 0, 0))
    else:
        print(
            '[Classifier] RPN Ex: {}-th ,Batch : {}, Anchor Box: {}-th, Classifier Model Classification loss: {} Regression loss: {} Total Loss: {} '
            .format(i, b, j, class_class_loss / count_class,
                    regr_class_loss / count_class,
                    total_class_loss / count_class))
    if len(rpn_accuracy_rpn_monitor) == 0:
        print(
            '[RPN] RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.'
        )
    else:
        mean_overlapping_bboxes = float(
            sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
        print(
            '[RPN] Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
            .format(mean_overlapping_bboxes))
    print('Total Loss  {}'.format(total_class_loss / count_class +
                                  total_rpn_loss / count_rpn))
    print("------------------------------")
Example #44
0
def L1_smooth_loss(x, y):
    abs_diff = torch.abs(x - y)
    abs_diff_lt_1 = torch.le(abs_diff, 1)
    return torch.mean(torch.where(abs_diff_lt_1, 0.5 * abs_diff ** 2, abs_diff - 0.5))
Example #45
0
    def search(
        self,
        start_predictions: torch.Tensor,
        start_state: Dict[str, torch.Tensor],
        step: StepFunctionType,
        fsm: torch.Tensor,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        r"""
        Given a starting state, a step function, and an FSM adjacency matrix, apply Constrained
        Beam Search to find most likely target sequences satisfying specified constraints in FSM.

        .. note::

            If your step function returns ``-inf`` for some log probabilities
            (like if you're using a masked log-softmax) then some of the "best"
            sequences returned may also have ``-inf`` log probability. Specifically
            this happens when the beam size is smaller than the number of actions
            with finite log probability (non-zero probability) returned by the step function.
            Therefore if you're using a mask you may want to check the results from ``search``
            and potentially discard sequences with non-finite log probability.

        Parameters
        ----------
        start_predictions : torch.Tensor
            A tensor containing the initial predictions with shape ``(batch_size, )``. These are
            usually just ``@@BOUNDARY@@`` token indices.
        start_state : ``Dict[str, torch.Tensor]``
            The initial state passed to the ``step`` function. Each value of the state dict
            should be a tensor of shape ``(batch_size, *)``, where ``*`` means any other
            number of dimensions.
        step : ``StepFunctionType``
            A function that is responsible for computing the next most likely tokens, given the
            current state and the predictions from the last time step. The function should accept
            two arguments. The first being a tensor of shape ``(group_size,)``, representing the
            index of the predicted tokens from the last time step, and the second being the
            current state. The ``group_size`` will be ``batch_size * beam_size * num_fsm_states``
            except in the initial step, for which it will just be ``batch_size``. The function is
            expected to return a tuple, where the first element is a tensor of shape
            ``(group_size, vocab_size)`` containing the log probabilities of the tokens for the
            next step, and the second element is the updated state. The tensor in the state should
            have shape ``(group_size, *)``, where ``*`` means any other number of dimensions.

        Returns
        -------
        Tuple[torch.Tensor, torch.Tensor]
            Tuple of ``(predictions, log_probabilities)``, where ``predictions``
            has shape ``(batch_size, num_fsm_states, beam_size, max_steps)``
            and ``log_probabilities`` has shape ``(batch_size, num_fsm_states, beam_size)``.
        """
        # shape: (batch_size, num_fsm_states, num_fsm_states, vocab_size)
        batch_size, num_fsm_states, _, vocab_size = fsm.size()

        # List of (batch_size, num_fsm_states, beam_size) tensors. One for each time step. Does not
        # include the start symbols, which are implicit.
        predictions: List[torch.Tensor] = []

        # List of (batch_size, num_fsm_states, beam_size) tensors. One for each time step. None for
        # the first. Stores the index n for the parent prediction.
        backpointers: List[torch.Tensor] = []

        # Calculate the first timestep. This is done outside the main loop because we are going
        # from a single decoder input (the output from the encoder) to the top `beam_size`
        # decoder outputs per FSM state. On the other hand, within the main loop we are going
        # from the `beam_size` elements of the beam (per FSM state) to `beam_size`^2 candidates
        # from which we will select the top `beam_size` elements for the next iteration.

        # shape: start_class_log_probabilities (batch_size, vocab_size)
        # shape: state["h1"], state["c1"]... etc. (batch_size, hidden_size)

        start_class_log_probabilities, state = step(
            previous_predictions=start_predictions, states=start_state)
        vocab_size = start_class_log_probabilities.size(-1)

        start_state_predictions = start_class_log_probabilities.view(
            batch_size, 1, vocab_size).expand(batch_size, num_fsm_states,
                                              vocab_size)

        start_state_predictions = start_state_predictions.masked_fill(
            1 - fsm[:, 0, :, :], float("-inf"))

        # (batch_size, num_fsm_states, beam_size)
        start_top_log_probabilities, start_predicted_classes = start_state_predictions.topk(
            self.beam_size)
        # shape: (batch_size, num_fsm_states, beam_size)
        last_log_probabilities = start_top_log_probabilities

        predictions.append(start_predicted_classes.view(batch_size, -1))

        log_probs_after_end = torch.full(
            (1, vocab_size), float("-inf")).to(start_predictions.device)
        log_probs_after_end[:, self._end_index] = 0.0

        state = {
            key: _enlarge_single_tensor(value, batch_size, num_fsm_states,
                                        self.beam_size)
            for (key, value) in state.items()
        }

        step_state_mask = fsm.view(batch_size, num_fsm_states, num_fsm_states,
                                   1,
                                   vocab_size).expand(batch_size,
                                                      num_fsm_states,
                                                      num_fsm_states,
                                                      self.beam_size,
                                                      vocab_size)

        for timestep in range(self.max_steps - 1):
            # shape: (batch_size * beam_size * num_fsm_states, )
            last_predictions = predictions[-1].reshape(
                batch_size * self.beam_size * num_fsm_states)

            if (last_predictions == self._end_index).all():
                break
            class_log_probabilities, state = step(
                previous_predictions=last_predictions, states=state)
            last_predictions_expanded = (
                last_predictions.view(-1).unsqueeze(-1).expand(
                    batch_size * num_fsm_states * self.beam_size, vocab_size))

            cleaned_log_probabilities = torch.where(
                last_predictions_expanded == self._end_index,
                log_probs_after_end,
                class_log_probabilities,
            )
            cleaned_log_probabilities = cleaned_log_probabilities.view(
                batch_size, num_fsm_states, self.beam_size, vocab_size)

            restricted_predicted_classes = torch.LongTensor(
                batch_size, num_fsm_states,
                self.beam_size).to(start_predictions.device)
            restricted_beam_log_probs = torch.FloatTensor(
                batch_size, num_fsm_states,
                self.beam_size).to(start_predictions.device)
            restricted_beam_indices = torch.LongTensor(
                batch_size, num_fsm_states,
                self.beam_size).to(start_predictions.device)

            expanded_last_log_probabilities = last_log_probabilities.view(
                batch_size, num_fsm_states, self.beam_size,
                1).expand(batch_size, num_fsm_states, self.beam_size,
                          self.per_node_beam_size)

            for i in range(num_fsm_states):
                # shape (batch_size, num_fsm_states, self.beam_size, vocab_size)
                state_log_probabilities = cleaned_log_probabilities

                # Mask out tokens that cannot transition into target FSM state `i`.
                state_log_probabilities = state_log_probabilities.masked_fill(
                    step_state_mask[:, :, i, :, :] == 0, -1e20)
                top_log_probabilities, predicted_classes = state_log_probabilities.topk(
                    self.per_node_beam_size)
                summed_top_log_probabilities = (
                    top_log_probabilities + expanded_last_log_probabilities)
                # shape: (batch_size, old_num_fsm_states * beam_size * per_node_beam_size)
                reshaped_summed = summed_top_log_probabilities.reshape(
                    batch_size, -1)

                # shape: (batch_size, old_num_fsm_states * beam_size * per_node_beam_size)
                reshaped_predicted_classes = predicted_classes.reshape(
                    batch_size, -1)

                # shape (batch_size, beam_size)
                state_beam_log_probs, state_beam_indices = reshaped_summed.topk(
                    self.beam_size)
                # shape (batch_size, beam_size)
                state_predicted_classes = reshaped_predicted_classes.gather(
                    1, state_beam_indices)

                restricted_predicted_classes[:, i, :] = state_predicted_classes
                restricted_beam_indices[:, i, :] = state_beam_indices
                restricted_beam_log_probs[:, i, :] = state_beam_log_probs

            restricted_predicted_classes = restricted_predicted_classes.view(
                batch_size, -1)
            predictions.append(restricted_predicted_classes)

            # Integer (floor) division recovers, for each surviving candidate, the index
            # of the (source state, beam) slot it was expanded from.
            backpointer = restricted_beam_indices // self.per_node_beam_size
            backpointers.append(backpointer.view(batch_size, -1))

            last_log_probabilities = restricted_beam_log_probs.view(
                batch_size, num_fsm_states, -1)

            def track_back_state(state_tensor):
                _, *last_dims = state_tensor.size()
                # shape: (batch_size, num_fsm_states * beam_size, *)
                expanded_backpointer = backpointer.view(
                    batch_size, num_fsm_states * self.beam_size,
                    *([1] * len(last_dims))).expand(
                        batch_size, num_fsm_states * self.beam_size,
                        *last_dims)

                # shape: (batch_size * num_fsm_states * beam_size, *)
                return (state_tensor.reshape(
                    batch_size, num_fsm_states * self.beam_size,
                    *last_dims).gather(1, expanded_backpointer).reshape(
                        batch_size * num_fsm_states * self.beam_size,
                        *last_dims))

            state = {
                key: track_back_state(value)
                for (key, value) in state.items()
            }

        # Reconstruct the sequences.
        # shape: [(batch_size, num_fsm_states * beam_size, 1)]
        reconstructed_predictions = [predictions[-1].unsqueeze(2)]

        # shape: (batch_size, num_fsm_states * beam_size)
        cur_backpointers = backpointers[-1]

        for timestep in range(len(predictions) - 2, 0, -1):
            # shape: (batch_size, num_fsm_states * beam_size, 1)
            cur_preds = predictions[timestep].gather(
                1, cur_backpointers).unsqueeze(2)

            reconstructed_predictions.append(cur_preds)

            # shape: (batch_size, num_fsm_states * beam_size)
            cur_backpointers = backpointers[timestep - 1].gather(
                1, cur_backpointers)

        # shape: (batch_size, num_fsm_states * beam_size, 1)
        final_preds = predictions[0].gather(1, cur_backpointers).unsqueeze(2)

        reconstructed_predictions.append(final_preds)

        # shape: (batch_size, num_fsm_states * beam_size, max_steps)
        all_predictions = torch.cat(list(reversed(reconstructed_predictions)),
                                    2)
        all_predictions = all_predictions.view(batch_size, num_fsm_states,
                                               self.beam_size, -1)

        return all_predictions, last_log_probabilities
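

# A stand-alone sketch (not part of the class above) isolating the torch.where() idiom
# used in the decoding loop: once a beam has emitted the end token, its per-step
# distribution is replaced by one that puts all probability mass on the end token, so
# finished hypotheses are padded without their scores changing. The function name,
# `end_index`, and all tensor values below are illustrative assumptions.
def finished_beam_sketch():
    import torch

    num_beams, vocab_size, end_index = 4, 6, 0
    step_log_probs = torch.log_softmax(torch.randn(num_beams, vocab_size), dim=-1)
    last_predictions = torch.tensor([0, 3, 0, 5])  # beams 0 and 2 already ended

    # Distribution that forces the end token with probability 1 (log-prob 0).
    log_probs_after_end = torch.full((1, vocab_size), float("-inf"))
    log_probs_after_end[:, end_index] = 0.0

    # Broadcast the per-beam "has ended" test over the vocab dimension; torch.where
    # then picks the forced distribution for ended beams and the real one otherwise.
    ended = (last_predictions == end_index).unsqueeze(-1).expand(num_beams, vocab_size)
    cleaned = torch.where(ended, log_probs_after_end, step_log_probs)
    return cleaned

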
def test(model,
         device,
         test_loader,
         criterion,
         mode="raw-task",
         dataset="cifar10",
         poison_type="fashion"):
    class_correct = list(0. for i in range(10))
    class_total = list(0. for i in range(10))

    if dataset in ("mnist", "emnist"):
        target_class = 7
        if mode == "raw-task":
            classes = [str(i) for i in range(10)]
        elif mode == "targetted-task":
            if poison_type == 'ardis':
                classes = [str(i) for i in range(10)]
            else:
                classes = [
                    "T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
                    "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"
                ]
    elif dataset == "cifar10":
        classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog',
                   'horse', 'ship', 'truck')
        # target_class = 2 for greencar, 9 for southwest
        if poison_type in ("howto", "greencar-neo"):
            target_class = 2
        else:
            target_class = 9

    model.eval()
    test_loss = 0
    correct = 0
    backdoor_correct = 0
    backdoor_tot = 0
    final_acc = 0
    task_acc = None

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            _, predicted = torch.max(output, 1)
            c = (predicted == target).squeeze()

            #test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            test_loss += criterion(output, target).item()
            pred = output.argmax(
                dim=1,
                keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
            # check backdoor accuracy
            if poison_type == 'ardis':
                backdoor_index = torch.where(target == target_class)
                target_backdoor = torch.ones_like(target[backdoor_index])
                predicted_backdoor = predicted[backdoor_index]
                backdoor_correct += (
                    predicted_backdoor == target_backdoor).sum().item()
                backdoor_tot += backdoor_index[0].shape[0]  # accumulate over batches
                # logger.info("Target: {}".format(target_backdoor))
                # logger.info("Predicted: {}".format(predicted_backdoor))

            #for image_index in range(test_batch_size):
            for image_index in range(len(target)):
                label = target[image_index]
                class_correct[label] += c[image_index].item()
                class_total[label] += 1
    test_loss /= len(test_loader.dataset)

    if mode == "raw-task":
        for i in range(10):
            logger.info('Accuracy of %5s : %.2f %%' %
                        (classes[i], 100 * class_correct[i] / class_total[i]))

            if i == target_class:
                task_acc = 100 * class_correct[i] / class_total[i]

        logger.info(
            '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.
            format(test_loss, correct, len(test_loader.dataset),
                   100. * correct / len(test_loader.dataset)))
        final_acc = 100. * correct / len(test_loader.dataset)

    elif mode == "targetted-task":

        if dataset in ("mnist", "emnist"):
            for i in range(10):
                logger.info(
                    'Accuracy of %5s : %.2f %%' %
                    (classes[i], 100 * class_correct[i] / class_total[i]))
            if poison_type == 'ardis':
                # ensure 7 is being classified as 1
                logger.info(
                    'Backdoor Accuracy of class %d : %.2f %%' %
                    (target_class, 100 * backdoor_correct / backdoor_tot))
                final_acc = 100 * backdoor_correct / backdoor_tot
            else:
                # trouser acc
                final_acc = 100 * class_correct[1] / class_total[1]

        elif dataset == "cifar10":
            logger.info(
                '#### Targetted Accuracy of %5s : %.2f %%' %
                (classes[target_class], 100 * class_correct[target_class] /
                 class_total[target_class]))
            final_acc = 100 * class_correct[target_class] / class_total[
                target_class]
    return final_acc, task_acc
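

# Stand-alone sketch of the backdoor-accuracy bookkeeping used in test() above:
# torch.where(condition) with a single argument returns the indices where the condition
# holds, and those indices select the subset of predictions belonging to the attacked
# class. The function name and the toy tensors are illustrative assumptions; the
# attacker's desired label is taken to be 1, as in the 'ardis' branch above.
def backdoor_accuracy_sketch():
    import torch

    target_class = 7
    target = torch.tensor([7, 1, 7, 3, 7, 0])
    predicted = torch.tensor([1, 1, 7, 3, 1, 0])

    backdoor_index = torch.where(target == target_class)       # indices of the attacked class
    target_backdoor = torch.ones_like(target[backdoor_index])  # desired (poisoned) label: 1
    predicted_backdoor = predicted[backdoor_index]

    backdoor_correct = (predicted_backdoor == target_backdoor).sum().item()
    backdoor_tot = backdoor_index[0].shape[0]
    return 100.0 * backdoor_correct / backdoor_tot  # 66.67 for this toy example

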
    # 405 in svhn
    zeros = torch.zeros(1).cuda()
    ones = torch.ones(1).cuda()
    minimum = 999999999
    for threshold in thresholds:
        total = 0
        in_distr_tot = 0
        out_distr_tot = 0
        threshold = torch.tensor(threshold).cuda()
        for in_data, out_data in zip(testloader, svhntestloader):
            total += m
            # in distribution
            data_tmp = torch.tensor(in_data[0]).cuda()
            in_distr_predict = f.softmax(classifier(data_tmp), dim=1)
            max_probability = in_distr_predict.max(1)[0]
            # Keep the max softmax probability where it is at or below the threshold,
            # zero elsewhere...
            in_distr_below_threshold = torch.where(
                max_probability <= threshold, max_probability, zeros)
            # ...then turn every non-zero entry into 1 so the sum counts in-distribution
            # samples that would be (wrongly) rejected at this threshold.
            in_distr_below_threshold = torch.where(
                in_distr_below_threshold == 0, in_distr_below_threshold, ones)
            in_distr_tot += in_distr_below_threshold.sum()

            # out-of-distribution
            data_tmp = torch.tensor(out_data[0]).cuda()
            out_distr_predict = f.softmax(classifier(data_tmp), dim=1)
            max_probability = out_distr_predict.max(1)[0]
            # Same trick in the other direction: count out-of-distribution samples whose
            # max probability exceeds the threshold (i.e. would be wrongly accepted).
            out_distr_above_threshold = torch.where(
                max_probability > threshold, max_probability, zeros)
            out_distr_above_threshold = torch.where(
                out_distr_above_threshold == 0, out_distr_above_threshold,
                ones)