Example 1
    def forward(self, x):
        # anisotropic total-variation penalty: squared differences between
        # neighboring pixels along the height and width dimensions
        batch_size = x.size()[0]
        h_x = x.size()[2]
        w_x = x.size()[3]
        count_h = self.tensor_size(x[:, :, 1:, :])
        count_w = self.tensor_size(x[:, :, :, 1:])
        h_tv = flow.pow((x[:, :, 1:, :] - x[:, :, :h_x - 1, :]), 2).sum()
        w_tv = flow.pow((x[:, :, :, 1:] - x[:, :, :, :w_x - 1]), 2).sum()
        return self.tv_loss_weight * 2 * (h_tv / count_h +
                                          w_tv / count_w) / batch_size
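This forward depends on a tv_loss_weight attribute and a tensor_size helper that are not shown. A minimal functional sketch of the same total-variation loss, with assumed imports and an assumed per-sample element count for tensor_size:

import oneflow as flow

def _per_sample_numel(t):
    # assumed behavior of self.tensor_size: elements per sample (C * H * W)
    return t.size()[1] * t.size()[2] * t.size()[3]

def tv_loss(x, tv_loss_weight=1.0):
    batch_size, _, h_x, w_x = x.size()
    count_h = _per_sample_numel(x[:, :, 1:, :])
    count_w = _per_sample_numel(x[:, :, :, 1:])
    h_tv = flow.pow(x[:, :, 1:, :] - x[:, :, :h_x - 1, :], 2).sum()
    w_tv = flow.pow(x[:, :, :, 1:] - x[:, :, :, :w_x - 1], 2).sum()
    return tv_loss_weight * 2 * (h_tv / count_h + w_tv / count_w) / batch_size

x = flow.randn(4, 3, 32, 32)
print(tv_loss(x))  # scalar smoothness penalty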
Example 2
    def forward(self, inputs, targets):
        """
        Args:
            inputs (flow.Tensor): feature matrix with shape (batch_size, feat_dim).
            targets (flow.Tensor, dtype=flow.int64): ground truth labels with shape (batch_size,).
        """
        n = inputs.size(0)

        # Compute pairwise distance, replace by the official when merged
        dist = flow.pow(inputs, 2).sum(dim=1).expand(n, n)
        dist = dist + flow.transpose(dist, dim0=1, dim1=0)
        temp1 = -2 * flow.matmul(inputs, flow.transpose(inputs, dim0=1,
                                                        dim1=0))
        dist = flow.add(dist, temp1)
        dist = flow.sqrt(flow.clamp(dist, min=1e-12))
        # For each anchor, find the hardest positive and negative
        mask = targets.expand(n, n).eq(
            flow.transpose(targets.expand(n, n), dim0=1, dim1=0))
        dist_ap, dist_an = [], []
        # fill-value tensors for the masked selections below: zeros and a very
        # large constant (exp(100) per element) standing in for +inf
        y1 = flow.zeros((1, n), dtype=flow.float32).to("cuda")
        y2 = flow.Tensor(np.exp(100 * np.ones((1, n)))).to("cuda")

        for i in range(n):
            temp_dist = flow.slice(dist, [(i, i + 1, 1)])
            temp_mask = flow.slice(mask, [(i, i + 1, 1)])
            temp_mask_rev = flow.slice(1 - mask, [(i, i + 1, 1)])
            dist_ap.append(temp_mask.where(temp_dist, y1).max().unsqueeze(0))
            dist_an.append(
                temp_mask_rev.where(temp_dist, y2).min().unsqueeze(0))
        dist_ap = flow.cat(dist_ap)
        dist_an = flow.cat(dist_an)

        # Compute ranking hinge loss
        y = flow.ones_like(dist_an)
        return self.ranking_loss(dist_an, dist_ap, y)
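The forward above also relies on self.ranking_loss, which is not shown; in comparable hard-mining triplet losses it is a margin ranking loss, so the final call can be read as in the sketch below (the loss choice and margin value are assumptions, not from the source):

import oneflow as flow

# assumed: self.ranking_loss = flow.nn.MarginRankingLoss(margin=0.3)
ranking_loss = flow.nn.MarginRankingLoss(margin=0.3)

dist_ap = flow.tensor([0.9, 0.4])  # hardest-positive distance per anchor
dist_an = flow.tensor([1.2, 0.3])  # hardest-negative distance per anchor
y = flow.ones_like(dist_an)        # y = 1: dist_an should exceed dist_ap

# per element: max(0, margin - (dist_an - dist_ap)), then averaged
print(ranking_loss(dist_an, dist_ap, y))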
Example 3
def gelu(x):
    """
    Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT). Also see
    the Gaussian Error Linear Units paper: https://arxiv.org/abs/1606.08415
    """
    return (0.5 * x * (1.0 + flow.tanh(
        math.sqrt(2.0 / math.pi) * (x + 0.044715 * flow.pow(x, 3.0)))))
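The snippet omits its imports; a self-contained version (math and oneflow assumed) evaluated at a few points:

import math
import oneflow as flow

def gelu(x):
    # tanh approximation of GELU used in BERT/GPT
    return 0.5 * x * (1.0 + flow.tanh(
        math.sqrt(2.0 / math.pi) * (x + 0.044715 * flow.pow(x, 3.0))))

x = flow.tensor([-1.0, 0.0, 1.0])
print(gelu(x))  # roughly [-0.159, 0.0, 0.841]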
Example 4
def _pow(self, b):
    return flow.pow(self, b)
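This is a thin wrapper that simply forwards to flow.pow, presumably so it can be exposed as a tensor method; a quick check of both spellings (assuming the method is registered as Tensor.pow):

import oneflow as flow

x = flow.tensor([1.0, 2.0, 3.0])
print(flow.pow(x, 2.0))  # tensor([1., 4., 9.])
print(x.pow(2.0))        # same result via the tensor method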
Example 5
    def forward(self, x):
        # element-wise square via flow.pow
        return flow.pow(x, 2.0)
Example 6
    def recognize(self, inputs, inputs_mask):

        cache = {"fronend": None, "encoder": None, "decoder": None, "lm": None}

        self.attn_weights = {}
        memory, memory_mask, _, enc_attn_weights = self.encode(
            inputs, inputs_mask)

        self.attn_weights["encoder"] = enc_attn_weights
        self.attn_weights["decoder"] = []

        b, t, v = memory.size()

        beam_memory = (memory.unsqueeze(1).repeat(
            [1, self.beam_width, 1, 1]).view(b * self.beam_width, t, v))
        beam_memory_mask = (memory_mask.unsqueeze(1).repeat(
            [1, self.beam_width, 1]).view(b * self.beam_width, t))

        # every beam hypothesis starts from the BOS token
        preds = (flow.ones(
            [b * self.beam_width, 1], dtype=flow.int64, device=memory.device) *
                 BOS)

        # only the first hypothesis of each beam starts active; the others get
        # -inf so they cannot be selected before the first expansion
        scores = flow.tensor([0.0] + [-float("inf")] * (self.beam_width - 1),
                             dtype=flow.float32)
        scores = scores.to(memory.device).repeat([b]).unsqueeze(1)
        ending_flag = flow.zeros_like(scores).to(dtype=flow.uint8)

        with flow.no_grad():
            for _ in range(1, self.max_len + 1):
                preds, cache, scores, ending_flag = self.decode_step(
                    preds, beam_memory, beam_memory_mask, cache, scores,
                    ending_flag)

                # stop early once every beam hypothesis has ended
                if ending_flag.sum() == b * self.beam_width:
                    break

            scores = scores.view(b, self.beam_width)
            preds = preds.view(b, self.beam_width, -1)

            lengths = flow.sum(flow.ne(preds, EOS).float(), dim=-1)

            # length penalty
            if self.penalty:
                lp = flow.pow((self.lamda + lengths) / (self.lamda + 1),
                              self.penalty)
                scores /= lp

            sorted_scores, offset_indices = flow.sort(scores,
                                                      dim=-1,
                                                      descending=True)

            base_indices = (flow.arange(
                b, dtype=flow.int64, device=offset_indices.device) *
                            self.beam_width)
            base_indices = (base_indices.unsqueeze(1).repeat(
                [1, self.beam_width]).view(-1))
            preds = preds.view(b * self.beam_width, -1)
            indices = offset_indices.view(-1) + base_indices

            # remove BOS
            sorted_preds = preds[indices].view(b, self.beam_width, -1)
            nbest_preds = sorted_preds[:, :min(self.beam_width, self.nbest),
                                       1:]
            nbest_scores = sorted_scores[:, :min(self.beam_width, self.nbest)]

        return self.nbest_translate(nbest_preds), nbest_scores
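The flow.pow call in this example implements a GNMT-style length penalty, lp = ((lamda + length) / (lamda + 1)) ** penalty. Isolated below with illustrative lamda/penalty values (the source does not show them):

import oneflow as flow

lamda, penalty = 5.0, 0.6                   # illustrative values only
lengths = flow.tensor([[7.0, 9.0, 12.0]])   # (batch, beam_width)
scores = flow.tensor([[-3.1, -3.4, -4.0]])  # summed log-probabilities

lp = flow.pow((lamda + lengths) / (lamda + 1), penalty)
print(scores / lp)  # dividing by lp keeps longer hypotheses competitive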