Example 1
    def forward(self, logits, target, mask=None):
        """LabelSmoothing Function with Mask

        Args:
            logits ([tensor]): logits with shape [batch, length, vocab_size]
            target ([tensor]): target with shape [batch, length]
            mask ([tensor], optional): mask tensor (bool) with shape [batch, length]
        """
        assert logits.dim() == 3 and logits.size(-1) == self.size

        # Exclude padded positions (and any extra user-provided mask) from the loss.
        pad_mask = target == self.padding_idx
        if mask is not None:
            mask = (pad_mask.int() + mask.int()) > 0
        else:
            mask = pad_mask

        batch_size = logits.size(0)
        logits = logits.reshape(-1, self.size)
        with flow.no_grad():
            # Smoothed target distribution: (1 - smoothing) on the gold token,
            # smoothing / (size - 1) on every other vocabulary entry.
            confidence = logits.clone()
            confidence.fill_(self.smoothing / (self.size - 1))
            confidence = flow.scatter(confidence, 1,
                                      target.reshape(-1).unsqueeze(1),
                                      1 - self.smoothing)

        logsoftmax = nn.LogSoftmax(dim=-1)
        KLdiv = nn.KLDivLoss(reduction="none", log_target=False)
        loss = flow.sum(KLdiv(logsoftmax(logits), confidence), dim=-1)

        # Average over non-masked tokens, or over sequences when normalize_length is False.
        total = flow.sum(mask == 0)
        denom = total if self.normalize_length else batch_size
        loss = flow.masked_fill(loss, mask.reshape(-1), 0.0)
        loss = flow.sum(loss) / denom

        return loss
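A minimal standalone sketch of the same label-smoothing pattern, written with OneFlow and purely illustrative sizes, hyperparameters, and variable names (none of them come from the snippet's class):

import oneflow as flow
import oneflow.nn as nn

batch, length, vocab_size = 2, 5, 10           # hypothetical sizes
smoothing, padding_idx = 0.1, 0                # hypothetical hyperparameters

logits = flow.randn(batch, length, vocab_size)
target = flow.randint(0, vocab_size, (batch, length))

# Smoothed targets: (1 - smoothing) on the gold token, the rest spread uniformly.
confidence = flow.full((batch * length, vocab_size), smoothing / (vocab_size - 1))
confidence = flow.scatter(confidence, 1, target.reshape(-1, 1), 1 - smoothing)

log_probs = nn.LogSoftmax(dim=-1)(logits.reshape(-1, vocab_size))
loss = flow.sum(nn.KLDivLoss(reduction="none")(log_probs, confidence), dim=-1)

# Zero out padded positions and average over the remaining tokens.
pad_mask = (target == padding_idx).reshape(-1)
loss = flow.masked_fill(loss, pad_mask, 0.0)
loss = flow.sum(loss) / flow.sum(pad_mask == 0)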
Example 2
    def predict(self, pred, hidden=None):

        emb_inputs = self.embedding(pred)
        outputs, hidden = self.rnn(emb_inputs, hidden)
        logits = self.output_project(outputs)
        logsoftmax = nn.LogSoftmax(dim=-1)
        log_probs = logsoftmax(logits)
        return log_probs, hidden
Example 3
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()

        self.hidden_size = hidden_size

        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
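The forward pass of this cell is not shown above. A plausible sketch, assuming the classic char-RNN wiring that the i2h / i2o / softmax attributes suggest (this is an assumption, not the original code):

    def forward(self, x, hidden):
        # Assumed wiring: concatenate input and hidden state, then project.
        combined = flow.cat([x, hidden], dim=1)
        hidden = self.i2h(combined)
        output = self.softmax(self.i2o(combined))
        return output, hidden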
Example 4
    def inference(self, preds, memory, memory_mask=None, cache=None):

        assert preds.dim() == 2
        logits, attn_weights = self.forward(preds, memory, memory_mask)
        logsoftmax = nn.LogSoftmax(dim=-1)
        log_probs = logsoftmax(logits[:, -1, :])

        return log_probs, cache, attn_weights
Example 5
    def compute_loss(self, logits, enc_length, targets, targets_length):
        logsoftmax = nn.LogSoftmax(dim=-1)
        log_probs = logsoftmax(logits)
        targets_length = targets_length.to(flow.int32)
        targets = targets.to(flow.int32)
        enc_length = enc_length.to(flow.int32)
        loss = self.ctc_crit(
            log_probs.transpose(0, 1), targets, enc_length, targets_length
        )
        return loss
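Here self.ctc_crit is presumably an nn.CTCLoss instance. A self-contained sketch of the same call pattern, with made-up shapes and names:

import oneflow as flow
import oneflow.nn as nn

batch, time, classes = 4, 50, 30                 # hypothetical shapes
logits = flow.randn(batch, time, classes)        # stand-in for the encoder output
log_probs = nn.LogSoftmax(dim=-1)(logits)

targets = flow.randint(1, classes, (batch, 10)).to(flow.int32)
enc_length = flow.full((batch,), time).to(flow.int32)
targets_length = flow.full((batch,), 10).to(flow.int32)

# nn.CTCLoss expects (time, batch, classes) log-probabilities, hence the transpose.
ctc_crit = nn.CTCLoss(blank=0, reduction="mean")
loss = ctc_crit(log_probs.transpose(0, 1), targets, enc_length, targets_length)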
Example 6
    def __init__(self):
        super(DomainClassifier, self).__init__()
        self.main = nn.Sequential(
            Down2d(1, 8, (4, 4), (2, 2), (5, 1)),
            Down2d(8, 16, (4, 4), (2, 2), (1, 1)),
            Down2d(16, 32, (4, 4), (2, 2), (0, 1)),
            Down2d(32, 16, (3, 4), (1, 2), (1, 1)),
            nn.Conv2d(16, 4, (1, 4), (1, 2), (0, 1)),
            nn.AvgPool2d((1, 16)),
            nn.LogSoftmax(dim=1),  # normalize over the channel (class) dimension
        )
Example 7
    def inference(self, memory, memory_mask):

        if self.apply_look_ahead:
            # Pad lookahead_steps zero frames at the end of the time axis so the
            # look-ahead convolution can use a little future context.
            memory = F.pad(memory, pad=(0, 0, 0, self.lookahead_steps), value=0.0)
            memory = memory.transpose(1, 2)
            memory = self.lookahead_conv(memory)
            memory = memory.transpose(1, 2)

        logits = self.output_layer(memory)
        memory_length = flow.sum(memory_mask.squeeze(1), dim=-1)
        logsoftmax = nn.LogSoftmax(dim=-1)
        return logsoftmax(logits), memory_length
Example 8
    def forward(self, data_batch):
        """Forward pass through Wav2Letter network than 
            takes log probability of output

        Args:
            data_batch (int): mini batch of data
             shape (batch, num_features, frame_len)

        Returns:
            log_probs (oneflow.Tensor):
                shape  (batch_size, num_classes, output_len)
        """
        y_pred = self.layers(data_batch)
        log_probs = nn.LogSoftmax(dim=1)(y_pred)

        return log_probs
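Note the class axis: nn.LogSoftmax(dim=1) normalizes over num_classes in a (batch, num_classes, output_len) tensor. If these log-probabilities were then fed to nn.CTCLoss, they would first need a permute; a rough sketch with made-up sizes:

import oneflow as flow
import oneflow.nn as nn

log_probs = nn.LogSoftmax(dim=1)(flow.randn(2, 40, 200))   # stand-in for self.layers(x)
ctc_input = log_probs.permute(2, 0, 1)                     # (output_len, batch, num_classes)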
Example 9
    def predict(self, targets, last_frame=True):

        dec_output = self.embedding(targets)
        dec_output, _ = self.pos_embedding(dec_output)

        dec_mask = get_seq_mask(targets)

        for _, block in enumerate(self.blocks):
            dec_output, _ = block(dec_output, dec_mask)

        if self.normalize_before:
            dec_output = self.after_norm(dec_output)

        logits = self.output_project(dec_output)
        logsoftmax = nn.LogSoftmax(dim=-1)
        if last_frame:
            # During incremental decoding only the newest position is needed.
            log_probs = logsoftmax(logits[:, -1, :].unsqueeze(1))
        else:
            log_probs = logsoftmax(logits)

        return log_probs
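With last_frame=True the result has shape (batch, 1, vocab). A hypothetical greedy decoding step on top of such output (the tensor below is random, not the model's):

import oneflow as flow
import oneflow.nn as nn

log_probs = nn.LogSoftmax(dim=-1)(flow.randn(4, 1, 100))   # stand-in for predict(...)
next_token = flow.argmax(log_probs.squeeze(1), dim=-1)     # shape: (batch,)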
Example 10
def act_fun(act_type):
    if act_type == "relu":
        return nn.ReLU()

    if act_type == "tanh":
        return nn.Tanh()

    if act_type == "sigmoid":
        return nn.Sigmoid()

    if act_type == "leaky_relu":
        return nn.LeakyReLU(0.2)

    if act_type == "elu":
        return nn.ELU()

    if act_type == "softmax":
        return nn.LogSoftmax(dim=1)

    if act_type == "linear":
        return nn.LeakyReLU(1)
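A hedged usage sketch of the factory above; the layer sizes are arbitrary. Note that the "softmax" option actually returns nn.LogSoftmax, so downstream losses should expect log-probabilities:

model = nn.Sequential(
    nn.Linear(128, 64),
    act_fun("leaky_relu"),
    nn.Linear(64, 10),
    act_fun("softmax"),   # returns nn.LogSoftmax(dim=1): outputs are log-probs
)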
Example 11
    def recognize(self, inputs, inputs_length):
        memory, memory_mask = self.encoder(inputs, inputs_length)
        logits = self.assistor(memory, return_logits=True)
        memory_length = flow.sum(memory_mask.squeeze(1), dim=-1)
        logsoftmax = nn.LogSoftmax(dim=-1)
        return logsoftmax(logits), memory_length
Example 12
    def __init__(self, input_size, output_size, hidden_size):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        self.log_soft = nn.LogSoftmax(dim=1)
Example 13
    def __init__(
        self,
        num_classes=1000,
        width_mult=1.0,
        inverted_residual_setting=None,
        round_nearest=8,
    ):
        """
        MobileNet V2 main class

        Args:
            num_classes (int): Number of classes
            width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
            inverted_residual_setting: Network structure
            round_nearest (int): Round the number of channels in each layer to be a multiple of this number.
                Set to 1 to turn off rounding.
        """
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        input_channel = 32
        last_channel = 1280

        if inverted_residual_setting is None:
            inverted_residual_setting = [
                # t, c, n, s
                [1, 16, 1, 1],
                [6, 24, 2, 2],
                [6, 32, 3, 2],
                [6, 64, 4, 2],
                [6, 96, 3, 1],
                [6, 160, 3, 2],
                [6, 320, 1, 1],
            ]

        # only check the first element, assuming user knows t,c,n,s are required
        if (len(inverted_residual_setting) == 0
                or len(inverted_residual_setting[0]) != 4):
            raise ValueError("inverted_residual_setting should be non-empty "
                             "and each element should be a 4-element list, got {}".format(
                                 inverted_residual_setting))

        # building first layer
        input_channel = _make_divisible(input_channel * width_mult,
                                        round_nearest)
        self.last_channel = _make_divisible(
            last_channel * max(1.0, width_mult), round_nearest)
        features = [ConvBNReLU(1, input_channel, stride=2)]
        # building inverted residual blocks
        for t, c, n, s in inverted_residual_setting:
            output_channel = _make_divisible(c * width_mult, round_nearest)
            for i in range(n):
                stride = s if i == 0 else 1
                features.append(
                    block(input_channel,
                          output_channel,
                          stride,
                          expand_ratio=t))
                input_channel = output_channel
        # building last several layers
        features.append(
            ConvBNReLU(input_channel, self.last_channel, kernel_size=1))
        # make it nn.Sequential
        self.features = nn.Sequential(*features)

        # building classifier
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(self.last_channel, num_classes),
            nn.LogSoftmax(dim=1),
        )

        self.normalize = nn.BatchNorm1d(6420)
        self.maxpool1d = nn.MaxPool1d(3, stride=2)

        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out")
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)
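Because the classifier ends in nn.LogSoftmax(dim=1), its output pairs with nn.NLLLoss, which expects log-probabilities. A minimal sketch of that pairing, independent of the full network above (all shapes are illustrative):

import oneflow as flow
import oneflow.nn as nn

head = nn.Sequential(nn.Dropout(0.2), nn.Linear(1280, 1000), nn.LogSoftmax(dim=1))
features = flow.randn(8, 1280)               # hypothetical pooled feature batch
labels = flow.randint(0, 1000, (8,))
loss = nn.NLLLoss()(head(features), labels)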