def forward(self, logits, target, mask=None):
    """LabelSmoothing Function with Mask

    Args:
        logits ([tensor]): logits with shape [batch, length, vocab_size]
        target ([tensor]): target with shape [batch, length]
        mask ([tensor], optional): mask tensor (bool) with shape [batch, length]
    """
    assert logits.dim() == 3 and logits.size(-1) == self.size
    # Positions holding the padding index never contribute to the loss.
    pad_mask = target == self.padding_idx
    if mask is not None:
        mask = (pad_mask.int() + mask.int()) > 0
    else:
        mask = pad_mask
    logits = logits.reshape(-1, self.size)
    with flow.no_grad():
        # Smoothed target distribution: smoothing mass spread over the
        # non-target classes, (1 - smoothing) on the target class.
        confidence = logits.clone()
        confidence.fill_(self.smoothing / (self.size - 1))
        confidence = flow.scatter(
            confidence, 1, target.reshape(-1).unsqueeze(1), 1 - self.smoothing
        )
    logsoftmax = nn.LogSoftmax(dim=-1)
    KLdiv = nn.KLDivLoss(reduction="none", log_target=False)
    loss = flow.sum(KLdiv(logsoftmax(logits), confidence), dim=-1)
    # Normalize by the number of unmasked tokens or by the number of rows.
    total = flow.sum(mask == 0)
    denom = total if self.normalize_length else logits.size(0)
    loss = flow.masked_fill(loss, mask.reshape(-1), 0.0)
    loss = flow.sum(loss) / denom
    return loss
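# Usage sketch (assumption, not from the source): the forward above is assumed to live in an
# nn.Module, hypothetically named LabelSmoothingLoss, whose constructor stores size (vocab size),
# padding_idx, smoothing, and normalize_length as attributes.
import oneflow as flow

criterion = LabelSmoothingLoss(size=5000, padding_idx=0, smoothing=0.1, normalize_length=True)
logits = flow.randn(4, 12, 5000)                # [batch, length, vocab_size]
target = flow.ones(4, 12, dtype=flow.int64)     # [batch, length] token indices (no padding here)
loss = criterion(logits, target)                # scalar loss averaged over unmasked tokens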
def predict(self, pred, hidden=None):
    # Embed the previously predicted tokens and advance the RNN state.
    emb_inputs = self.embedding(pred)
    outputs, hidden = self.rnn(emb_inputs, hidden)
    # Project to the vocabulary and return log-probabilities plus the new state.
    logits = self.output_project(outputs)
    logsoftmax = nn.LogSoftmax(dim=-1)
    log_probs = logsoftmax(logits)
    return log_probs, hidden
def __init__(self, input_size, hidden_size, output_size):
    super().__init__()
    self.hidden_size = hidden_size
    # The input and the previous hidden state are concatenated before both projections.
    self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
    self.i2o = nn.Linear(input_size + hidden_size, output_size)
    self.softmax = nn.LogSoftmax(dim=1)
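# Matching forward step, a minimal sketch (assumption, not from the source): the classic
# recurrent-cell pattern implied by i2h/i2o above, concatenating the current input with the
# previous hidden state before both projections. `flow` refers to the file's oneflow import.
def forward(self, input, hidden):
    combined = flow.cat((input, hidden), dim=1)   # [batch, input_size + hidden_size]
    hidden = self.i2h(combined)                   # next hidden state
    output = self.softmax(self.i2o(combined))     # log-probabilities over the output classes
    return output, hidden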
def inference(self, preds, memory, memory_mask=None, cache=None):
    assert preds.dim() == 2
    logits, attn_weights = self.forward(preds, memory, memory_mask)
    logsoftmax = nn.LogSoftmax(dim=-1)
    # Only the distribution over the last decoded position is needed during inference.
    log_probs = logsoftmax(logits[:, -1, :])
    return log_probs, cache, attn_weights
def compute_loss(self, logits, enc_length, targets, targets_length):
    logsoftmax = nn.LogSoftmax(dim=-1)
    log_probs = logsoftmax(logits)
    # The CTC criterion expects int32 targets/lengths and (time, batch, vocab) log-probs.
    targets_length = targets_length.to(flow.int32)
    targets = targets.to(flow.int32)
    enc_length = enc_length.to(flow.int32)
    loss = self.ctc_crit(
        log_probs.transpose(0, 1), targets, enc_length, targets_length
    )
    return loss
def __init__(self):
    super(DomainClassifier, self).__init__()
    self.main = nn.Sequential(
        Down2d(1, 8, (4, 4), (2, 2), (5, 1)),
        Down2d(8, 16, (4, 4), (2, 2), (1, 1)),
        Down2d(16, 32, (4, 4), (2, 2), (0, 1)),
        Down2d(32, 16, (3, 4), (1, 2), (1, 1)),
        nn.Conv2d(16, 4, (1, 4), (1, 2), (0, 1)),
        nn.AvgPool2d((1, 16)),
        # Log-probabilities over the 4 output channels (dim made explicit).
        nn.LogSoftmax(dim=1),
    )
def inference(self, memory, memory_mask):
    if self.apply_look_ahead:
        # Zero-pad future frames so the look-ahead convolution has full context,
        # then apply it along the time dimension.
        memory = F.pad(memory, pad=(0, 0, 0, self.lookahead_steps), value=0.0)
        memory = memory.transpose(1, 2)
        memory = self.lookahead_conv(memory)
        memory = memory.transpose(1, 2)
    logits = self.output_layer(memory)
    # Number of valid encoder frames per utterance, from the memory mask.
    memory_length = flow.sum(memory_mask.squeeze(1), dim=-1)
    logsoftmax = nn.LogSoftmax(dim=-1)
    return logsoftmax(logits), memory_length
def forward(self, data_batch):
    """Forward pass through the Wav2Letter network that takes the log probability of the output

    Args:
        data_batch (oneflow.Tensor): mini batch of data with shape (batch, num_features, frame_len)

    Returns:
        log_probs (oneflow.Tensor): shape (batch_size, num_classes, output_len)
    """
    y_pred = self.layers(data_batch)
    log_probs = nn.LogSoftmax(dim=1)(y_pred)
    return log_probs
def predict(self, targets, last_frame=True):
    dec_output = self.embedding(targets)
    dec_output, _ = self.pos_embedding(dec_output)
    # Sequence mask so each position attends only to earlier positions.
    dec_mask = get_seq_mask(targets)
    for _, block in enumerate(self.blocks):
        dec_output, _ = block(dec_output, dec_mask)
    if self.normalize_before:
        dec_output = self.after_norm(dec_output)
    logits = self.output_project(dec_output)
    logsoftmax = nn.LogSoftmax(dim=-1)
    if last_frame:
        # Keep only the distribution for the most recent target position.
        log_probs = logsoftmax(logits[:, -1, :].unsqueeze(1))
    else:
        log_probs = logsoftmax(logits)
    return log_probs
def act_fun(act_type):
    if act_type == "relu":
        return nn.ReLU()
    if act_type == "tanh":
        return nn.Tanh()
    if act_type == "sigmoid":
        return nn.Sigmoid()
    if act_type == "leaky_relu":
        return nn.LeakyReLU(0.2)
    if act_type == "elu":
        return nn.ELU()
    if act_type == "softmax":
        # "softmax" returns LogSoftmax, i.e. log-probabilities rather than probabilities.
        return nn.LogSoftmax(dim=1)
    if act_type == "linear":
        # LeakyReLU with slope 1 is the identity, i.e. no non-linearity.
        return nn.LeakyReLU(1)
    raise ValueError("unsupported activation type: {}".format(act_type))
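# Usage sketch (assumption, not from the source): act_fun returns an nn.Module, so it can be
# dropped directly into a layer stack; the layer sizes below are hypothetical.
from oneflow import nn

activation = act_fun("leaky_relu")
layer = nn.Sequential(nn.Linear(128, 64), activation)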
def recognize(self, inputs, inputs_length):
    memory, memory_mask = self.encoder(inputs, inputs_length)
    # Frame-level logits from the assistor head over the encoder memory.
    logits = self.assistor(memory, return_logits=True)
    # Number of valid encoder frames per utterance, from the memory mask.
    memory_length = flow.sum(memory_mask.squeeze(1), dim=-1)
    logsoftmax = nn.LogSoftmax(dim=-1)
    return logsoftmax(logits), memory_length
def __init__(self, input_size, output_size, hidden_size):
    super(MLP, self).__init__()
    self.fc1 = nn.Linear(input_size, hidden_size)
    self.fc2 = nn.Linear(hidden_size, output_size)
    self.relu = nn.ReLU()
    self.log_soft = nn.LogSoftmax(dim=1)
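# Matching forward pass, a minimal sketch (assumption, not from the source): the usual
# two-layer pattern implied by fc1/fc2 above, returning log-probabilities.
def forward(self, x):
    x = self.relu(self.fc1(x))          # [batch, hidden_size]
    return self.log_soft(self.fc2(x))   # [batch, output_size] log-probabilities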
def __init__(
    self,
    num_classes=1000,
    width_mult=1.0,
    inverted_residual_setting=None,
    round_nearest=8,
):
    """
    MobileNet V2 main class

    Args:
        num_classes (int): Number of classes
        width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
        inverted_residual_setting: Network structure
        round_nearest (int): Round the number of channels in each layer to be a multiple of this number
                             Set to 1 to turn off rounding
    """
    super(MobileNetV2, self).__init__()
    block = InvertedResidual
    input_channel = 32
    last_channel = 1280

    if inverted_residual_setting is None:
        inverted_residual_setting = [
            # t, c, n, s
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]

    # only check the first element, assuming user knows t,c,n,s are required
    if (len(inverted_residual_setting) == 0
            or len(inverted_residual_setting[0]) != 4):
        raise ValueError(
            "inverted_residual_setting should be non-empty and each entry "
            "should be a 4-element list, got {}".format(inverted_residual_setting)
        )

    # building first layer
    input_channel = _make_divisible(input_channel * width_mult, round_nearest)
    self.last_channel = _make_divisible(
        last_channel * max(1.0, width_mult), round_nearest
    )
    features = [ConvBNReLU(1, input_channel, stride=2)]
    # building inverted residual blocks
    for t, c, n, s in inverted_residual_setting:
        output_channel = _make_divisible(c * width_mult, round_nearest)
        for i in range(n):
            stride = s if i == 0 else 1
            features.append(
                block(input_channel, output_channel, stride, expand_ratio=t)
            )
            input_channel = output_channel
    # building last several layers
    features.append(
        ConvBNReLU(input_channel, self.last_channel, kernel_size=1)
    )
    # make it nn.Sequential
    self.features = nn.Sequential(*features)

    # building classifier
    self.classifier = nn.Sequential(
        nn.Dropout(0.2),
        nn.Linear(self.last_channel, num_classes),
        nn.LogSoftmax(dim=1),
    )
    self.normalize = nn.BatchNorm1d(6420)
    self.maxpool1d = nn.MaxPool1d(3, stride=2)

    # weight initialization (covers both the 1-D and 2-D layer variants used here)
    for m in self.modules():
        if isinstance(m, (nn.Conv1d, nn.Conv2d)):
            nn.init.kaiming_normal_(m.weight, mode="fan_out")
            if m.bias is not None:
                nn.init.zeros_(m.bias)
        elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)):
            nn.init.ones_(m.weight)
            nn.init.zeros_(m.bias)
        elif isinstance(m, nn.Linear):
            nn.init.normal_(m.weight, 0, 0.01)
            nn.init.zeros_(m.bias)
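# Instantiation sketch (assumption, not from the source): builds the model with default
# structure settings; the class count of 10 is hypothetical.
model = MobileNetV2(num_classes=10, width_mult=1.0)
print(model.classifier)  # Dropout -> Linear -> LogSoftmax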