def __init__(self, input_dim, num_class, CONFIG):
    super(Model, self).__init__()
    self.bidirectional = True
    self._hid_dim = 256
    # Bidirectional layers concatenate forward and backward hidden states,
    # doubling the feature size seen by the next layer.
    self._rnn2_input_size = self._hid_dim * 2 if self.bidirectional else self._hid_dim
    self._ffn_input_size = self._hid_dim * 2 if self.bidirectional else self._hid_dim
    # Define RNN model
    self.rnn1 = nn.LSTM(input_size=input_dim * 2, hidden_size=self._hid_dim,
                        num_layers=1, batch_first=True, dropout=0,
                        bidirectional=self.bidirectional)
    self.rnn2 = nn.LSTM(input_size=self._rnn2_input_size, hidden_size=self._hid_dim,
                        num_layers=1, batch_first=True, dropout=0,
                        bidirectional=self.bidirectional)
    self.rnn3 = nn.LSTM(input_size=self._rnn2_input_size, hidden_size=self._hid_dim,
                        num_layers=1, batch_first=True, dropout=0,
                        bidirectional=self.bidirectional)
    self.rnn4 = nn.LSTM(input_size=self._rnn2_input_size, hidden_size=self._hid_dim,
                        num_layers=1, batch_first=True, dropout=0,
                        bidirectional=self.bidirectional)
    self.rnn5 = nn.LSTM(input_size=self._rnn2_input_size, hidden_size=self._hid_dim,
                        num_layers=1, batch_first=True, dropout=0,
                        bidirectional=self.bidirectional)
    # Define FFN after RNN; num_class + 1 outputs to include the CTC blank label
    self.fc = nn.Linear(self._ffn_input_size, num_class + 1)
    self.dropout = nn.Dropout(CONFIG['dropout'])
    self.use_cuda = CONFIG['cuda']
    self.ctc_loss = ctc.CTCLoss()
    self.blank = num_class
    self.clip = CONFIG['optimizer']['clip']
    self.reduce_factor = 2
    self.ngram_lm = n_gram_lm(lm_path='WSJ_Character_7.lm', n_gram=3,
                              character_set='Character_Set.txt')
    if CONFIG['xavier']:
        self.init_weights()
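# The class above wires up five stacked bidirectional LSTMs, but its forward
# pass is not shown in this section. The sketch below is a hypothetical
# forward_impl, assuming the module-level dropout is applied between
# successive LSTM layers; only the name forward_impl and the
# (out, rnn_args) return pair are grounded in test_model further down.
def forward_impl(self, x):
    # x: (batch, time, input_dim * 2), matching rnn1's input_size
    out, _ = self.rnn1(x)
    out = self.dropout(out)
    out, _ = self.rnn2(out)
    out = self.dropout(out)
    out, _ = self.rnn3(out)
    out = self.dropout(out)
    out, _ = self.rnn4(out)
    out = self.dropout(out)
    out, rnn_args = self.rnn5(out)
    # Project the 2 * hidden-dim features to num_class + 1 per-frame scores
    out = self.fc(out)
    return out, rnn_args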
def loss(self, batch):
    x, y, x_lens, y_lens = self.collate(*batch)
    out = self.forward_impl(x)
    loss_fn = ctc.CTCLoss()
    loss = loss_fn(out, y, x_lens, y_lens)
    return loss
def awni_loss(out, labels, input_lens, label_lens, blank_idx):
    """Calculates the loss using Awni Hannun's warpctc bindings.

    Only works with pytorch 0.4.
    """
    import functions.ctc as ctc  # Awni Hannun's ctc bindings
    loss_fn = ctc.CTCLoss(blank_label=blank_idx)
    loss = loss_fn(out, labels, input_lens, label_lens)
    return loss
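# The warpctc bindings above are pinned to pytorch 0.4. As a sketch of a
# modern alternative (not the author's code), the same loss can be computed
# with the built-in torch.nn.CTCLoss available from PyTorch 1.0 onward. The
# layout assumptions are labeled below: `out` is taken to be raw logits
# shaped (batch, time, num_class + 1), matching the batch_first=True models
# in this file.
import torch
import torch.nn.functional as F

def builtin_ctc_loss(out, labels, input_lens, label_lens, blank_idx):
    """Hypothetical equivalent of awni_loss using torch.nn.CTCLoss.

    Assumes `labels` is already a LongTensor of targets, either padded
    (batch, max_label_len) or concatenated 1-D, as nn.CTCLoss expects.
    """
    # nn.CTCLoss expects log-probabilities shaped (time, batch, classes)
    log_probs = F.log_softmax(out, dim=-1).transpose(0, 1)
    loss_fn = torch.nn.CTCLoss(blank=blank_idx, zero_infinity=True)
    return loss_fn(log_probs, labels,
                   torch.as_tensor(input_lens, dtype=torch.long),
                   torch.as_tensor(label_lens, dtype=torch.long))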
def __init__(self, input_dim, num_class, CONFIG):
    super(Model, self).__init__()
    self.bidirectional = True
    self._hid_dim = 256
    self._rnn2_input_size = self._hid_dim * 2 if self.bidirectional else self._hid_dim
    self._ffn_input_size = self._hid_dim * 2 if self.bidirectional else self._hid_dim
    # Define RNN model
    self.rnn1 = nn.LSTM(input_size=input_dim * 2, hidden_size=self._hid_dim,
                        num_layers=1, batch_first=True, dropout=0,
                        bidirectional=self.bidirectional)
    self.rnn2 = nn.LSTM(input_size=self._rnn2_input_size, hidden_size=self._hid_dim,
                        num_layers=1, batch_first=True, dropout=0,
                        bidirectional=self.bidirectional)
    self.rnn3 = nn.LSTM(input_size=self._rnn2_input_size, hidden_size=self._hid_dim,
                        num_layers=1, batch_first=True, dropout=0,
                        bidirectional=self.bidirectional)
    self.rnn4 = nn.LSTM(input_size=self._rnn2_input_size, hidden_size=self._hid_dim,
                        num_layers=1, batch_first=True, dropout=0,
                        bidirectional=self.bidirectional)
    self.rnn5 = nn.LSTM(input_size=self._rnn2_input_size, hidden_size=self._hid_dim,
                        num_layers=1, batch_first=True, dropout=0,
                        bidirectional=self.bidirectional)
    # Define FFN after RNN
    self.fc = nn.Linear(self._ffn_input_size, num_class + 1)
    self.dropout = nn.Dropout(0.1)  # dropout rate hardcoded in this variant
    self.use_cuda = CONFIG['cuda']
    self.ctc_loss = ctc.CTCLoss()
    self.blank = num_class
    self.clip = CONFIG['optimizer']['clip']
    self.reduce_factor = 2
def test_model(test_label):
    """Checks whether NaNs appear in a variety of test scenarios.

    Arguments:
        test_label (str): the label passed into alter_logspec
    """
    with open(CONFIG_PATH, 'r') as fid:
        config = json.load(fid)
    opt_cfg = config["optimizer"]
    model_cfg = config["model"]
    model, preproc = load(MODEL_PATH)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=opt_cfg["learning_rate"],
                                momentum=opt_cfg["momentum"])
    log_spec, samp_rate = load_audio(AUDIO_PATH, preproc)
    inputs = alter_logspec(log_spec, alter=test_label)
    labels = [preproc.encode(TEXT)]
    batch = (inputs, labels)
    x, y, x_lens, y_lens = model.collate(*batch)
    out, rnn_args = model.forward_impl(x)
    loss_fn = ctc.CTCLoss()
    loss = loss_fn(out, y, x_lens, y_lens)
    loss.backward()
    loss = loss.item()
    # Clip gradients after backward and before the optimizer step
    grad_norm = nn.utils.clip_grad_norm_(model.parameters(), 200)
    print(f"loss: {loss}, grad_norm: {grad_norm}")
    optimizer.step()
    print(f"Are there NaNs? : {check_nan(model)}")
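# check_nan is called above but not defined in this section; below is a
# minimal hypothetical sketch of what such a helper could look like,
# scanning every parameter (and gradient) for NaNs. Only the name is taken
# from the source.
def check_nan(model):
    for param in model.parameters():
        if torch.isnan(param).any():
            return True
        if param.grad is not None and torch.isnan(param.grad).any():
            return True
    return False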
def loss(self, out, batch):
    x, y, x_lens, y_lens = self.collate(*batch)
    batch_size, _, out_dim = out.size()  # shape unpacked but unused below
    loss_fn = ctc.CTCLoss()
    loss = loss_fn(out, y, x_lens, y_lens)
    return loss