def __init__(self, input_dim, num_class, CONFIG):
        super(Model, self).__init__()

        self.bidirectional = True
        self._hid_dim = 256
        self._rnn2_input_size = self._hid_dim * 2 if self.bidirectional else self._hid_dim
        self._ffn_input_size = self._hid_dim * 2 if self.bidirectional else self._hid_dim

        # Define RNN model: five stacked bidirectional LSTM layers. The first
        # layer takes input_dim * 2 features, presumably because adjacent
        # frames are stacked in pairs (cf. self.reduce_factor = 2 below);
        # every later layer consumes the concatenated forward/backward
        # outputs of the previous one.
        self.rnn1 = nn.LSTM(input_size=input_dim * 2,
                            hidden_size=self._hid_dim,
                            num_layers=1,
                            batch_first=True,
                            dropout=0,
                            bidirectional=True)

        self.rnn2 = nn.LSTM(input_size=self._rnn2_input_size,
                            hidden_size=self._hid_dim,
                            num_layers=1,
                            batch_first=True,
                            dropout=0,
                            bidirectional=True)

        self.rnn3 = nn.LSTM(input_size=self._rnn2_input_size,
                            hidden_size=self._hid_dim,
                            num_layers=1,
                            batch_first=True,
                            dropout=0,
                            bidirectional=True)

        self.rnn4 = nn.LSTM(input_size=self._rnn2_input_size,
                            hidden_size=self._hid_dim,
                            num_layers=1,
                            batch_first=True,
                            dropout=0,
                            bidirectional=True)

        self.rnn5 = nn.LSTM(input_size=self._rnn2_input_size,
                            hidden_size=self._hid_dim,
                            num_layers=1,
                            batch_first=True,
                            dropout=0,
                            bidirectional=True)

        # Define FFN after RNN: project to num_class + 1 outputs, the extra
        # unit being the CTC blank label (self.blank = num_class below).
        self.fc = nn.Linear(self._ffn_input_size, num_class + 1)

        self.dropout = nn.Dropout(CONFIG['dropout'])

        self.use_cuda = CONFIG['cuda']
        self.ctc_loss = ctc.CTCLoss()  # awni hannun's warpctc bindings
        self.blank = num_class
        self.clip = CONFIG['optimizer']['clip']
        self.reduce_factor = 2
        self.ngram_lm = n_gram_lm(lm_path='WSJ_Character_7.lm',
                                  n_gram=3,
                                  character_set='Character_Set.txt')

        if CONFIG['xavier']:
            self.init_weights()

    def loss(self, batch):
        x, y, x_lens, y_lens = self.collate(*batch)
        out = self.forward_impl(x)

        # Reuse the CTCLoss instance created in __init__ rather than
        # constructing a new one on every call.
        loss = self.ctc_loss(out, y, x_lens, y_lens)
        return loss
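The stack above maps a (batch, time, input_dim * 2) input to (batch, time, 512), since each bidirectional layer concatenates a forward and a backward 256-unit direction. A minimal standalone sketch of that shape flow, with a hypothetical input_dim of 40 (not from the snippet):

import torch
import torch.nn as nn

input_dim, hid_dim = 40, 256  # input_dim is an assumed feature size; hid_dim matches the snippet
rnn1 = nn.LSTM(input_dim * 2, hid_dim, batch_first=True, bidirectional=True)
rnn2 = nn.LSTM(hid_dim * 2, hid_dim, batch_first=True, bidirectional=True)

x = torch.randn(8, 100, input_dim * 2)  # (batch, time, stacked features)
out, _ = rnn1(x)    # -> (8, 100, 512): forward and backward outputs concatenated
out, _ = rnn2(out)  # -> (8, 100, 512), and likewise for every later layer
print(out.shape)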
Example #3
def awni_loss(out, labels, input_lens, label_lens, blank_idx):
    """Calculates the loss using awni hannun's warpctc bindings.
    Only works with pytorch 0.4.
    """
    import functions.ctc as ctc  #awni hannun's ctc bindings
    loss_fn = ctc.CTCLoss(blank_label=blank_idx)
    loss = loss_fn(out, labels, input_lens, label_lens)
    return loss
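Since those bindings only work with PyTorch 0.4, a rough modern equivalent on PyTorch >= 1.0 can use the built-in nn.CTCLoss instead. Note the different conventions: it expects log-probabilities shaped (time, batch, classes) and an explicit blank index. All sizes below are hypothetical:

import torch
import torch.nn as nn

T, N, C = 50, 4, 29  # time steps, batch size, classes including blank
log_probs = torch.randn(T, N, C).log_softmax(dim=-1)
targets = torch.randint(1, C, (N, 10))  # label indices; 0 is reserved for blank here
input_lens = torch.full((N,), T, dtype=torch.long)
label_lens = torch.full((N,), 10, dtype=torch.long)

loss_fn = nn.CTCLoss(blank=0)
loss = loss_fn(log_probs, targets, input_lens, label_lens)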
Example #5
def test_model(test_label):
    """
        this checks if there are nans in a variety of test scenarios
        Arguments:
        test_label (str): the label passed into alter_logspec
    """

    with open(CONFIG_PATH, 'r') as fid:
        config = json.load(fid)

    opt_cfg = config["optimizer"]
    model_cfg = config["model"]

    model, preproc = load(MODEL_PATH)

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=opt_cfg["learning_rate"],
                                momentum=opt_cfg["momentum"])

    log_spec, samp_rate = load_audio(AUDIO_PATH, preproc)

    inputs = alter_logspec(log_spec, alter=test_label)
    labels = [preproc.encode(TEXT)]

    batch = (inputs, labels)
    x, y, x_lens, y_lens = model.collate(*batch)
    out, rnn_args = model.forward_impl(x)
    loss_fn = ctc.CTCLoss()
    loss = loss_fn(out, y, x_lens, y_lens)
    loss.backward()
    loss = loss.item()
    grad_norm = nn.utils.clip_grad_norm_(model.parameters(), 200)
    print(f"loss: {loss}, grad_norm: {grad_norm}")
    optimizer.step()

    print(f"Are there nan's? : {check_nan(model)}")
Example #6
    def loss(self, out, batch):
        x, y, x_lens, y_lens = self.collate(*batch)
        batch_size, _, out_dim = out.size()
        loss_fn = ctc.CTCLoss()
        loss = loss_fn(out, y, x_lens, y_lens)
        return loss