Example #1
class Recurrent(Layer):
    def __init__(self, units, length, stateful=False, *args, **kwargs):
        super(Recurrent, self).__init__(*args, **kwargs)
        self.units = units
        self.length = length
        self.output_dim = [length, units]
        self.stateful = stateful
        self.states = None
        # The base Layer is expected to provide input_dim (e.g. from kwargs);
        # if it is already known, build the wrapped torch module right away.
        if self.input_dim is not None:
            self.build(self.input_dim)

    @property
    def params(self):
        return list(self.layer.parameters())

    def build(self, input_dim):
        # Deferred construction: create the underlying torch recurrent module
        # once the input dimensionality is known.
        self.input_dim = input_dim
        self.layer = TorchRecurrent(self.input_dim, self.units, self.length)

    def clear_states(self):
        self.states = None

    def forward(self, X):
        X = super(Recurrent, self).forward(X)
        # In stateful mode the hidden state from the previous call is fed back in;
        # otherwise the wrapped module starts from its default initial state.
        if self.stateful and self.states is not None:
            outputs, self.states = self.layer.forward(X, self.states)
        else:
            outputs, self.states = self.layer.forward(X)

        return outputs
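
`TorchRecurrent` is not shown in this example. As a rough sketch of the interface the wrapper relies on (a constructor taking the input size, unit count and sequence length, and a `forward` that returns both the outputs and the final state), a hypothetical stand-in built on `torch.nn.RNN` with `batch_first=True` could look like the following; the project's real class may differ:

from torch import nn


class TorchRecurrent(nn.Module):
    """Hypothetical stand-in for the module wrapped by Recurrent above."""

    def __init__(self, input_dim, units, length):
        super().__init__()
        self.length = length
        self.rnn = nn.RNN(input_size=input_dim, hidden_size=units, batch_first=True)

    def forward(self, X, state=None):
        # X: (batch, length, input_dim) -> outputs: (batch, length, units),
        # state: (1, batch, units); matches the (outputs, states) pair used above.
        outputs, state = self.rnn(X, state)
        return outputs, state
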
Example #2
class IndRNN(Module):
    def __init__(self, hidden_size, *args, **kwargs):
        super().__init__()
        self.module = RNN(hidden_size=hidden_size, *args, **kwargs, nonlinearity='relu')

        # Terrible temporary solution to an issue regarding compacting weights re: CUDNN RNN
        # I'm not sure what is going on here, this is what weight_drop does so I stick to it
        self.module.flatten_parameters = self.widget_demagnetizer_y2k_edition

        # IndRNN keeps a single recurrent weight per hidden unit, so w_hh is a vector.
        # We need to register it in this module to make it work with weight dropout
        w_hh = FloatTensor(hidden_size).type_as(getattr(self.module, 'weight_hh_l0').data)
        w_hh.uniform_(-1, 1)

        getattr(self.module, 'bias_ih_l0').data.fill_(0)
        getattr(self.module, 'bias_hh_l0').data.fill_(0)

        self.register_parameter(name='weight_hh_l0', param=Parameter(w_hh))
        del self.module._parameters['weight_hh_l0']

    def widget_demagnetizer_y2k_edition(*args, **kwargs):
        # We need to replace flatten_parameters with a nothing function
        # It must be a function rather than a lambda as otherwise pickling explodes
        # We can't write boring code though, so ... WIDGET DEMAGNETIZER Y2K EDITION!
        # (╯°□°)╯︵ ┻━┻
        return

    def _setweights(self):
        # Expand the per-unit weight vector into a diagonal hidden-to-hidden matrix,
        # so each unit depends only on its own previous activation (the IndRNN
        # constraint), and hand it to the wrapped RNN in place of the deleted parameter.
        w_hh = getattr(self, 'weight_hh_l0')
        w_hh = diag(w_hh)
        setattr(self.module, 'weight_hh_l0', w_hh)

    def forward(self, *args):
        self._setweights()
        return self.module.forward(*args)
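
Assuming `RNN` here is `torch.nn.RNN`, the key trick is in `_setweights`: the learned vector becomes a diagonal hidden-to-hidden matrix, which is exactly the IndRNN constraint that each unit sees only its own previous activation. A small self-contained check of that equivalence (diagonal matrix product versus element-wise product), independent of the class above:

import torch

hidden_size = 4
u = torch.empty(hidden_size).uniform_(-1, 1)  # per-unit recurrent weights, as in IndRNN.__init__
h_prev = torch.randn(hidden_size)             # previous hidden state

# diag(u) @ h_prev multiplies each unit by its own scalar weight only,
# i.e. it equals the element-wise product u * h_prev.
assert torch.allclose(torch.diag(u) @ h_prev, u * h_prev)
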
Example #3
    def train_torch(self, X, y_true, batch_size, learning_rate, num_epochs,
                    print_many, verbose):
        self.batch_size = batch_size
        # Epochs at which to report progress: roughly one milestone per percent of training.
        progresses = {
            int(num_epochs // (100 / i)): i
            for i in range(1, 101, 1)
        }
        t0 = counter()
        durations = []

        # assumes a CUDA-capable GPU is available
        device = torch.device('cuda:0')
        rnn = RNN(input_size=self.input_dim,
                  hidden_size=self.hidden_dim,
                  num_layers=1,
                  nonlinearity='tanh',
                  bias=True,
                  batch_first=False).to(device)
        fc = FCLayer(self.hidden_dim, self.output_size, bias=True).to(device)
        params = [rnn.parameters(), fc.params()]
        optimizer = SGD(chain(*params), lr=learning_rate)
        for epoch in range(num_epochs):
            epoch_loss = 0
            for i in range(self.max_iters):
                x_batch = X[i * self.batch_size:(i + 1) * self.batch_size]
                # Reorder from (batch, time, features) to (time, batch, features),
                # which is what the RNN expects with batch_first=False.
                x_batch = np.array(
                    [x_batch[:, step, :] for step in range(self.time_steps)])
                y_true_batch = y_true[i * self.batch_size:(i + 1) *
                                      self.batch_size]
                # the last batch may be smaller than batch_size
                batch_size_local = x_batch.shape[1]

                # convert to pytorch tensor
                y_true_batch = y_true_batch.astype(np.int64)
                y_true_batch = torch.tensor(y_true_batch,
                                            requires_grad=False).to(device)
                x_batch = x_batch.astype(np.float32)
                x_batch = torch.tensor(x_batch, requires_grad=True).to(device)

                # forward pass: h_last has shape (num_layers=1, batch, hidden_dim)
                h_stack, h_last = rnn.forward(x_batch, hx=None)
                fc_out = fc.forward(h_last)
                log_y_pred = F.log_softmax(input=fc_out, dim=2)
                # drop the leading num_layers dimension -> (batch, output_size)
                log_y_pred = log_y_pred.view(batch_size_local,
                                             self.output_size)
                loss = F.nll_loss(input=log_y_pred,
                                  target=y_true_batch,
                                  reduction='mean')

                # update gradient
                optimizer.zero_grad()
                loss.backward()
                epoch_loss += loss.item()
                optimizer.step()

            durations.append(counter() - t0)
            t0 = counter()
            if (print_many and epoch % 100 == 0) or (not print_many
                                                     and epoch in progresses):
                print(
                    f"after epoch: {epoch}, mean epoch loss: {round(epoch_loss / self.max_iters, 3)}"
                )

        if verbose > 0:
            avg_epoch_time = sum(durations) / len(durations)
            print("average epoch time:", round(avg_epoch_time, 3))
            return avg_epoch_time
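
The loop above depends on names defined elsewhere in the project: `FCLayer`, `counter` (presumably something like `time.perf_counter`), plus the usual `numpy`, `itertools.chain`, `torch.nn.RNN`, `torch.optim.SGD` and `torch.nn.functional as F` imports. The only requirements it places on `FCLayer` are `forward`, `params` and `.to(device)`; a minimal hypothetical stand-in with that interface (not the project's actual class) would be a thin wrapper around `torch.nn.Linear`:

from torch import nn


class FCLayer(nn.Module):
    """Hypothetical fully connected layer with the interface train_torch expects."""

    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        self.linear = nn.Linear(in_features, out_features, bias=bias)

    def forward(self, x):
        # x: (1, batch, hidden_dim) -> (1, batch, output_size)
        return self.linear(x)

    def params(self):
        # Matches the fc.params() call used when building the optimizer.
        return self.linear.parameters()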