Code example #1
def get_enwik8_splits(data_file, batch_size, debug=False):
    """Load enwik8 and return 60/20/20 train/val/test splits, batched and on the GPU."""
    with open(data_file, 'r') as f:
        if debug:
            chars = f.read(10**4)  # debug: only read the first 10k characters
        else:
            chars = f.read()
    print("loaded {} chars".format(len(chars)))

    data = chars_to_tensor(chars)
    train_split_idx = int(len(chars) * 0.6)
    train_data = data[:train_split_idx]
    # trim to a multiple of batch_size, then reshape to (seq_len, batch_size)
    dim_batch = train_data.shape[0] // batch_size
    train_data = train_data[:dim_batch * batch_size].reshape(batch_size, -1)
    train_data = train_data.transpose(0, 1)
    train_data = cuda_move(train_data)

    val_split_idx = int(len(chars) * 0.8)
    val_data = data[train_split_idx:val_split_idx]
    dim_batch = val_data.shape[0] // batch_size
    val_data = val_data[:dim_batch * batch_size].reshape(batch_size, -1)
    val_data = val_data.transpose(0, 1)
    val_data = cuda_move(val_data)

    test_data = data[val_split_idx:]
    dim_batch = test_data.shape[0] // batch_size
    test_data = test_data[:dim_batch * batch_size].reshape(batch_size, -1)
    test_data = test_data.transpose(0, 1)
    test_data = cuda_move(test_data)
    return train_data, val_data, test_data
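All of the snippets on this page use two helpers from the cannon project that the listing never shows: cuda_move and chars_to_tensor. A minimal sketch of what they presumably look like (the implementations below are assumptions, not the repo's actual code):

import torch

def cuda_move(t):
    # move a tensor or module to the GPU when one is available; no-op otherwise
    return t.cuda() if torch.cuda.is_available() else t

def chars_to_tensor(chars):
    # map each character to an integer id using a vocabulary built from the text
    vocab = {c: i for i, c in enumerate(sorted(set(chars)))}
    return torch.tensor([vocab[c] for c in chars], dtype=torch.long)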
Code example #2
    def iter(self):
        x = cuda_move(torch.randn(*self.x_shape))
        y = cuda_move(torch.randn(*self.y_shape))

        t_x = torch.tensor([self.x_shape[0] for _ in range(x.shape[1])])
        t_y = torch.tensor([self.y_shape[0] for _ in range(x.shape[1])])
        yield x, (y, t_x, t_y)
Code example #3
File: mnist.py Project: AntonioCarta/cannon
    def __init__(self, data_dir, set_key='train', debug=False, batch_size=64):
        """
        MNIST Digit Dataset.

        Args:
            data_dir: file directory
            set_key: one of 'train', 'valid', 'test'
            debug: if True only loads a small number of samples (e.g. 1000)
        """
        super().__init__()
        self.set_key = set_key
        self.debug = debug
        self.batch_size = batch_size
        bool_train = set_key != 'test'
        raw_data = datasets.MNIST(data_dir, train=bool_train, download=True)

        if set_key == 'train':
            self.data = raw_data.data[:50000]
            self.labels = raw_data.targets[:50000]
        elif set_key == 'valid':
            self.data = raw_data.data[50000:]
            self.labels = raw_data.targets[50000:]
        elif set_key == 'test':
            # .test_data/.test_labels are deprecated torchvision aliases for .data/.targets
            self.data = raw_data.data
            self.labels = raw_data.targets

        if debug:
            self.data = self.data[:1000]
            self.labels = self.labels[:1000]
        self.data = cuda_move(self.data.float())
        self.labels = cuda_move(self.labels)
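A minimal usage sketch for the dataset above (the enclosing class name MNISTDataset is hypothetical; only its __init__ appears in the source):

train_set = MNISTDataset('./data', set_key='train', debug=True)  # hypothetical class name
print(train_set.data.shape)    # torch.Size([1000, 28, 28]), float, on GPU if available
print(train_set.labels.shape)  # torch.Size([1000])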
Code example #4
    def __init__(self, data_dir, key, batch_size=32, debug=False) -> None:
        """
            IAMOnDB Handwritten character recognition.
            A many-to-one sequence problem, typically solved with RNN+CTC loss.
            refs: https://papers.nips.cc/paper/3213-unconstrained-on-line-handwriting-recognition-with-recurrent-neural-networks
        """
        super().__init__()
        self.key = key
        self.batch_size = batch_size

        if not path.exists(data_dir + f'/x_{key}.npy'):
            print("Loading from raw data.")
            f_ids = data_dir + f'/ids_{key}.txt'
            with open(f_ids, mode='r') as fin:
                file_names = [line.strip() for line in fin.readlines()]
            self.xs, self.ys = fetch_iamondb_from_list(data_dir, file_names)
            np.save(data_dir + f'/x_{key}.npy', self.xs, allow_pickle=True)
            np.save(data_dir + f'/y_{key}.npy', self.ys, allow_pickle=True)
        else:
            print("Loading numpy arrays.")
            self.xs = np.load(data_dir + f'/x_{key}.npy', allow_pickle=True)
            self.ys = np.load(data_dir + f'/y_{key}.npy', allow_pickle=True)

        self.xs = [cuda_move(torch.tensor(el)) for el in self.xs]
        self.ys = [cuda_move(torch.tensor(el)) for el in self.ys]

        if debug:
            self.xs = self.xs[:200]
            self.ys = self.ys[:200]
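The docstring above mentions RNN+CTC. For context, a minimal sketch of scoring a batch with torch.nn.CTCLoss (the shapes and the 58-symbol alphabet size, taken from code example #15 below, are assumptions about this dataset):

import torch
import torch.nn as nn

T, B, C = 120, 32, 58                    # time steps, batch size, alphabet size
log_probs = torch.randn(T, B, C).log_softmax(2)
targets = torch.randint(1, C, (B, 20), dtype=torch.long)  # 0 is the CTC blank
input_lengths = torch.full((B,), T, dtype=torch.long)
target_lengths = torch.full((B,), 20, dtype=torch.long)
loss = nn.CTCLoss(blank=0)(log_probs, targets, input_lengths, target_lengths)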
Code example #5
    def _save_gradient_histograms(self, model_trainer, train_data):
        # Add gradient norm histogram
        n_iter = model_trainer.global_step
        random_shuffle = list(train_data.get_one_hot_list())
        random.shuffle(random_shuffle)
        for par in model_trainer.model.parameters():
            par.accumulated_grad = []

        n_samples = 100
        for X_i, y_i in random_shuffle[:n_samples]:
            X_data, y_data = cuda_move(X_i), cuda_move(y_i)
            # TODO: backprop through thousands of time steps
            y_out = model_trainer.model.forward(X_data, logits=True)
            loss = F.binary_cross_entropy_with_logits(y_out, y_data)
            model_trainer.model.zero_grad()
            loss.backward()

            for par in model_trainer.model.parameters():
                # clone the gradient: .grad is zeroed and rewritten on the next iteration
                par.accumulated_grad.append(par.grad.detach().clone())

        for name, par in model_trainer.model.named_parameters():
            t = torch.stack(par.accumulated_grad, 0)
            self.writer.add_histogram('grad/' + name,
                                      t.clone().cpu().data.numpy(),
                                      n_iter,
                                      bins='sturges')
            par.accumulated_grad = None
Code example #6
    def get_batch(self, batch_size):
        rand_seq = torch.randint(0, self.K, (self.S, batch_size, 1)).long()
        blank_T = self.K * torch.ones((self.T, batch_size, 1)).long()
        blank_S = self.K * torch.ones((self.S, batch_size, 1)).long()

        input_seq = torch.cat([rand_seq, blank_T, blank_S], dim=0)
        input_seq[self.T + self.S - 1, :] = self.K + 1
        output_seq = torch.cat([blank_S, blank_T, rand_seq], dim=0)
        return cuda_move(input_seq), cuda_move(output_seq)
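This is the classic copy task: the model sees S random symbols, a delay of blanks, a go marker, and must reproduce the symbols at the end. A standalone sketch of the same batch layout (K, S, T mirror the attributes used above):

import torch

def copy_task_batch(K, S, T, batch_size):
    # S random symbols in [0, K), followed by blanks (symbol K); the last
    # blank before the answer section (index T + S - 1) is the go marker K + 1
    rand_seq = torch.randint(0, K, (S, batch_size, 1)).long()
    blank_T = K * torch.ones((T, batch_size, 1)).long()
    blank_S = K * torch.ones((S, batch_size, 1)).long()
    input_seq = torch.cat([rand_seq, blank_T, blank_S], dim=0)
    input_seq[T + S - 1, :] = K + 1
    # target: blanks during presentation and delay, then the original symbols
    output_seq = torch.cat([blank_S, blank_T, rand_seq], dim=0)
    return input_seq, output_seq

x, y = copy_task_batch(K=8, S=10, T=20, batch_size=4)
print(x.shape, y.shape)  # torch.Size([40, 4, 1]) twice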
Code example #7
File: timit.py Project: AntonioCarta/cannon
    def iter(self):
        it = timit_generator(self.X, self.y, self.n_classes, self.batch_size,
                             self.noise_std)
        for el in it:
            yield [cuda_move(x) for x in el]

        if self.key == 'noisy_valid':
            for _ in range(4):
                it = timit_generator(self.X, self.y, self.n_classes,
                                     self.batch_size, self.noise_std)
                for el in it:
                    yield [cuda_move(x) for x in el]
Code example #8
File: mslmn.py Project: AntonioCarta/cannon
 def init_hidden(self, batch_size: int) -> Tensor:
     if self.learn_h0:
         # expand (not reshape) replicates the learned initial state across the batch
         return self.h0[:self.memory_size *
                        self.num_modules].unsqueeze(0).expand(
                            batch_size, -1)
     else:
         return cuda_move(
             torch.zeros(batch_size, self.memory_size * self.num_modules))
Code example #9
 def __init__(self, data_dir, seq_id):
     """
         Sequence generation task. The model receives no input and must produce the desired sequence as output.
         ref: Clockwork RNN
     """
     super().__init__()
     self.data_dir = data_dir
     self.seq_id = seq_id
     self.ys = np.load(data_dir + 'samples.npy')[seq_id].reshape(-1, 1, 1)
     self.ys = cuda_move(torch.tensor(self.ys).float())
Code example #10
 def before_training(self, model_trainer):
     if self.input_dim is not None:
         # a plain tensor works as dummy input; torch.autograd.Variable is deprecated
         dummy_input = cuda_move(torch.zeros(self.input_dim))
         model_file = self.log_dir + 'onnx_model.proto'
         torch.onnx.export(model_trainer.model,
                           dummy_input,
                           model_file,
                           verbose=True)
         self.writer.add_graph_onnx(model_file)
Code example #11
File: piano_roll.py Project: AntonioCarta/cannon
 def iter(self):
     n_samples = len(self.xs)
     if self.key == 'train':  # shuffle dataset
         idxs = torch.randperm(n_samples)
         self.xs = [self.xs[ii] for ii in idxs]
         self.ys = [self.ys[ii] for ii in idxs]
     for ii in range(0, len(self.xs), self.batch_size):
         x_batch = self.xs[ii:ii + self.batch_size]
         y_batch = self.ys[ii:ii + self.batch_size]
         t_batch = torch.tensor([el.shape[0] for el in x_batch])
         bs = len(x_batch)
         t_max = max(el.shape[0] for el in x_batch)
         x = torch.zeros(t_max, bs, x_batch[0].shape[-1])
         y = torch.zeros(t_max, bs, x_batch[0].shape[-1])
         for bi, (x_el, y_el) in enumerate(zip(x_batch, y_batch)):
             x[:x_el.shape[0], bi] = x_el
             y[:x_el.shape[0], bi] = y_el
         x = cuda_move(x)
         y = cuda_move(y)
         t_batch = cuda_move(t_batch)
         yield x, (y, t_batch)
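Since the batches are zero-padded to t_max and come with the true lengths in t_batch, a consumer can mask the padding, for instance with pack_padded_sequence (a sketch; the sizes below are placeholders, and 88 pitches is a common piano-roll feature size):

import torch
from torch.nn.utils.rnn import pack_padded_sequence

t_max, bs, n_feat = 50, 4, 88            # placeholder sizes
x = torch.zeros(t_max, bs, n_feat)       # zero-padded batch, as yielded above
t_batch = torch.tensor([50, 42, 30, 7])  # true sequence lengths
packed = pack_padded_sequence(x, t_batch, enforce_sorted=False)
rnn = torch.nn.LSTM(input_size=n_feat, hidden_size=64)
out, _ = rnn(packed)                     # the padding never reaches the RNN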
Code example #12
def train_foo(log_dir, params):
    experiment_log = cannon.experiment.create_experiment_logger(log_dir)

    DEBUG = params['DEBUG']
    experiment_log.info("Loading Data.")
    if params['data_type'] == 'perseq_mnist':
        laes_file = './logs/perseq_mnist_laes_mem128.pkl'
        train_data = PermutedPixelMNIST(data_dir,
                                        'train',
                                        DEBUG,
                                        perm_file=data_dir + '/perm.pt')
        val_data = PermutedPixelMNIST(data_dir,
                                      'valid',
                                      DEBUG,
                                      perm_file=data_dir + '/perm.pt')
        test_data = PermutedPixelMNIST(data_dir,
                                       'test',
                                       DEBUG,
                                       perm_file=data_dir + '/perm.pt')
    elif params['data_type'] == 'seq_mnist':
        laes_file = './logs/seq_mnist_laes_mem128.pkl'
        train_data = SequentialPixelMNIST(data_dir, 'train', DEBUG)
        val_data = SequentialPixelMNIST(data_dir, 'valid', DEBUG)
        test_data = SequentialPixelMNIST(data_dir, 'test', DEBUG)
    experiment_log.info("Data Loaded.")

    if DEBUG:
        log_dir = './logs/debug/'
        params['n_epochs'] = 5

    model = LAESDeepReadout(mem_size=params['mem_size'],
                            num_layers=params['num_layers'],
                            num_classes=10,
                            pretrained_laes_file=laes_file)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=params['learning_rate'],
                                 weight_decay=params['weight_decay'])
    trainer = SequentialTaskTrainer(cuda_move(model),
                                    n_epochs=params['n_epochs'],
                                    optimizer=optimizer,
                                    log_dir=log_dir)
    trainer.append_hyperparam_dict(params)
    trainer.fit(train_data, val_data)
    te, ta = trainer.compute_metrics(test_data)
    trainer.logger.info(f"TEST set performance: loss {te}, acc {ta}")
    return trainer.best_result
Code example #13
 def after_train_before_validate(self, model_trainer):
     model_name = self.log_dir + 'best_model'
     if os.path.isfile(model_name + '.pt'):
         # model_trainer.model = cuda_move(torch.load(model_name + '.pt'))
         model_trainer.model.load_state_dict(torch.load(model_name + '.pt'))
         model_trainer.logger.info(
             "Loaded best model checkpoint before final validation.")
     elif os.path.isfile(model_name + '.ptj'):
         try:
             model_trainer.model = cuda_move(
                 torch.jit.load(model_name + '.ptj'))
             model_trainer.logger.info(
                 "Loaded best model checkpoint before final validation.")
         except BaseException as e:
             model_trainer.logger.info(str(e))
             model_trainer.logger.info(
                 "Could not load model. Checkpoint is corrupted.")
     else:
         model_trainer.logger.info(
             "Checkpoint file not found. Using current model for final validation."
         )
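For reference, the checkpoint this callback loads is presumably written earlier in training with the standard state_dict idiom (a sketch, not the repo's actual save code):

# somewhere in the training loop, when validation improves:
torch.save(model_trainer.model.state_dict(), self.log_dir + 'best_model.pt')
# or, for the TorchScript branch loaded above:
torch.jit.save(torch.jit.script(model_trainer.model), self.log_dir + 'best_model.ptj')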
Code example #14
File: bench_rnn.py Project: AntonioCarta/cannon
def bench():
    set_gpu()
    set_allow_cuda(True)
    T, B, F = 100, 64, 300
    n_trials = 10
    fake_input = cuda_move(torch.zeros(T, B).long())

    print("JitRNN ", end='')
    rnn = RNNLayer(100, 100)
    model = cuda_move(DiscreteRNN(F, F, 100, rnn=rnn))
    model(fake_input)
    foo = lambda: model(fake_input)
    timeit(foo, n_trials)

    print("JitLMN ", end='')
    rnn = LMNLayer(100, 100, 100)
    model = cuda_move(DiscreteRNN(F, F, 100, rnn=rnn))
    model(fake_input)
    foo = lambda: model(fake_input)
    timeit(foo, n_trials)

    print("JitSlowLMN ", end='')
    rnn = LMNLayer(100, 100, 100)
    model = cuda_move(DiscreteRNN(F, F, 100, rnn=rnn))
    model(fake_input)
    foo = lambda: model(fake_input)
    timeit(foo, n_trials)

    print("SlowLMN ", end='')
    rnn = SlowLMNLayer(100, 100, 100)
    model = cuda_move(SlowDiscreteRNN(F, F, 100, rnn=rnn))
    model(fake_input)
    foo = lambda: model(fake_input)
    timeit(foo, n_trials)

    print("SlowLSTM ", end='')
    rnn = SlowLSTM(input_size=100, hidden_size=100, num_layers=1)
    model = cuda_move(SlowDiscreteRNN(F, F, 100, rnn=rnn))
    model(fake_input)
    foo = lambda: model(fake_input)
    timeit(foo, n_trials)
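timeit here is a project helper, not the standard-library module. A minimal sketch of what it presumably does (the CUDA synchronization is an assumption, but without it GPU timings would be meaningless):

import time
import torch

def timeit(foo, n_trials):
    # average wall-clock time over n_trials calls; synchronize so that
    # asynchronous CUDA kernels actually finish before the clock is read
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    start = time.time()
    for _ in range(n_trials):
        foo()
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    print(f"{(time.time() - start) / n_trials:.4f}s per call")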
Code example #15
    print(
        f"Time dim (avg, std): {np.mean(train_x[:, 1]), np.std(train_x[:, 1])}"
    )
    print(f"x dim (avg, std): {np.mean(train_x[:, 2]), np.std(train_x[:, 2])}")
    print(f"y dim (avg, std): {np.mean(train_x[:, 3]), np.std(train_x[:, 3])}")

    print(
        f"Time dim (min, max): {np.min(train_x[:, 1]), np.max(train_x[:, 1])}")
    print(f"x dim (min, max): {np.min(train_x[:, 2]), np.max(train_x[:, 2])}")
    print(f"y dim (min, max): {np.min(train_x[:, 3]), np.max(train_x[:, 3])}")

    # Class Loader and loss computation
    valid_data = IAMOnDB(data_dir, key='valid')
    e = 0
    for batch in valid_data.iter():
        x, (y, x_t_batch, y_t_batch) = batch
        rand_pred = cuda_move(torch.randn(x.shape[0], x.shape[1], 58))
        e += valid_data.loss_score(batch, rand_pred)
    print(f"err: {e}")

    # Data Loader
    data = fetch_iamondb(data_dir)
    (train_x, train_y), (valid_x, valid_y), (test_x, test_y) = data

    assert len(train_x) == len(train_y)
    assert len(valid_x) == len(valid_y)
    assert len(test_x) == len(test_y)
    # 5,364, 1,438, 1,518 and 3,859 written lines taken from 775, 192, 216 and 544 forms
    print(f"# of features: {train_x[0].shape[1]}")
    print(
        f"classes: from {np.min([np.min(el) for el in train_y])} to {np.max([np.max(el) for el in train_y])}"
    )
Code example #16
File: bench_rnn.py Project: AntonioCarta/cannon
 def init_hidden(self, batch_size):
     h0 = torch.zeros(self.num_layers, batch_size, self.hidden_size)
     c0 = torch.zeros(self.num_layers, batch_size, self.hidden_size)
     return cuda_move(h0), cuda_move(c0)
Code example #17
File: bench_rnn.py Project: AntonioCarta/cannon
 def init_hidden(self, batch_size):
     return cuda_move(torch.zeros(batch_size, self.memory_size))
Code example #18
File: mslmn.py Project: AntonioCarta/cannon
                     torch.mm(x_prev, self.Wxh[:self.hidden_size*max_active_module].t()) + \
                     self.bh[:self.hidden_size*max_active_module]

        h_t = torch.cat(
            [h_t_update, h_prev[:, self.hidden_size * max_active_module:]],
            dim=1)
        return h_t, h_t


if __name__ == '__main__':
    import torch.nn.functional as F

    # test model update equivalence
    b, i, h, M = 13, 3, 5, 7
    num_modules = 11
    x = cuda_move(torch.randn(4, b, i))
    model = cuda_move(MultiScaleLMN(i, h, M, num_modules=1, max_modules=3))

    m_prev = model.init_hidden(b)
    y_old = model(x, m_prev)

    model.rnn_cell.update_num_modules(model.rnn_cell.num_modules + 1)
    g = model.rnn_cell.num_modules
    model.rnn_cell.Whm.data[(g - 1) * M:] = torch.zeros_like(
        model.rnn_cell.Whm.data[(g - 1) * M:])
    model.rnn_cell.Whm.data[(g - 1) * M:g * M] = torch.randn_like(
        model.rnn_cell.Whm.data[(g - 1) * M:g * M])
    model.rnn_cell.Wmh.data[:, (g - 1) * M:] = torch.zeros_like(
        model.rnn_cell.Wmh.data[:, (g - 1) * M:])
    model.rnn_cell.Wmm.data[(g - 1) * M:g * M, :g * M] = torch.zeros_like(
        model.rnn_cell.Wmm.data[(g - 1) * M:g * M, :g * M])
Code example #19
 def __init__(self, A, B, bias):
     self.hidden_size = B.shape[0]
     self.A = cuda_move(torch.tensor(A).float())
     self.B = cuda_move(torch.tensor(B).float())
     self.bias = cuda_move(torch.tensor(bias).float())
Code example #20
 def __init__(self):
     super().__init__()
     self.t = cuda_move(torch.zeros(5, 5))
Code example #21
File: mslmn.py Project: AntonioCarta/cannon
 def init_hidden(self, batch_size: int) -> Tensor:
     return cuda_move(
         torch.zeros(batch_size, self.hidden_size * self.num_modules))
Code example #22
        return -1, -1


if __name__ == '__main__':
    ms = 13
    params = {'pretrain_every': 2, 'memory_size': ms}
    log_dir = './logs/debug/'

    x_shape = (65, 32, 11)
    y_shape = x_shape
    data = DummyData(x_shape, y_shape)

    rnn = MultiScaleLMN(x_shape[2], 5, ms, num_modules=1, max_modules=5)
    cb = IncrementalTrainingCallback(params, [rnn], data, data, data, log_dir)

    model = cuda_move(ItemClassifier(rnn, ms * 5, 7))
    trainer = DummyTrainer(model)

    for x, y in data.iter():
        break

    y_old = model(x)
    cb.pretrain_module(trainer)
    y_new = model(x)
    assert ((y_old - y_new)**2).sum() == 0

    cb.pretrain_module(trainer)
    m_prev = rnn.init_hidden(x.shape[1])
    y_new = model(x)
    assert ((y_old - y_new)**2).sum() == 0
Code example #23
 def __call__(self, x):
     h0 = cuda_move(torch.zeros(x.shape[1], self.hidden_size))
     for ti in range(x.shape[0]):
         h0 = (x[ti] - self.bias) @ self.A.T + h0 @ self.B.T
     return h0
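Combined with the constructor in code example #19, this unrolls the linear recurrence h_t = A(x_t - bias) + B h_{t-1} and returns the final state. A small usage sketch (LinearRNN is a hypothetical name for the class; cuda_move as sketched near the top of this page):

import numpy as np
import torch

A = np.random.randn(5, 3)                # hidden_size x input_size
B = np.random.randn(5, 5)                # hidden_size x hidden_size
bias = np.zeros(3)
rnn = LinearRNN(A, B, bias)              # hypothetical name for the class above
x = cuda_move(torch.randn(20, 8, 3))     # (time, batch, features)
h_T = rnn(x)                             # (8, 5): final hidden state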
Code example #24
 def iter(self):
     # x is just a dummy input because this dataset does not have an input
     x = cuda_move(torch.zeros(self.ys.shape[0], 1, 1))
     yield x, self.ys