def get_enwik8_splits(data_file, batch_size, debug=False):
    with open(data_file, 'r') as f:
        if debug:
            chars = f.read(10**4)
        else:
            chars = f.read()
    print("loaded {} chars".format(len(chars)))
    data = chars_to_tensor(chars)

    def batchify(split):
        # trim to a multiple of batch_size, then reshape to (time, batch)
        dim_batch = split.shape[0] // batch_size
        split = split[:dim_batch * batch_size].reshape(batch_size, -1)
        return cuda_move(split.transpose(0, 1))

    # 60/20/20 train/validation/test split
    train_split_idx = int(len(chars) * 0.6)
    val_split_idx = int(len(chars) * 0.8)
    train_data = batchify(data[:train_split_idx])
    val_data = batchify(data[train_split_idx:val_split_idx])
    test_data = batchify(data[val_split_idx:])
    return train_data, val_data, test_data
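# A minimal usage sketch (hypothetical file path; assumes the chars_to_tensor
# and cuda_move helpers used above). Each split is a (time, batch) tensor of
# character ids:
train, val, test = get_enwik8_splits('./data/enwik8.txt', batch_size=64)
print(train.shape)  # (T_train, 64), with T_train = int(0.6 * len(chars)) // 64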
def iter(self):
    x = cuda_move(torch.randn(*self.x_shape))
    y = cuda_move(torch.randn(*self.y_shape))
    # every batch element uses the full sequence length
    t_x = torch.tensor([self.x_shape[0] for _ in range(x.shape[1])])
    t_y = torch.tensor([self.y_shape[0] for _ in range(x.shape[1])])
    yield x, (y, t_x, t_y)
def __init__(self, data_dir, set_key='train', debug=False, batch_size=64):
    """ MNIST Digit Dataset.

    Args:
        data_dir: file directory
        set_key: one of 'train', 'valid', 'test'
        debug: if True only loads a small number of samples (e.g. 1000)
    """
    super().__init__()
    self.set_key = set_key
    self.debug = debug
    self.batch_size = batch_size

    bool_train = set_key != 'test'
    raw_data = datasets.MNIST(data_dir, train=bool_train, download=True)
    if set_key == 'train':
        self.data = raw_data.data[:50000]
        self.labels = raw_data.targets[:50000]
    elif set_key == 'valid':
        self.data = raw_data.data[50000:]
        self.labels = raw_data.targets[50000:]
    elif set_key == 'test':
        # .data/.targets replace the deprecated .test_data/.test_labels
        self.data = raw_data.data
        self.labels = raw_data.targets

    if debug:
        self.data = self.data[:1000]
        self.labels = self.labels[:1000]
    self.data = cuda_move(self.data.float())
    self.labels = cuda_move(self.labels)
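# A hypothetical usage sketch (the class name SeqMNIST is assumed; torchvision
# downloads the data on the first call). With debug=True only 1000 samples are
# kept, so self.data has shape (1000, 28, 28) on the current device:
#   valid_set = SeqMNIST('./data', set_key='valid', debug=True)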
def __init__(self, data_dir, key, batch_size=32, debug=False) -> None:
    """ IAMOnDB handwritten character recognition.

    A many-to-one sequence problem, typically solved with an RNN + CTC loss.
    ref: https://papers.nips.cc/paper/3213-unconstrained-on-line-handwriting-recognition-with-recurrent-neural-networks
    """
    super().__init__()
    self.key = key
    self.batch_size = batch_size

    if not path.exists(data_dir + f'/x_{key}.npy'):
        print("Loading from raw data.")
        f_ids = data_dir + f'/ids_{key}.txt'
        with open(f_ids, mode='r') as f:
            file_names = [line.strip() for line in f.readlines()]
        self.xs, self.ys = fetch_iamondb_from_list(data_dir, file_names)
        np.save(data_dir + f'/x_{key}.npy', self.xs, allow_pickle=True)
        np.save(data_dir + f'/y_{key}.npy', self.ys, allow_pickle=True)
    else:
        print("Loading numpy arrays.")
        self.xs = np.load(data_dir + f'/x_{key}.npy', allow_pickle=True)
        self.ys = np.load(data_dir + f'/y_{key}.npy', allow_pickle=True)

    self.xs = [cuda_move(torch.tensor(el)) for el in self.xs]
    self.ys = [cuda_move(torch.tensor(el)) for el in self.ys]
    if debug:
        self.xs = self.xs[:200]
        self.ys = self.ys[:200]
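# Usage sketch (hypothetical data directory): the first call parses the raw
# IAMOnDB files and caches them as .npy arrays; later calls hit the cache.
#   train_set = IAMOnDB('./data/iamondb', key='train', batch_size=32)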
def _save_gradient_histograms(self, model_trainer, train_data):
    # Add gradient norm histogram
    n_iter = model_trainer.global_step
    random_shuffle = list(train_data.get_one_hot_list())
    random.shuffle(random_shuffle)
    for par in model_trainer.model.parameters():
        par.accumulated_grad = []

    n_samples = 100
    for X_i, y_i in random_shuffle[:n_samples]:
        X_data, y_data = cuda_move(X_i), cuda_move(y_i)
        # TODO: backprop through thousands of time steps
        y_out = model_trainer.model.forward(X_data, logits=True)
        loss = F.binary_cross_entropy_with_logits(y_out, y_data)
        model_trainer.model.zero_grad()
        loss.backward()
        for par in model_trainer.model.parameters():
            # clone() so later zero_grad() calls do not overwrite stored values
            par.accumulated_grad.append(par.grad.clone())

    for name, par in model_trainer.model.named_parameters():
        t = torch.stack(par.accumulated_grad, 0)
        self.writer.add_histogram('grad/' + name,
                                  t.clone().cpu().data.numpy(),
                                  n_iter, bins='sturges')
        par.accumulated_grad = None
def get_batch(self, batch_size):
    # Copy task: a random sequence of S symbols out of K, a blank gap of
    # length T, then the model must reproduce the sequence. Symbol K is the
    # blank, symbol K + 1 the recall delimiter.
    rand_seq = torch.randint(0, self.K, (self.S, batch_size, 1)).long()
    blank_T = self.K * torch.ones((self.T, batch_size, 1)).long()
    blank_S = self.K * torch.ones((self.S, batch_size, 1)).long()
    input_seq = torch.cat([rand_seq, blank_T, blank_S], dim=0)
    input_seq[self.T + self.S - 1, :] = self.K + 1  # delimiter before recall
    output_seq = torch.cat([blank_S, blank_T, rand_seq], dim=0)
    return cuda_move(input_seq), cuda_move(output_seq)
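# A standalone sketch of the resulting copy-task layout (hypothetical sizes
# K=3, S=2, T=4, batch of 1), mirroring the construction above:
import torch

K, S, T, B = 3, 2, 4, 1
rand_seq = torch.randint(0, K, (S, B, 1)).long()
blank_T = K * torch.ones((T, B, 1)).long()
blank_S = K * torch.ones((S, B, 1)).long()
inp = torch.cat([rand_seq, blank_T, blank_S], dim=0)
inp[T + S - 1, :] = K + 1
out = torch.cat([blank_S, blank_T, rand_seq], dim=0)
print(inp[:, 0, 0].tolist())  # e.g. [1, 0, 3, 3, 3, 4, 3, 3] -> delimiter 4 at step S+T-1
print(out[:, 0, 0].tolist())  # e.g. [3, 3, 3, 3, 3, 3, 1, 0] -> recall at the end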
def iter(self):
    it = timit_generator(self.X, self.y, self.n_classes, self.batch_size,
                         self.noise_std)
    for el in it:
        yield [cuda_move(x) for x in el]
    if self.key == 'noisy_valid':
        # noise is resampled at each pass: iterate 4 extra times over the
        # noisy validation set
        for _ in range(4):
            it = timit_generator(self.X, self.y, self.n_classes,
                                 self.batch_size, self.noise_std)
            for el in it:
                yield [cuda_move(x) for x in el]
def init_hidden(self, batch_size: int) -> Tensor:
    if self.learn_h0:
        # expand (not reshape) the learned initial state over the batch
        h0 = self.h0[:self.memory_size * self.num_modules]
        return h0.unsqueeze(0).expand(batch_size, -1)
    else:
        return cuda_move(
            torch.zeros(batch_size, self.memory_size * self.num_modules))
def __init__(self, data_dir, seq_id):
    """ Sequence generation task.

    The model receives no input and must produce the target sequence
    as output.
    ref: Clockwork RNN
    """
    super().__init__()
    self.data_dir = data_dir
    self.seq_id = seq_id
    self.ys = np.load(data_dir + 'samples.npy')[seq_id].reshape(-1, 1, 1)
    self.ys = cuda_move(torch.tensor(self.ys).float())
def before_training(self, model_trainer):
    if self.input_dim is not None:
        # Variable is deprecated since PyTorch 0.4; a plain tensor suffices
        dummy_input = cuda_move(torch.zeros(self.input_dim))
        model_file = self.log_dir + 'onnx_model.proto'
        torch.onnx.export(model_trainer.model, dummy_input, model_file,
                          verbose=True)
        self.writer.add_graph_onnx(model_file)
def iter(self):
    n_samples = len(self.xs)
    if self.key == 'train':
        # reshuffle the dataset at each epoch
        idxs = torch.randperm(n_samples)
        self.xs = [self.xs[ii] for ii in idxs]
        self.ys = [self.ys[ii] for ii in idxs]

    for ii in range(0, len(self.xs), self.batch_size):
        x_batch = self.xs[ii:ii + self.batch_size]
        y_batch = self.ys[ii:ii + self.batch_size]
        t_batch = torch.tensor([el.shape[0] for el in x_batch])

        # zero-pad every sequence to the longest one in the batch
        bs = len(x_batch)
        t_max = max(el.shape[0] for el in x_batch)
        x = torch.zeros(t_max, bs, x_batch[0].shape[-1])
        y = torch.zeros(t_max, bs, x_batch[0].shape[-1])
        for bi, (x_el, y_el) in enumerate(zip(x_batch, y_batch)):
            x[:x_el.shape[0], bi] = x_el
            y[:x_el.shape[0], bi] = y_el
        x = cuda_move(x)
        y = cuda_move(y)
        t_batch = cuda_move(t_batch)
        yield x, (y, t_batch)
def train_foo(log_dir, params):
    experiment_log = cannon.experiment.create_experiment_logger(log_dir)
    DEBUG = params['DEBUG']

    experiment_log.info("Loading Data.")
    # data_dir is assumed to be defined at module level
    if params['data_type'] == 'perseq_mnist':
        laes_file = './logs/perseq_mnist_laes_mem128.pkl'
        train_data = PermutedPixelMNIST(data_dir, 'train', DEBUG,
                                        perm_file=data_dir + '/perm.pt')
        val_data = PermutedPixelMNIST(data_dir, 'valid', DEBUG,
                                      perm_file=data_dir + '/perm.pt')
        test_data = PermutedPixelMNIST(data_dir, 'test', DEBUG,
                                       perm_file=data_dir + '/perm.pt')
    elif params['data_type'] == 'seq_mnist':
        laes_file = './logs/seq_mnist_laes_mem128.pkl'
        train_data = SequentialPixelMNIST(data_dir, 'train', DEBUG)
        val_data = SequentialPixelMNIST(data_dir, 'valid', DEBUG)
        test_data = SequentialPixelMNIST(data_dir, 'test', DEBUG)
    experiment_log.info("Data Loaded.")

    if DEBUG:
        log_dir = './logs/debug/'
        params['n_epochs'] = 5

    model = LAESDeepReadout(mem_size=params['mem_size'],
                            num_layers=params['num_layers'],
                            num_classes=10,
                            pretrained_laes_file=laes_file)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=params['learning_rate'],
                                 weight_decay=params['weight_decay'])
    trainer = SequentialTaskTrainer(cuda_move(model),
                                    n_epochs=params['n_epochs'],
                                    optimizer=optimizer,
                                    log_dir=log_dir)
    trainer.append_hyperparam_dict(params)
    trainer.fit(train_data, val_data)

    te, ta = trainer.compute_metrics(test_data)
    trainer.logger.info(f"TEST set performance: loss {te}, acc {ta}")
    return trainer.best_result
def after_train_before_validate(self, model_trainer):
    model_name = self.log_dir + 'best_model'
    if os.path.isfile(model_name + '.pt'):
        model_trainer.model.load_state_dict(torch.load(model_name + '.pt'))
        model_trainer.logger.info(
            "Loaded best model checkpoint before final validation.")
    elif os.path.isfile(model_name + '.ptj'):
        try:
            model_trainer.model = cuda_move(
                torch.jit.load(model_name + '.ptj'))
            model_trainer.logger.info(
                "Loaded best model checkpoint before final validation.")
        except BaseException as e:
            model_trainer.logger.info(str(e))
            model_trainer.logger.info(
                "Could not load model. Checkpoint is corrupted.")
    else:
        model_trainer.logger.info(
            "Checkpoint file not found. Using current model for final validation.")
def bench():
    set_gpu()
    set_allow_cuda(True)
    T, B, F = 100, 64, 300
    n_trials = 10
    fake_input = cuda_move(torch.zeros(T, B).long())

    def bench_model(name, model):
        print(name + ' ', end='')
        model = cuda_move(model)
        model(fake_input)  # warm-up call before timing
        timeit(lambda: model(fake_input), n_trials)

    bench_model("JitRNN", DiscreteRNN(F, F, 100, rnn=RNNLayer(100, 100)))
    bench_model("JitLMN", DiscreteRNN(F, F, 100, rnn=LMNLayer(100, 100, 100)))
    bench_model("JitSlowLMN", DiscreteRNN(F, F, 100, rnn=LMNLayer(100, 100, 100)))
    bench_model("SlowLMN",
                SlowDiscreteRNN(F, F, 100, rnn=SlowLMNLayer(100, 100, 100)))
    bench_model("SlowLSTM",
                SlowDiscreteRNN(F, F, 100,
                                rnn=SlowLSTM(input_size=100, hidden_size=100,
                                             num_layers=1)))
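# A minimal sketch of the timing helper assumed by bench() (hypothetical; the
# real utility in this codebase may differ): run foo() n_trials times and
# report the mean runtime per call.
import time
import torch

def timeit(foo, n_trials):
    if torch.cuda.is_available():
        torch.cuda.synchronize()  # flush pending kernels before starting the clock
    start = time.time()
    for _ in range(n_trials):
        foo()
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    print(f"{(time.time() - start) / n_trials:.4f}s per call")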
f"Time dim (avg, std): {np.mean(train_x[:, 1]), np.std(train_x[:, 1])}" ) print(f"x dim (avg, std): {np.mean(train_x[:, 2]), np.std(train_x[:, 2])}") print(f"y dim (avg, std): {np.mean(train_x[:, 3]), np.std(train_x[:, 3])}") print( f"Time dim (min, max): {np.min(train_x[:, 1]), np.max(train_x[:, 1])}") print(f"x dim (min, max): {np.min(train_x[:, 2]), np.max(train_x[:, 2])}") print(f"y dim (min, max): {np.min(train_x[:, 3]), np.max(train_x[:, 3])}") # Class Loader and loss computation valid_data = IAMOnDB(data_dir, key='valid') e = 0 for batch in valid_data.iter(): x, (y, x_t_batch, y_t_batch) = batch rand_pred = cuda_move(torch.randn(x.shape[0], x.shape[1], 58)) e += valid_data.loss_score(batch, rand_pred) print(f"err: {e}") # Data Loader data = fetch_iamondb(data_dir) (train_x, train_y), (valid_x, valid_y), (test_x, test_y) = data assert len(train_x) == len(train_y) assert len(valid_x) == len(valid_y) assert len(test_x) == len(test_y) # 5364, 1,438, 1,518 and 3,859 written lines taken from 775, 192, 216 and 544 forms print(f"# of features: {train_x[0].shape[1]}") print( f"classes: from {np.min([np.min(el) for el in train_y])} to {np.max([np.max(el) for el in train_y])}" )
def init_hidden(self, batch_size):
    h0 = torch.zeros(self.num_layers, batch_size, self.hidden_size)
    c0 = torch.zeros(self.num_layers, batch_size, self.hidden_size)
    return cuda_move(h0), cuda_move(c0)
def init_hidden(self, batch_size):
    return cuda_move(torch.zeros(batch_size, self.memory_size))
        h_t_update = torch.mm(x_prev, self.Wxh[:self.hidden_size * max_active_module].t()) + \
            self.bh[:self.hidden_size * max_active_module]
        # only the first max_active_module modules are updated; the remaining
        # modules keep their previous state
        h_t = torch.cat(
            [h_t_update, h_prev[:, self.hidden_size * max_active_module:]],
            dim=1)
        return h_t, h_t


if __name__ == '__main__':
    import torch.nn.functional as F

    # test model update equivalence: the new module's outgoing weights (Wmh)
    # are zeroed, so adding it must leave the output unchanged
    b, i, h, M = 13, 3, 5, 7
    num_modules = 11
    x = cuda_move(torch.randn(4, b, i))
    model = cuda_move(MultiScaleLMN(i, h, M, num_modules=1, max_modules=3))
    m_prev = model.init_hidden(b)
    y_old = model(x, m_prev)

    model.rnn_cell.update_num_modules(model.rnn_cell.num_modules + 1)
    g = model.rnn_cell.num_modules
    model.rnn_cell.Whm.data[(g - 1) * M:] = torch.zeros_like(
        model.rnn_cell.Whm.data[(g - 1) * M:])
    model.rnn_cell.Whm.data[(g - 1) * M:g * M] = torch.randn_like(
        model.rnn_cell.Whm.data[(g - 1) * M:g * M])
    model.rnn_cell.Wmh.data[:, (g - 1) * M:] = torch.zeros_like(
        model.rnn_cell.Wmh.data[:, (g - 1) * M:])
    model.rnn_cell.Wmm.data[(g - 1) * M:g * M, :g * M] = torch.zeros_like(
        model.rnn_cell.Wmm.data[(g - 1) * M:g * M, :g * M])
def __init__(self, A, B, bias):
    self.hidden_size = B.shape[0]
    self.A = cuda_move(torch.tensor(A).float())
    self.B = cuda_move(torch.tensor(B).float())
    self.bias = cuda_move(torch.tensor(bias).float())
def __init__(self):
    super().__init__()
    self.t = cuda_move(torch.zeros(5, 5))
def init_hidden(self, batch_size: int) -> Tensor:
    return cuda_move(
        torch.zeros(batch_size, self.hidden_size * self.num_modules))
    return -1, -1


if __name__ == '__main__':
    ms = 13
    params = {'pretrain_every': 2, 'memory_size': ms}
    log_dir = './logs/debug/'
    x_shape = (65, 32, 11)
    y_shape = x_shape
    data = DummyData(x_shape, y_shape)
    rnn = MultiScaleLMN(x_shape[2], 5, ms, num_modules=1, max_modules=5)
    cb = IncrementalTrainingCallback(params, [rnn], data, data, data, log_dir)
    model = cuda_move(ItemClassifier(rnn, ms * 5, 7))
    trainer = DummyTrainer(model)

    for x, y in data.iter():
        break

    # pretraining a new module must not change the model's output
    y_old = model(x)
    cb.pretrain_module(trainer)
    y_new = model(x)
    assert ((y_old - y_new) ** 2).sum() == 0

    cb.pretrain_module(trainer)
    m_prev = rnn.init_hidden(x.shape[1])
    y_new = model(x)
    assert ((y_old - y_new) ** 2).sum() == 0
def __call__(self, x):
    # x: (time, batch, features); unroll the linear recurrence over time
    h0 = cuda_move(torch.zeros(x.shape[1], self.hidden_size))
    for ti in range(x.shape[0]):
        h0 = (x[ti] - self.bias) @ self.A.T + h0 @ self.B.T
    return h0
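# A usage sketch for the frozen linear RNN above (hypothetical class name and
# shapes): it unrolls h_t = (x_t - bias) A^T + h_{t-1} B^T and returns h_T.
import numpy as np
import torch

A = np.random.randn(16, 8)             # hidden_size x input_size
B = np.random.randn(16, 16)            # hidden_size x hidden_size
bias = np.zeros(8)
rnn = LinearRNN(A, B, bias)            # hypothetical name for the class above
x = cuda_move(torch.randn(50, 4, 8))   # (time, batch, features)
h = rnn(x)                             # final hidden state, shape (4, 16)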
def iter(self):
    # x is just a dummy input because this dataset does not have an input
    x = cuda_move(torch.zeros(self.ys.shape[0], 1, 1))
    yield x, self.ys