def sample(model, data, dev, nsamples=100, use_max=False):
    """Interactively generate text from a character-level model.

    Loops until the user answers ``n`` to the first prompt; any other answer
    continues to the seed prompt. The seed is fed to the model as a list of
    one-hot tensors, then ``nsamples`` further characters are generated one
    at a time, each fed back in as the next input. The seed and the generated
    characters are written to ``sys.stdout``.

    An empty seed, or a seed containing a character that is missing from
    ``data.char_to_idx``, is reported and the user is asked again instead of
    the function failing.

    Args:
        model: callable that accepts a list of tensors and returns an
            indexable sequence of outputs (only the last one is used); must
            also provide ``reset_states(dev)``.
        data: object exposing ``vocab_size``, ``char_to_idx`` and
            ``idx_to_char``.
        dev: device every input tensor is moved to.
        nsamples: number of characters generated after the seed.
        use_max: if true, always pick the most probable character; otherwise
            draw the character from the predicted distribution.

    Returns:
        None. Returns when the user answers ``n``.
    """

    def one_hot(index):
        # One row of length vocab_size with a single 1 at `index`, on `dev`.
        x = np.zeros((1, data.vocab_size), dtype=np.float32)
        x[0, index] = 1
        tx = tensor.from_numpy(x)
        tx.to_device(dev)
        return tx

    while True:
        cmd = input('Do you want to sample text from the model [y/n]')
        if cmd == 'n':
            return

        seed = input('Please input some seeding text, e.g., #include <c: ')
        if not seed:
            # Without a seed there is no output to start the sampling from.
            print('The seeding text must not be empty.')
            continue
        try:
            seed_indices = [data.char_to_idx[c] for c in seed]
        except KeyError as err:
            # The seed is free-form user input; do not crash on unseen chars.
            print('The seeding text contains a character that is not in '
                  'the vocabulary: %s' % err)
            continue

        inputs = [one_hot(index) for index in seed_indices]
        model.reset_states(dev)
        outputs = model(inputs)
        y = tensor.softmax(outputs[-1])
        sys.stdout.write(seed)

        for _ in range(nsamples):
            prob = tensor.to_numpy(y)[0]
            if use_max:
                cur = np.argmax(prob)
            else:
                cur = np.random.choice(data.vocab_size, 1, p=prob)[0]
            sys.stdout.write(data.idx_to_char[cur])
            # Feed the chosen character back in as the next single-step input.
            outputs = model([one_hot(cur)])
            y = tensor.softmax(outputs[-1])
def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16, model_path='model'):
    """Train a CharRNN on ``data`` and report progress after every epoch.

    For each epoch the function runs all training batches, prints the mean
    training loss, runs ``evaluate`` and then enters the interactive
    ``sample`` loop.

    Args:
        data: object exposing ``vocab_size``, ``num_train_batch`` and
            ``train_dat`` (it is also handed to ``evaluate`` and ``sample``).
        max_epoch: number of passes over the training batches.
        hidden_size: second argument passed to the ``CharRNN`` constructor.
        seq_length: passed to ``convert`` and used to scale the printed loss.
        batch_size: number of ``train_dat`` entries taken per batch.
        model_path: accepted but not used by this function.

    Returns:
        None.
    """
    # NOTE(review): no optimizer is configured in this function; presumably
    # CharRNN sets one up itself - confirm.
    gpu = device.create_cuda_gpu()
    net = CharRNN(data.vocab_size, hidden_size)
    # NOTE(review): the meaning of the two positional flags is defined by
    # CharRNN.graph, which is not visible here - confirm.
    net.graph(True, False)

    # convert() is handed the tensors from the previous step on every call;
    # they start out as None.
    inputs = labels = None

    for epoch in range(max_epoch):
        net.train()
        epoch_loss = 0
        for step in tqdm(range(data.num_train_batch)):
            start = step * batch_size
            batch = data.train_dat[start:start + batch_size]
            inputs, labels = convert(batch, batch_size, seq_length,
                                     data.vocab_size, gpu, inputs, labels)
            _, loss = net(inputs, labels)
            net.reset_states(gpu)
            epoch_loss += tensor.to_numpy(loss)[0]

        mean_loss = epoch_loss / data.num_train_batch / seq_length
        print('\nEpoch %d, train loss is %f' % (epoch, mean_loss))

        evaluate(net, data, batch_size, seq_length, gpu, inputs, labels)
        sample(net, data, gpu)
def evaluate(model, data, batch_size, seq_length, dev, inputs, labels):
    """Run the model over the validation batches and print the mean loss.

    Args:
        model: provides ``eval()``, ``reset_states(dev)`` and is callable on
            the converted inputs.
        data: object exposing ``num_test_batch``, ``val_dat`` and
            ``vocab_size``.
        batch_size: number of ``val_dat`` entries taken per batch.
        seq_length: passed to ``convert`` and used to scale the printed loss.
        dev: device passed to ``convert`` and ``reset_states``.
        inputs: tensors from a previous ``convert`` call, passed back to it.
        labels: tensors from a previous ``convert`` call, passed back to it.

    Returns:
        None. The result is only printed.
    """
    model.eval()
    total_loss = 0.0

    for batch_idx in range(data.num_test_batch):
        start = batch_idx * batch_size
        batch = data.val_dat[start:start + batch_size]
        inputs, labels = convert(batch, batch_size, seq_length,
                                 data.vocab_size, dev, inputs, labels)
        # The recurrent state is reset before every validation batch.
        model.reset_states(dev)
        predictions = model(inputs)
        batch_loss = autograd.softmax_cross_entropy(predictions, labels)[0]
        total_loss += tensor.to_numpy(batch_loss)[0]

    mean_loss = total_loss / data.num_test_batch / seq_length
    print(' validation loss is %f' % mean_loss)
def _forward_helper(self, dev, is_train, use_graph, sequential):
    """Check the model's forward output against the NumPy reference.

    Generates the test data on ``dev``, compiles an ``MLP`` with the given
    flags, and asserts that its output on ``self.inputs`` is almost equal to
    ``self.numpy_forward(self.data)``.

    Args:
        dev: device passed to ``self.generate_data``.
        is_train: forwarded to ``model.compile``.
        use_graph: forwarded to ``model.compile``.
        sequential: forwarded to ``model.compile``.
    """
    self.generate_data(dev)

    # NOTE(review): the optimizer is passed positionally to MLP here, whereas
    # _train_one_batch_helper uses MLP(num_classes=2) plus set_optimizer() -
    # confirm this matches the MLP constructor.
    net = MLP(self.sgd)
    compile_flags = dict(is_train=is_train,
                         use_graph=use_graph,
                         sequential=sequential)
    net.compile([self.inputs], **compile_flags)
    self.get_params(net)

    out = net(self.inputs)
    expected = self.numpy_forward(self.data)
    actual = tensor.to_numpy(out)
    np.testing.assert_array_almost_equal(actual, expected)
def _train_one_batch_helper(self, dev, is_train, use_graph, sequential):
    """Check one training step of the model against the NumPy reference.

    Generates the test data on ``dev``, compiles a two-class ``MLP`` with
    ``self.sgd`` as optimizer, runs one call on ``self.inputs`` and
    ``self.target``, and asserts that the output, the loss and the four
    parameter tensors are almost equal to their NumPy counterparts.

    Args:
        dev: device passed to ``self.generate_data``.
        is_train: forwarded to ``model.compile``.
        use_graph: forwarded to ``model.compile``.
        sequential: forwarded to ``model.compile``.
    """
    self.generate_data(dev)

    net = MLP(num_classes=2)
    net.set_optimizer(self.sgd)
    compile_flags = dict(is_train=is_train,
                         use_graph=use_graph,
                         sequential=sequential)
    net.compile([self.inputs], **compile_flags)
    self.get_params(net)

    out, loss = net(self.inputs, self.target)
    np_out, np_loss = self.numpy_train_one_batch(self.data, self.label)

    # (tensor, NumPy reference) pairs, compared in this order.
    comparisons = (
        (out, np_out),
        (loss, np_loss),
        (self.w0, self.W0),
        (self.b0, self.B0),
        (self.w1, self.W1),
        (self.b1, self.B1),
    )
    for actual, expected in comparisons:
        np.testing.assert_array_almost_equal(tensor.to_numpy(actual), expected)
# Decision boundary: the line y = 5x + 1, sampled at 200 points on [-1, 1].
def f(x):
    return 5 * x + 1


bd_x = np.linspace(-1.0, 1, 200)
bd_y = f(bd_x)

# Training points: 400 x values drawn uniformly from [-1, 1), with y placed
# on the boundary and then perturbed by Gaussian noise scaled by 2.
x = np.random.uniform(-1, 1, 400)
y = f(x) + 2 * np.random.randn(len(x))

# A point is labelled 1 when it lies below the boundary, 0 otherwise; the
# features are the (x, y) coordinates, one point per row.
label = (f(x) > y).astype(np.int32)
data = np.column_stack((x, y)).astype(np.float32)

dev = device.create_cuda_gpu_on(0)
sgd = opt.SGD(0.05)

# Device tensors that are refilled from the NumPy arrays at every step.
tx = tensor.Tensor((400, 2), dev, tensor.float32)
ty = tensor.Tensor((400,), dev, tensor.int32)

model = MLP(data_size=2, perceptron_size=3, num_classes=2)

# Set the optimizer, compile the model with graph mode enabled, and switch
# it to training mode.
model.set_optimizer(sgd)
model.compile([tx], is_train=True, use_graph=True, sequential=False)
model.train()

for i in range(1001):
    tx.copy_from_numpy(data)
    ty.copy_from_numpy(label)
    out, loss = model(tx, ty, 'fp32', spars=None)

    # Report the loss every 100 steps.
    if i % 100 == 0:
        print("training loss = ", tensor.to_numpy(loss)[0])