def generate_learned_samples(self):
    '''
    Draw and generate data.

    Generator that, for each of ``self.iter_n`` iterations, samples one
    random minibatch (without replacement within the batch) from the
    training set and one from the test set, normalizes the observations,
    one-hot encodes the labels, and optionally injects noise into the
    training observations.

    Yields:
        `Tuple` data. The shape is ...
        - Tensor of observed data points in training (normalized).
        - Tensor of one-hot supervised labels in training.
        - Tensor of observed data points in test (normalized).
        - Tensor of one-hot supervised labels in test.
    '''
    for _ in range(self.iter_n):
        training_batch_arr, test_batch_arr = None, None
        training_label_arr, test_label_arr = None, None

        # Shuffle all training row indices, then take the first
        # `batch_size` of them as this iteration's minibatch.
        row_arr = np.arange(self.__train_observed_arr.shape[0])
        np.random.shuffle(row_arr)
        training_batch_arr = self.__train_observed_arr[
            row_arr[:self.batch_size]]
        training_batch_arr = torch.from_numpy(training_batch_arr)
        # __ctx is the target device (CPU/GPU) — set elsewhere in the class.
        training_batch_arr = training_batch_arr.to(self.__ctx).float()
        training_batch_arr = self.pre_normalize(training_batch_arr)

        # Labels for the same sampled rows.
        label_key_arr = self.__train_label_arr[row_arr[:self.batch_size]]
        label_key_arr = torch.from_numpy(label_key_arr)
        label_key_arr = label_key_arr.to(self.__ctx).float()
        # NOTE(review): `torch.one_hot` is not a public PyTorch API
        # (F.one_hot exists and requires an int64 tensor, not float) —
        # presumably a project-level shim; confirm its signature.
        training_label_arr = torch.one_hot(label_key_arr, self.__label_n)

        # Same sampling procedure for the test split.
        test_row_arr = np.arange(self.__test_observed_arr.shape[0])
        np.random.shuffle(test_row_arr)
        test_batch_arr = self.__test_observed_arr[
            test_row_arr[:self.batch_size]]
        test_batch_arr = torch.from_numpy(test_batch_arr)
        test_batch_arr = test_batch_arr.to(self.__ctx).float()
        test_batch_arr = self.pre_normalize(test_batch_arr)

        test_label_key_arr = self.__test_label_arr[
            test_row_arr[:self.batch_size]]
        test_label_key_arr = torch.from_numpy(test_label_key_arr)
        test_label_key_arr = test_label_key_arr.to(self.__ctx).float()
        test_label_arr = torch.one_hot(test_label_key_arr, self.__label_n)

        # Optional data augmentation: noise is applied to the training
        # observations only, after normalization.
        if self.__noiseable_data is not None:
            training_batch_arr = self.__noiseable_data.noise(
                training_batch_arr)

        yield training_batch_arr, training_label_arr, test_batch_arr, test_label_arr
def test_one_hot_variable(self):
    """Check that one_hot accepts a Variable-wrapped index column.

    Builds a 3x3 one-hot matrix from the index column [2, 0, 1] and
    verifies both the shape and every element against a hand-written
    expected tensor.
    """
    shape = (3, 3)
    index_col = torch.LongTensor([2, 0, 1]).view(-1, 1)
    result = torch.one_hot(shape, Variable(index_col))
    # Row i has a single 1 at the column given by index_col[i].
    want = torch.LongTensor([[0, 0, 1],
                             [1, 0, 0],
                             [0, 1, 0]])
    assert result.size() == want.size()
    # Flatten both sides and compare element-wise.
    assert all(torch.eq(result.view(-1).data, want.view(-1)))
def train(opt, train_loader, test_loader, model, writer):
    """Train *model* with Adam and an exponentially decaying lr.

    Args:
        opt: namespace of options (``lr``, ``epochs``, ``use_cuda``,
            ``print_every``).
        train_loader / test_loader: the data loaders.
        model: network exposing ``loss(output, target, data)``.
        writer: SummaryWriter-like object for scalar logging.

    Side effects: logs per-step losses, prints progress, and calls
    ``test`` after every epoch.
    """
    num_batches = len(train_loader)
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.5)
    # Evaluate CUDA availability once; `and` (not bitwise `&`) is the
    # correct boolean conjunction here.
    use_cuda = opt.use_cuda and torch.cuda.is_available()
    model.train()
    for epoch in range(opt.epochs):
        print('Learning rate: {}'.format(scheduler.get_lr()[0]))
        start_time = time.time()
        for batch_idx, (data, target) in enumerate(tqdm(train_loader)):
            batch_size = data.size(0)
            global_step = batch_idx + epoch * num_batches

            # Transform to one-hot indices: [batch_size, 10]
            # NOTE(review): torch.one_hot(size, index) is not public
            # PyTorch API — presumably a project helper; confirm.
            target = torch.one_hot((batch_size, 10), target.view(-1, 1))
            assert target.size() == torch.Size([batch_size, 10])

            # Use GPU if available; inputs need no autograd history.
            with torch.no_grad():
                data, target = Variable(data), Variable(target)
                if use_cuda:
                    data, target = data.cuda(), target.cuda()

            # Train step
            optimizer.zero_grad()
            output = model(data)
            L, m_loss, r_loss = model.loss(output, target, data)
            L.backward()
            optimizer.step()

            # Log losses
            writer.add_scalar('train/loss', L.item(), global_step)
            writer.add_scalar('train/marginal_loss', m_loss.item(),
                              global_step)
            writer.add_scalar('train/reconstruction_loss', r_loss.item(),
                              global_step)

            # Print losses
            if batch_idx % opt.print_every == 0:
                tqdm.write(
                    'Epoch: {} Loss: {:.6f} Marginal loss: {:.6f} Recons. loss: {:.6f}'
                    .format(epoch, L.item(), m_loss.item(), r_loss.item()))

        # Decay the lr AFTER the epoch's optimizer steps (PyTorch >= 1.1
        # ordering); the original stepped at the start of the epoch, so
        # even epoch 0 trained at half the configured lr.
        scheduler.step()

        # Print time elapsed for every epoch
        end_time = time.time()
        print('Epoch {} takes {:.0f} seconds.'.format(epoch,
                                                      end_time - start_time))

        # Test model
        test(opt, test_loader, model, writer, epoch, num_batches)
def forward(self, inputs, **kw):
    """Pretraining head: combined masked-LM and sentence-fit losses.

    Args:
        inputs: 6-tuple (token ids `seq`, segment ids `typ`, masked
            positions `idx`, masked-token targets `val`, fit labels
            `fit`, mlm weight mask `mlm`).
        **kw: forwarded to every sub-module call.

    Returns:
        (encoded sequence, combined scalar loss).

    NOTE(review): `torch.matmul(..., transpose_b=True)`, `torch.bias_add`,
    `torch.reduce_sum`, `torch.one_hot`, `torch.reduce_mean` and the
    `axis=` keyword are TensorFlow-style — presumably project shims over
    torch; confirm they exist before trusting this block.
    """
    cfg = self.cfg
    seq, typ, idx, val, fit, mlm = inputs
    # Encode; `seq` aliases the full transformer output.
    seq = y = self.trafo([[seq, typ], None], **kw)
    # Pooled representation of the first ([CLS]-like) position.
    fit_y = self.pool(torch.squeeze(y[:, 0:1, :], axis=1), **kw)
    # Gather the hidden states at the masked positions.
    y = torch.gather(y, idx, axis=1)
    y = self.norm(self.mlm_dense(y, **kw), **kw)
    # Tie the output projection to the token embedding matrix.
    e = self.trafo.tok_embed.embeddings
    y = torch.matmul(y, e, transpose_b=True)
    y = torch.log_softmax(torch.bias_add(y, self.mlm_bias), axis=-1)
    # Per-position negative log-likelihood of the masked targets.
    mlm_loss = -torch.reduce_sum(y * torch.one_hot(val, cfg.s_vocab), axis=-1)
    # Binary "fit" classification from the pooled vector.
    y = torch.matmul(fit_y, self.gain, transpose_b=True)
    y = torch.log_softmax(torch.bias_add(y, self.bias), axis=-1)
    fit_loss = -torch.reduce_sum(y * torch.one_hot(fit, 2), axis=-1)
    # Weighted mean of the mlm loss (1e-5 guards against an all-zero mask).
    loss = torch.reduce_sum(mlm * mlm_loss)
    # NOTE(review): precedence here divides by the SUM of both terms;
    # the usual formulation is `loss/sum(mlm) + mean(fit_loss)` — confirm
    # this is intended.
    loss /= (torch.reduce_sum(mlm) + 1e-5) + torch.reduce_mean(fit_loss)
    return seq, loss
def lookup(self, x, i):
    """Embed indices *x* using the i-th weight table.

    Args:
        x: tensor of indices to embed.
        i: which of ``self.weights`` / ``self.adjusts`` to use.

    Returns:
        Embedded (and optionally projected) tensor.
    """
    t = self.weights[i]
    if self.one_hot:
        # One-hot matmul path (often preferred on TPUs over gather).
        # NOTE(review): `torch.one_hot`/`torch.shape` are not public
        # PyTorch API — presumably project shims; confirm.
        y = torch.one_hot(x, torch.shape(t)[0], axis=-1)
        # (n tokens, p vocab) x (i rows, n) -> (i rows, p dims).
        y = torch.einsum("np,in->ip", t, y)
    else:
        cfg = self.cfg
        # Standard embedding lookup with the project's padding/norm config.
        y = F.embedding(x, t, cfg.PAD, cfg.max_norm, cfg.norm_type,
                        cfg.scale_grad, cfg.sparse)
    a = self.adjusts[i]
    if a is not None:
        # Optional linear projection to a different hidden size.
        y = torch.einsum("ip,ph->ih", y, a)
    return y
def forward(self, *input):
    """Mask a capsule tensor, keeping only the selected capsules.

    Called either as ``forward(capsules, labels)`` — mask by the provided
    true labels (one-hot encoded here) — or as ``forward(capsules)`` —
    mask by the capsules with the largest lengths (prediction mode).

    Returns:
        masked tensor, shape [None, num_capsule * dim_capsule] per the
        original comments.
    """
    if len(input) == 2:
        # True label is provided with shape = [None, n_classes], i.e. one-hot code.
        # BUGFIX: the original tested `type(input) is list`, which is
        # always False for a *args tuple, so this branch was unreachable
        # and the tensor ops below received the tuple itself.
        inputs, mask = input
        mask = K.one_hot(K.cast(mask, 'int32'),
                         inputs.get_shape().as_list()[1])
    else:
        # if no true label, mask by the max length of capsules. Mainly
        # used for prediction.
        inputs = input[0]
        # compute lengths of capsules
        x = torch.sqrt(torch.sum(K.square(inputs), -1))
        # generate the mask which is a one-hot code.
        # mask.shape=[None, n_classes]=[None, num_capsule]
        # BUGFIX: torch.nn.top_k does not exist; torch.topk does.
        _, indices = torch.topk(x, 2)
        # NOTE(review): torch.one_hot / torch.batch_dot below are not
        # public PyTorch API — presumably project shims; confirm.
        mask = torch.one_hot(indices, inputs.get_shape().as_list()[1])
    # inputs.shape=[None, num_capsule, dim_capsule]
    # mask.shape=[None, num_capsule]
    # masked.shape=[None, num_capsule * dim_capsule]
    masked = torch.batch_dot(mask, inputs)
    return masked
def search(self, tgt, ctx, i=None):
    """Score target tokens, decoding only where *tgt* is UNK.

    Known (non-UNK) positions get a one-hot "prior" (0.0 at the known
    token, a large negative value elsewhere) instead of running the
    decoder's logits through that position.

    Args:
        tgt: target token ids.
        ctx: decoder context.
        i: optional single position to restrict scoring to.

    Returns:
        (log-probabilities, logits-or-prior, unk mask).

    NOTE(review): `torch.equal`, `torch.reduce_all`, the 4-arg
    `torch.one_hot`, `torch.expand_dims` and `axis=` are TF-style —
    presumably project shims over torch; in real torch `torch.equal`
    is a whole-tensor comparison, not element-wise. Confirm the shim.
    """
    cfg = self.cfg
    # Element-wise "is this position UNK?" mask (per shim semantics).
    unk = torch.equal(tgt, cfg.UNK)
    # One-hot prior: 0.0 at the known token id, big negative elsewhere.
    prior = torch.one_hot(tgt, cfg.num_toks, 0.0, utils.big_neg)
    if i is not None:
        unk = unk[:, i]
        prior = prior[:, i, :]
    # NOTE(review): `is True` on a (presumed) tensor result is identity
    # comparison and is likely always False — so the decoder below may
    # always run; confirm intended behavior.
    if torch.reduce_all(unk) is True:
        # Nothing to decode: every position is UNK, use the prior alone.
        logi = prior
    else:
        y = self.decode(tgt, ctx)
        if i is not None:
            y = y[:, i, :]
        sh = y.shape  # torch.int_shape(y)
        # Flatten to 2-D for the logits projection, then restore shape.
        y = torch.reshape(y, (-1, sh[-1]))
        y = self.logits(y)
        y = torch.reshape(y, sh[:-1] + y.shape[-1:])
        # Where the token is UNK use the decoded logits, else the prior.
        u = torch.expand_dims(unk, axis=2)
        u = torch.broadcast_to(u, y.shape)
        logi = torch.where(u, y, prior)
    # Normalize logits into log-probabilities.
    logp = y - torch.reduce_logsumexp(y, axis=-1, keepdims=True)
    return logp, logi, unk
def _loss(i):
    """Negative mean log-likelihood of the gold span position for head *i*.

    Closes over `pred` (per-head position scores), `span` (gold
    positions) and `self.slen` from the enclosing scope.
    """
    # Log-probabilities over the sequence positions.
    logp = torch.log_softmax(pred[i], axis=-1)
    # Keep only the log-prob at the gold position via a one-hot mask.
    picked = torch.one_hot(span[:, i], self.slen) * logp
    # Average over the batch, negated for a minimizable loss.
    return -torch.reduce_mean(torch.reduce_sum(picked, axis=-1))
def test(opt, test_loader, model, writer, epoch, num_batches):
    """Evaluate *model* on the test set, log metrics, and checkpoint.

    Args:
        opt: option namespace (``use_cuda``).
        test_loader: test data loader.
        model: network exposing ``loss`` and ``Decoder``.
        writer: SummaryWriter-like logger.
        epoch, num_batches: used to compute the global logging step.

    Side effects: writes scalars and a reconstruction image grid to
    *writer*, prints a summary, and saves the model under ./ckpt/.
    """
    loss = 0
    margin_loss = 0
    recons_loss = 0
    correct = 0
    step = epoch * num_batches + num_batches
    # Evaluate once; `and` (not bitwise `&`) is the correct boolean
    # conjunction here.
    use_cuda = opt.use_cuda and torch.cuda.is_available()
    model.eval()
    for data, target in test_loader:
        # Store the indices for calculating accuracy
        label = target.unsqueeze(0).type(torch.LongTensor)
        batch_size = data.size(0)

        # Transform to one-hot indices: [batch_size, 10]
        # NOTE(review): torch.one_hot(size, index) is not public PyTorch
        # API — presumably a project helper; confirm.
        target = torch.one_hot((batch_size, 10), target.view(-1, 1))
        assert target.size() == torch.Size([batch_size, 10])

        # Use GPU if available; no autograd needed during evaluation.
        with torch.no_grad():
            data, target = Variable(data), Variable(target)
            if use_cuda:
                data, target = data.cuda(), target.cuda()

        # Output predictions
        output = model(data)
        L, m_loss, r_loss = model.loss(output, target, data)
        loss += L.item()
        margin_loss += m_loss.item()
        recons_loss += r_loss.item()

        # Count correct numbers: the predicted class is the capsule
        # with the largest length. norms: [batch_size, 10]
        norms = torch.sqrt(torch.sum(output**2, dim=2))
        pred = norms.data.max(1, keepdim=True)[1].type(torch.LongTensor)
        correct += pred.eq(label.view_as(pred)).cpu().sum().item()

    # Visualize reconstructed images of the last batch (assumes
    # 28x28 single-channel images, i.e. MNIST-like data).
    recons = model.Decoder(output, target)
    recons = recons.view(batch_size, 1, 28, 28)
    recons = vutils.make_grid(recons.data, normalize=True, scale_each=True)
    writer.add_image('Image-{}'.format(step), recons, step)

    # Log test losses (averaged over batches; accuracy over samples).
    loss /= len(test_loader)
    margin_loss /= len(test_loader)
    recons_loss /= len(test_loader)
    acc = correct / len(test_loader.dataset)
    writer.add_scalar('test/loss', loss, step)
    writer.add_scalar('test/marginal_loss', margin_loss, step)
    writer.add_scalar('test/reconstruction_loss', recons_loss, step)
    writer.add_scalar('test/accuracy', acc, step)

    # Print test losses
    print('\nTest loss: {:.4f} Marginal loss: {:.4f} Recons loss: {:.4f}'.
          format(loss, margin_loss, recons_loss))
    print('Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

    # Checkpoint model
    torch.save(
        model,
        './ckpt/epoch_{}-loss_{:.6f}-acc_{:.6f}.pt'.format(epoch, loss, acc))