def test_SampledSoftmaxLoss(self):
    # A simple test to verify the numerics.
    def _SoftmaxCrossEntropyWithLogits(logits, targets):
        # logits, targets: float arrays of the same shape.
        assert logits.shape == targets.shape
        stable_exp_logits = np.exp(
            logits - np.amax(logits, axis=1, keepdims=True))
        pred = stable_exp_logits / np.sum(stable_exp_logits, 1, keepdims=True)
        return -np.sum(targets * np.log(pred + 1.0e-20), axis=1)

    np.random.seed(1000)
    num_classes = 5
    batch_size = 3
    nsampled = 4
    nhid = 10
    labels = [0, 1, 2]

    (weights, biases, hidden_acts, sampled_vals, exp_logits,
     exp_labels) = self._GenerateTestData(
         num_classes=num_classes,
         dim=nhid,
         batch_size=batch_size,
         num_true=1,
         labels=labels,
         sampled=[1, 0, 2, 3],
         subtract_log_q=True)
    exp_sampled_softmax_loss = np.mean(
        _SoftmaxCrossEntropyWithLogits(exp_logits, exp_labels))

    ss = model.SampledSoftmax(num_classes, nsampled, nhid, tied_weight=None)
    ss.params.weight.data = torch.from_numpy(weights)
    ss.params.bias.data = torch.from_numpy(biases)
    ss.params.cuda()

    hidden_acts = Variable(torch.from_numpy(hidden_acts)).cuda()
    labels = Variable(torch.LongTensor(labels)).cuda()

    logits, new_targets = ss.sampled(hidden_acts, labels, sampled_vals)
    self.assertTrue(
        EXPECT_NEAR(exp_logits, logits.data.cpu().numpy(), 1e-4))

    criterion = nn.CrossEntropyLoss()
    loss = criterion(logits.view(-1, nsampled + 1), new_targets)
    self.assertTrue(
        EXPECT_NEAR(exp_sampled_softmax_loss, loss.data[0], 1e-4))
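
# A minimal numpy sketch of the logits the test above expects, assuming
# _GenerateTestData follows TensorFlow's sampled-softmax convention:
# column 0 scores the true class, columns 1..nsampled score the shared
# negatives, and subtract_log_q removes the sampling bias log Q(class).
# _expected_sampled_logits is a hypothetical helper, not part of the repo.
def _expected_sampled_logits(weights, biases, hidden_acts, labels,
                             sampled, true_freq, sample_freq):
    # Per-row class ids: [true_label, sampled_0, ..., sampled_{k-1}].
    ids = [np.concatenate(([label], sampled)) for label in labels]
    logits = np.stack([hidden_acts[i] @ weights[ids[i]].T + biases[ids[i]]
                       for i in range(len(labels))])
    # subtract_log_q: correct each logit by -log(expected sample frequency).
    log_q = np.log(np.stack([np.concatenate(([true_freq[i]], sample_freq))
                             for i in range(len(labels))]))
    return logits - log_q  # exp_logits in the test would be built this way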
def test_AccidentalMatch(self):
    np.random.seed(1000)
    num_classes = 5
    batch_size = 3
    nsampled = 4
    nhid = 10
    labels = np.random.randint(low=0, high=num_classes, size=batch_size)

    (weights, biases, hidden_acts, sampled_vals, exp_logits,
     exp_labels) = self._GenerateTestData(
         num_classes=num_classes,
         dim=nhid,
         batch_size=batch_size,
         num_true=1,
         labels=labels,
         sampled=[1, 0, 2, 3],
         subtract_log_q=True)

    ss = model.SampledSoftmax(num_classes, nsampled, nhid, tied_weight=None)
    ss.params.weight.data = torch.from_numpy(weights)
    ss.params.bias.data = torch.from_numpy(biases)
    ss.params.cuda()

    hidden_acts = Variable(torch.from_numpy(hidden_acts)).cuda()
    labels = Variable(torch.LongTensor(labels)).cuda()

    sampler = LogUniformSampler(nsampled)
    sampled_values = sampler.sample(nsampled, labels.data.cpu().numpy())
    sample_ids, true_freq, sample_freq = sampled_values

    logits, new_targets = ss.sampled(
        hidden_acts, labels, sampled_values, remove_accidental_match=True)

    criterion = nn.CrossEntropyLoss()
    loss = criterion(logits.view(-1, nsampled + 1), new_targets)

    # Every sampled class that collides with a row's true label must have
    # been masked out, i.e. its softmax probability driven to ~0.
    np_logits = logits.data.cpu().numpy()
    for row in range(batch_size):
        label = labels.data[row]
        for col in range(nsampled):
            if sample_ids[col] == label:
                self.assertTrue(
                    EXPECT_NEAR(np.exp(np_logits[row, col + 1]), 0, 1e-4))
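
# What remove_accidental_match is asserted to do above, as a standalone
# sketch (a hypothetical helper mirroring the assertion loop): any sampled
# column that collides with a row's true label is masked to a very large
# negative logit, so exp(logit) ~ 0 and the duplicate cannot act as its
# own negative against the true class in column 0.
def _mask_accidental_hits(np_logits, labels, sample_ids):
    # np_logits: (batch, 1 + nsampled); column 0 is the true class.
    for row, label in enumerate(labels):
        for col, sid in enumerate(sample_ids):
            if sid == label:
                np_logits[row, col + 1] = -1e37  # effectively -inf
    return np_logits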
###############################################################################
# Build the model
###############################################################################
eval_batch_size = 1
net = model.RNNModel(ntokens, args.emsize, args.nhid, args.emsize,
                     args.nlayers, args.proj, args.dropout)

encoder = nn.Embedding(ntokens, args.emsize)
util.initialize(encoder.weight)

twht = None
if args.tied:
    if args.nhid != args.emsize and not args.proj:
        raise ValueError(
            'When using the tied flag, hidden must be equal to embedding size')
    twht = encoder.weight

D = args.emsize if args.proj else args.nhid
ss = model.SampledSoftmax(ntokens, nsampled, D, tied_weight=twht)

net.add_module("encoder", encoder)
net.add_module("decoder", ss)
net.cuda()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adagrad(net.parameters(), args.lr, weight_decay=1e-6)

###############################################################################
# Training code
###############################################################################

def repackage_hidden(h, device_id=0):
    """Wraps hidden states in new Variables, to detach them from their history."""
    if isinstance(h, Variable):
        return Variable(h.data)
    else:
        return tuple(repackage_hidden(v, device_id) for v in h)
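
# For orientation, a sketch of what the "decoder" is assumed to compute in
# SampledSoftmax.sampled (Variable-era PyTorch, consistent with the tests):
# gather the true row plus nsampled shared negative rows from the full
# (ntokens x D) projection, then remap every target to column 0. This is a
# sketch under assumptions, not the repo's actual implementation.
def sampled_logits_sketch(params, inputs, labels, sample_ids):
    # inputs: (batch, D); labels, sample_ids: LongTensor Variables.
    true_w = params.weight.index_select(0, labels)        # (batch, D)
    true_b = params.bias.index_select(0, labels)          # (batch,)
    sample_w = params.weight.index_select(0, sample_ids)  # (nsampled, D)
    sample_b = params.bias.index_select(0, sample_ids)    # (nsampled,)
    true_logits = (inputs * true_w).sum(dim=1) + true_b   # (batch,)
    sample_logits = inputs.mm(sample_w.t()) + sample_b    # (batch, nsampled)
    logits = torch.cat([true_logits.unsqueeze(1), sample_logits], dim=1)
    # The true class always sits in column 0, so the targets collapse to 0.
    new_targets = Variable(labels.data.new(labels.size(0)).zero_())
    return logits, new_targets  # feed into nn.CrossEntropyLoss as above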
###############################################################################
# Build the model
###############################################################################
eval_batch_size = 1
net = model.RNNModel(ntokens, args.emsize, args.nhid, args.nlayers,
                     args.dropout)

encoder = nn.Embedding(ntokens, args.emsize)
util.initialize(encoder, ntokens)

twht = None
if args.tied:
    if args.nhid != args.emsize:
        raise ValueError(
            'When using the tied flag, hidden must be equal to embedding size')
    twht = encoder.weight

ss = model.SampledSoftmax(ntokens, nsampled, args.nhid, tied_weight=twht)

net.add_module("encoder", encoder)
net.add_module("decoder", ss)
net.cuda(0)
encoder.cuda(0)
ss.cuda(0)

criterion = nn.CrossEntropyLoss()

###############################################################################
# Training code
###############################################################################
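
# For reference: the log-uniform (Zipfian) proposal that LogUniformSampler
# is assumed to draw negatives from, matching TF's
# log_uniform_candidate_sampler:
#     P(class = k) = (log(k + 2) - log(k + 1)) / log(range_max + 1)
# With a frequency-sorted vocabulary, low ids (frequent words) are sampled
# most often, which keeps the shared negatives informative.
import numpy as np

def log_uniform_prob(k, range_max):
    return (np.log(k + 2.0) - np.log(k + 1.0)) / np.log(range_max + 1.0)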