Example #1
def generate_ngram_naive_bayes_model(training_iter, alpha):
    labelCounts = ntorch.zeros(len(LABEL.vocab), names=("class",)).cuda()
    vocabCounts = ntorch.tensor(
        [alpha[f[0]]
         for f in NGRAMS.vocab.itos], names=("vocab", )).cuda() * ntorch.ones(
             len(LABEL.vocab), names=("class", )).cuda()
    classes = ntorch.tensor(torch.eye(len(LABEL.vocab)),
                            names=("class", "classIndex")).cuda()
    encoding = ntorch.tensor(torch.eye(len(NGRAMS.vocab)),
                             names=("vocab", "vocabIndex")).cuda()
    for batch in training_iter:
        oneHot = encoding.index_select("vocabIndex", batch.text)
        setofwords, _ = oneHot.max("ngramlen")
        classRep = classes.index_select("classIndex", batch.label.long())
        labelCounts += classRep.sum("batch")
        vocabCounts += setofwords.dot("batch", classRep)

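    # weight: normalized log-count ratio between classes; bias: log ratio of class counts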
    p = vocabCounts.get("class", 1)
    q = vocabCounts.get("class", 0)
    r = ((p * q.sum()) / (q * p.sum())).log()
    # r= (p/q).log()
    weight = r
    b = (labelCounts.get("class", 1) / labelCounts.get("class", 0)).log()

    def naive_bayes(test_batch):
        oneHotTest = encoding.index_select("vocabIndex", test_batch.cuda())
        setofwords, _ = oneHotTest.max("seqlen")
        y = (weight.dot("vocab", setofwords) + b).sigmoid()
        return (y - 0.5) * ntorch.tensor([-1., 1.], names=("class",)).cuda() + 0.5

    return naive_bayes
Example #2
    def __init__(self, dataset, vocabSize, batchSize, alpha=1):
        super(naiveBayesModel, self).__init__()

        self.vocabSize = vocabSize
        # document counts per class
        N_p = 0
        N_m = 0
        # smoothed per-class count vectors (Laplace smoothing with alpha)
        p = ntorch.tensor(torch.ones(vocabSize) * alpha, ['vocab']).cuda()
        q = ntorch.tensor(torch.ones(vocabSize) * alpha, ['vocab']).cuda()

        ones = ntorch.tensor(torch.ones(vocabSize, batchSize),
                             ['vocab', 'batch'])
        zeros = ntorch.tensor(torch.zeros(vocabSize, batchSize),
                              ['vocab', 'batch'])

        for i, batch in enumerate(dataset):
            if i % 100 == 0: print(f"iteration {i}")

            #gets binarized set-of-words
            f = self.convertToX(batch.text)
            #f = torch.where(x > 0, torch.ones(f.size()), torch.zeros(f.size()))  # TODO

            #p += ntorch.where(batch.label==1., ones, zeros).sum('batch')
            #q += ntorch.where(batch.label==0., ones, zeros).sum('batch')

            # accumulate set-of-words counts for the positive (p) and negative (q) class
            p += ntorch.dot("batch", batch.label.float(), f)
            q += ntorch.dot("batch", (batch.label == 0.).float(), f)

            #print("q update:", (batch.label==0.).float())
            #print("p update:", batch.label.float())
            #assert False

            _n = batch.label.sum("batch").item()
            N_p += _n
            N_m += batchSize - _n
        # normalized log-count ratio
        r = ntorch.log(p / p.sum('vocab').item()) - ntorch.log(q / q.sum('vocab').item())

        self.W = r
        self.b = ntorch.tensor(math.log(N_p / N_m), []).cuda()  # log prior odds

        print("b", self.b)
        print("W", self.W)

        print("N_p", N_p)
        print("N_m", N_m)
        print("sum W", self.W.sum("vocab"))
Example #3
    def convertToX(self, batchText):
        # builds binarized set-of-words feature vectors with scatter_
        x = ntorch.tensor(
            torch.zeros(self.vocabSize,
                        batchText.shape['batch'],
                        device=device), ('vocab', 'batch'))
        y = ntorch.tensor(
            torch.ones(batchText.shape['seqlen'],
                       batchText.shape['batch'],
                       device=device), ('seqlen', 'batch'))

        x.scatter_('vocab', batchText, y, 'seqlen')
        #print("len x:", len(x))
        return x
Example #4
    def convertToX(self, batchText):
        # builds binarized set-of-words feature vectors with scatter_
        x = ntorch.tensor(
            torch.zeros(self.vocabSize, batchText.shape['batch']).cuda(),
            ('vocab', 'batch'))
        y = ntorch.tensor(
            torch.ones(batchText.shape['seqlen'], batchText.shape['batch']),
            ('seqlen', 'batch')).cuda()
        x.scatter_('vocab', batchText, y, 'seqlen')

        #print("x", x)
        #print(x.sum("vocab"))

        return x
Example #5
 def _shift_trg(self, trg):
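     # prepend BOS_IND and drop the last target token so the decoder input is
     # the target sequence shifted right by one position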
     start_of_sent = [[BOS_IND] * trg.shape['batch']]
     start_of_sent = ntorch.tensor(start_of_sent,
                                   names=('trgSeqlen', 'batch'))
     end_of_sent = trg[{'trgSeqlen': slice(0, trg.shape['trgSeqlen'] - 1)}]
     shifted = ntorch.cat((start_of_sent, end_of_sent), 'trgSeqlen')
     return shifted
Example #6
def get_prediction_iter(iterator, model, TEXT, aa_compress, mask_tbl, device):
    ''' Predict outputs from sequence'''

    model.to(device)
    model.eval()
    output = []
    with torch.no_grad():
        for batch in iterator:
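            # input drops the final token; target is the sequence shifted by one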
            seq_len = batch.sequence.shape["seqlen"]
            text = batch.sequence.narrow("seqlen", 0, seq_len - 1)
            target = batch.sequence.narrow("seqlen", 1, seq_len - 1)
            # Forward
            predictions = model(text, aa_compress(target))
            # Mask all outputs that don't work
            # Note: first, we clone the targets and then we switch the first target
            #   codon into the start codon to ensure that we allow for all possible
            #   start codons to be predicted!
            mask_targets = target.clone()
            mask_targets[{"seqlen": 0}] = TEXT.vocab.stoi["<start>"]
            mask_bad_codons = ntorch.tensor(mask_tbl[mask_targets.values],
                                            names=("seqlen", "batch",
                                                   "vocablen")).float()
            predictions = (mask_bad_codons + predictions.float())
            predictions = predictions.argmax("vocablen")
            output.append(predictions)

    return output
Example #7
    def forward(self, sent1, sent2, labels=None):
        """Notation straight from paper this time"""
        a_, b_ = self.input(sent1, sent2)

        # Attention
        F_a_ = self.attend(a_)
        F_b_ = self.attend(b_)

        e = F_a_.dot('hidden', F_b_)
        alpha = e.softmax(dim='seqlenA').dot('seqlenA', a_)
        beta = e.softmax(dim='seqlenB').dot('seqlenB', b_)

        # Comparison
        v1 = self.compare(ntorch.cat([a_, beta], 'embedding'))
        v2 = self.compare(ntorch.cat([b_, alpha], 'embedding'))

        # Aggregation
        v1 = v1.sum('seqlenA')
        v2 = v2.sum('seqlenB')
        output = self.aggregate(ntorch.cat([v1, v2], 'hidden'))

        if self.use_labels:
            assert labels is not None
            y = ntorch.tensor(labels.values.unsqueeze(1),
                              names=('batch', 'hidden')).cuda()
            output = self.labelled_output(
                ntorch.cat([output, y.float()], 'hidden'))
        y_hat = self.output(output)
        return y_hat, F_a_, F_b_
Example #8
    def elbo_reinforce(self, premise, hypothesis, label):
        # computing the q distribution: p(c | a, b, y)
        q = self.q(premise, hypothesis, label).rename('label', 'latent')
        latent_dist = ds.Categorical(logits=q, dim_logit='latent')

        # generating some samples
        samples = latent_dist.sample([self.sample_size], names=('samples', ))

        # bucketing samples by the sampled model to maximize efficiency
        buckets = defaultdict(list)
        premise_lst = premise.unbind('batch')
        hypothesis_lst = hypothesis.unbind('batch')

        samples_list = samples.transpose('batch', 'samples').tolist()
        for i, batch in enumerate(samples_list):
            p, h = premise_lst[i], hypothesis_lst[i]
            for sample in batch:
                buckets[sample].append((i, p, h))

        # evaluating the sampled models efficiently using batching
        orig_batch_size = premise.shape['batch']
        counts = [0] * orig_batch_size
        res = [None] * (self.sample_size * orig_batch_size)

        correct = label.tolist()
        for c, items in buckets.items():
            # stacking data points into batches
            batch_premise = ntorch.stack([p for _, p, _ in items], 'batch')
            batch_hypothesis = ntorch.stack([h for _, _, h in items], 'batch')
            ids = [i for i, _, _ in items]

            # evaluating the model on that batch
            predictions = self.models[c](batch_premise, batch_hypothesis)

            # updating the result at the appropriate index
            for i, log_probs in zip(ids, predictions.unbind('batch')):
                res[self.sample_size * i +
                    counts[i]] = log_probs.values[correct[i]]
                counts[i] += 1

        # reforming and averaging the results for each sample
        res = torch.stack(res, dim=0).reshape(orig_batch_size,
                                              self.sample_size)
        res = ntorch.tensor(res, names=('batch', 'sample'))

        # computing a surrogate objective for REINFORCE
        # https://pyro.ai/examples/svi_part_iii.html
        q_log_prob = latent_dist.log_prob(samples)
        surrogate_objective = (q_log_prob * res.detach() + res).mean('sample')

        # adding on the KL regularizing term
        ones = ntorch.ones(self.K, names='latent').log_softmax(dim='latent')
        uniform_dist = ds.Categorical(logits=ones, dim_logit='latent')
        kl = ds.kl_divergence(latent_dist, uniform_dist) * self.kl_importance

        # reporting the surrogate objective as well as the actual elbo
        loss = -(surrogate_objective - kl).mean()
        elbo = -(res.detach().mean('sample') - kl.detach()).mean()
        return loss, elbo
Example #9
    def reinforce(self, premise, hypothesis, label):
        # REINFORCE
        q = self.q(premise, hypothesis, label).rename('label', 'latent')
        latent_dist = nds.Categorical(logits=q, dim_logit='latent')
        # Sample to approximate E[...]
        samples = latent_dist.sample([self.num_samples], names=('samples', ))

        # Batch premises and hypotheses
        batches = defaultdict(list)
        premise_n = premise.unbind('batch')
        hypothesis_n = hypothesis.unbind('batch')

        # Get some samples
        samples_n = samples.transpose('batch', 'samples').tolist()

        # Idea is to work with samples based on their sampled model
        for i, batch in enumerate(samples_n):
            p = premise_n[i]
            h = hypothesis_n[i]
            for sample in batch:
                batches[sample].append((i, p, h))

        # Can now evaluate sampled models with batching
        batch_size = premise.shape['batch']
        counts = [0] * batch_size
        res = [None] * (self.num_samples * batch_size)

        correct = label.tolist()
        for c, items in batches.items():
            batch_p = ntorch.stack([p for _, p, _ in items], 'batch')
            batch_h = ntorch.stack([h for _, _, h in items], 'batch')
            batch_i = [i for i, _, _ in items]

            # Evaluate model per batch, then update
            preds = self.models[c](batch_p, batch_h)
            for i, log_probs in zip(batch_i, preds.unbind('batch')):
                res[self.num_samples * i +
                    counts[i]] = log_probs.values[correct[i]]
                counts[i] += 1

        # Finally average results for sample
        res = torch.stack(res, dim=0).reshape(batch_size, self.num_samples)
        res = ntorch.tensor(res, names=(
            'batch',
            'sample',
        ))

        # Onward to estimating gradient + calculating loss
        surrogate = (latent_dist.log_prob(samples) * res.detach() +
                     res).mean('sample')
        prior = ntorch.ones(self.K, names='latent').log_softmax(dim='latent')
        prior = nds.Categorical(logits=prior, dim_logit='latent')
        KLD = nds.kl_divergence(latent_dist, prior) * self.kl_weight

        loss = (KLD - surrogate._tensor).mean()  # -(surrogate - KLD)
        elbo = (KLD.detach() - res.detach().mean('sample')._tensor).mean()
        return loss, elbo
Example #10
    def decode_one_step(self, t, output_seq, score, state, enc_out):

        if self.attention:

            def attend(x_t):
                alpha = enc_out.dot("rnnOutput", x_t).softmax("srcSeqlen")
                context = alpha.dot("srcSeqlen", enc_out)
                return context

        h, c = state[-1]
        next_input = output_seq[{"trgSeqlen": slice(t, t + 1)}].long()

        x_t, (h, c) = self.decoder(self.out_embedding(next_input), (h, c))

        if self.attention:
            fc = self.fc(ntorch.cat([attend(x_t), x_t], dim="rnnOutput"))
        else:
            fc = self.fc(x_t)

        fc = fc.sum("trgSeqlen")
        fc = fc.log_softmax("outVocab")
        state = x_t, (h, c)

        #can instead use argmax ...
        #next_tokens = fc.argmax("")
        #ntorch.tensor(topk, names=dim_names)
        #max, argmax = fc.topk("dim2", k)

        k = 100
        _, argmax = fc.topk("outVocab", k)
        #print("argmax", argmax)
        import copy  # used to branch the running output sequence for each candidate

        lst = []
        for i in range(k):
            output_seq = copy.deepcopy(output_seq)

            output_seq[{"trgSeqlen": t + 1}] = argmax[{"outVocab": i}]

            next_token = output_seq[{"trgSeqlen": slice(t + 1, t + 2)}].long()

            indices = next_token.sum("trgSeqlen").rename("batch", "indices")
            batch_indices = ntorch.tensor(
                torch.tensor(np.arange(fc.shape["batch"]), device=device),
                ("batchIndices"))
            newsc = fc.index_select("outVocab", indices).index_select(
                "indices", batch_indices).get("batchIndices", 0)
            score[{"trgSeqlen": t + 1}] = newsc

            assert output_seq[{"trgSeqlen": t + 1}].long() == next_token.sum("trgSeqlen")
            #output_dists[{"trgSeqlen":t+1}] = fc

            lst.append((output_seq, score, state))
        return lst
Example #11
 def forward(self, x, target):
     assert x.shape[self.dim_classes] == self.size
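     # build the smoothed target distribution: _off_prob everywhere,
     # _on_prob at the gold class, and zero mass on the padding index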
     target_dist = ntorch.tensor(x.values, names=x.dims).fill_(
         self._off_prob
     )
     target_dist[{self.dim_classes: target}] = self._on_prob
     target_dist[{self.dim_classes: self.padding_idx}] = 0
     on = {self.dim_batch: (target != self.padding_idx).nonzero()}
     return self.criterion(x[on].values, target_dist[on].values)
Example #12
 def forward(self, premise, hypothesis, target):
     prem = self.embedding(premise)
     prem = self.lstm_prem(prem)[0][{'seqlen': -1}]
     hyp = self.embedding(hypothesis)
     hyp = self.lstm_hyp(hyp)[0][{'seqlen': -1}]
     tar = ntorch.tensor(target.values.reshape(1, -1).to(torch.float32),
                         names=('embedding', 'batch'))
     flat = ntorch.cat([hyp, prem, tar], 'embedding')
     out = self.linear(flat).log_softmax('logprob')
     return out
Example #13
def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    lmod = ntorch.nn.NLLLoss(reduction="none")
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        data = ntorch.tensor(data, ("b", "c", "h", "w"))
        target = ntorch.tensor(target, ("b", ))
        optimizer.zero_grad()
        output = model(data)
        loss = lmod(output, target).mean("b")
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print("Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                epoch,
                batch_idx * len(data),
                len(train_loader.dataset),
                100.0 * batch_idx / len(train_loader),
                loss.item(),
            ))
Example #14
 def convertToX(self, batchText):
     # builds feature vectors by looking up pretrained embeddings
     # (earlier scatter-based construction left commented out below)
     # x = ntorch.tensor( torch.zeros(self.vocabSize, batchText.shape['batch'], device=device), ('vocab', 'batch'))
     # y = ntorch.tensor( torch.ones(batchText.shape['seqlen'], batchText.shape['batch'], device=device), ('seqlen', 'batch'))
     # one_hot_vectors = ntorch.tensor(torch.diag(torch.ones(self.vocabSize)), ('vocab', 'lookup'))
     pretrained_embeddings = ntorch.tensor(TEXT.vocab.vectors,
                                           ('lookup', 'vocab'))
     x = pretrained_embeddings.index_select('lookup', batchText)
     # x.scatter_('vocab', batchText, y, 'seqlen')
     #print("len x:", len(x))
     return x
Example #15
    def forward(self, seq, c0, h0):
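        # build the initial RNN state from head-direction (h0) and place-cell (c0) activations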
        cells = ntorch.cat([h0, c0], ["hdcell", "placecell"], name="cells")
        initial_state = (self.init_cell(cells), self.init_state(cells))

        out, _ = self.rnn(seq, initial_state)

        g = F.dropout(
            self.g(out).transpose("batch", "g", "t").values, 0.5,
            self.training)
        g = ntorch.tensor(g, names=("batch", "g", "t"))

        return self.head(g), self.place(g), g
Example #16
    def state_to_tensor(self, states):
        inputs, scratchs, committeds, outputs, masks, last_actions = zip(
            *states)

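        # stack each state component across the batch and wrap them as named tensors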
        inputs = np.stack(inputs)
        input_tensor = ntorch.tensor(inputs, ('batch', 'Examples', 'strLen'))
        scratchs = np.stack(scratchs)
        scratch_tensor = ntorch.tensor(scratchs,
                                       ('batch', 'Examples', 'strLen'))
        committeds = np.stack(committeds)
        committed_tensor = ntorch.tensor(committeds,
                                         ('batch', 'Examples', 'strLen'))
        outputs = np.stack(outputs)
        output_tensor = ntorch.tensor(outputs, ('batch', 'Examples', 'strLen'))
        chars = ntorch.stack(
            [input_tensor, scratch_tensor, committed_tensor, output_tensor],
            'stateLoc')
        chars = chars.transpose('batch', 'Examples', 'strLen',
                                'stateLoc').long()
        # print(chars.shape)
        masks = np.stack(masks)
        masks = ntorch.tensor(masks,
                              ('batch', 'Examples', 'inFeatures', 'strLen'))
        # print(masks.shape)
        masks = masks.transpose('batch', 'Examples', 'strLen',
                                'inFeatures').float()

        last_actions = np.stack(last_actions)
        last_actions = ntorch.tensor(last_actions, 'batch').long()

        if self.use_cuda:
            return chars.cuda(), masks.cuda(), last_actions.cuda()
        else:
            return chars, masks, last_actions
Example #17
    def forward(self, src, trg, shift_trg=True):
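        # drop to raw tensors in (batch, seqlen) order for the unnamed encoder/decoder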
        src = src._force_order(("batch", "srcSeqlen")).values

        # do this while training
        if shift_trg:
            trg = self._shift_trg(trg)

        trg = trg._force_order(("batch", "trgSeqlen")).values
        src_mask, trg_mask = self.make_masks(src, trg)
        enc_src = self.encoder(src, src_mask)
        out = self.decoder(trg, enc_src, trg_mask, src_mask)
        out = ntorch.tensor(out, ("batch", "trgSeqlen", "vocab"))
        return out.log_softmax("vocab")
Example #18
	def forward(self, text, aa_info):
		''' 
		  Pass in context for the next amino acid
		'''
		
		# Reset for each new batch...
		h_0 = ntorch.zeros(text.shape["batch"], self.num_layers, self.hiddenlen, 
							names=("batch", "layers", "hiddenlen")).to(self.device)
		c_0 = ntorch.zeros(text.shape["batch"], self.num_layers, self.hiddenlen, 
							names=("batch", "layers", "hiddenlen")).to(self.device)
	 
		# If we should use all the sequence as input
		if self.teacher_force_prob == 1: 
		  text_embedding = self.embedding(text)
		  hidden_states, (h_n, c_n) = self.LSTM(text_embedding, (h_0, c_0))
		  output = self.linear_dropout(hidden_states)
		  output = ntorch.cat([output, aa_info], dim="hiddenlen")
		  output = self.linear(output)
		
		# If we should use some combination of teacher forcing
		else: 
			# Use for teacher forcing...
			outputs = []
			model_input = text[{"seqlen" : slice(0, 1)}]
			h_n, c_n = h_0, c_0
			for position in range(text.shape["seqlen"]): 
				text_embedding = self.embedding(model_input)
				hidden_states, (h_n, c_n) = self.LSTM(text_embedding, (h_n, c_n))

				output = self.linear_dropout(hidden_states)
				aa_info_subset = aa_info[{"seqlen" : slice(position, position+1)}]
				output = ntorch.cat([output, aa_info_subset], dim="hiddenlen")
				output = self.linear(output)
				outputs.append(output)

				# Define next input... 
				if random.random() < self.teacher_force_prob: 
					model_input = text[{"seqlen" : slice(position, position+1)}]
				else: 
					# Masking output... 
					mask_targets = text[{"seqlen" : slice(position, position+1)}].clone()
					if position == 0: 
						mask_targets[{"seqlen" : 0}] = TEXT.vocab.stoi["<start>"]
					mask_bad_codons = ntorch.tensor(mask_tbl[mask_targets.values], 
						names=("seqlen", "batch", "vocablen")).float()

					model_input = (output + mask_bad_codons).argmax("vocablen")
					# model_input = (output).argmax("vocablen")
			  
			output = ntorch.cat(outputs, dim="seqlen")
		return output
Example #19
    def __init__(self, scale, n, scene, seed=None):
        if isinstance(scene, SquareCage):
            rs = np.random.RandomState(seed)
            place_cells = rs.uniform(-scene.height / 2,
                                     scene.height / 2,
                                     size=(n, 2))
        else:
            place_cells = scene.random(n)
        self.centers = ntorch.tensor(place_cells,
                                     names=("placecell", "ax")).float().cuda()
        self.scale = scale

        plt.scatter(*place_cells.T)
        plt.show()
Example #20
def get_batch(traj, place_cells, hd_cells, dims=None, pos=False):
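    # wrap the raw trajectory arrays as named tensors, then encode targets with place/HD cells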
    if dims is None:
        dims = DIMS
    ntraj = [ntorch.tensor(i, names=n).cuda() for i, n in zip(traj, dims)]
    target_pos, target_hd, ego_vel, init_pos, init_hd = ntraj
    cs, c0 = place_cells(target_pos), place_cells(init_pos)
    hs, h0 = hd_cells(target_hd), hd_cells(init_hd)

    hs = hs[{'hd': 0}]
    h0 = h0[{'hd': 0}]

    if pos:
        return cs, hs, ego_vel, c0, h0, target_pos

    return cs, hs, ego_vel, c0, h0
Example #21
def get_prediction(batch, model, aa_compress, mask_tbl, device): 
	''' Predict outputs from sequence'''
	model.to(device)
	model.eval()
	with torch.no_grad():
		seq_len = batch.sequence.shape["seqlen"]
		text = batch.sequence.narrow("seqlen", 0, seq_len - 1)
		target = batch.sequence.narrow("seqlen", 1, seq_len - 1)
		# Forward
		predictions = model(text, aa_compress(target)) 
		mask_bad_codons = ntorch.tensor(mask_tbl[target.values], 
							 names=("seqlen", "batch", "vocablen")).float()
		predictions = (mask_bad_codons + predictions.float())
		predictions = predictions.argmax("vocablen")
	return predictions
Example #22
def test(args, model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    lmod = ntorch.nn.NLLLoss(reduction="none")
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            data = ntorch.tensor(data, ("b", "c", "h", "w"))
            target = ntorch.tensor(target, ("b", ))
            output = model(data)
            test_loss += lmod(output, target).sum("b").item()
            pred = output.max("classes")[1]
            correct += (pred == target).sum("b").item()

    test_loss /= len(test_loader.dataset)

    print(
        "\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
            test_loss,
            correct,
            len(test_loader.dataset),
            100.0 * correct / len(test_loader.dataset),
        ))
Example #23
def make_translation_predictions(model, use_bs2=False):
    print('Generating translations')
    with open('test_predictions.txt', 'w') as outfile:
        with open('source_test.txt', 'r') as infile:
            for line in tqdm(list(infile)):
                tokens = [DE.vocab.stoi[w] for w in tokenize_de(line.strip())]
                src = ntorch.tensor(tokens, names="srcSeqlen")
                if use_bs2:
                    translation = beam_search2(model,
                                               src,
                                               beam_size=5,
                                               num_results=10)[0]
                else:
                    translation = beam_search(model,
                                              src,
                                              beam_size=5,
                                              num_results=10)[0]

                assert translation[0] == BOS_IND
                sent = ' '.join(EN.vocab.itos[i] for i in translation[1:])
                outfile.write(sent + '\n')
Example #24
    def align(self, s):
        intra_s = self.intra_layers(s)
        intra_s_ = intra_s.values.transpose(0, 1)  # formatting
        batch_seq = torch.bmm(intra_s_, intra_s_.transpose(1, 2))

        batches = batch_seq.shape[0]
        seqlen = batch_seq.shape[1]

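        # distance-sensitive bias: clamp relative offsets (i - j) to [-cap, cap]
        # and look up a learned bias term for each clamped offset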
        align_matrix = torch.tensor([[(i - j) for j in range(seqlen)]
                                     for i in range(seqlen)])

        align_matrix = torch.clamp(align_matrix, -self.cap, self.cap)
        align_matrix = align_matrix.unsqueeze(0).expand(
            batches, seqlen, seqlen)  # batch * seqlen * seqlen

        align_matrix_b = self.bias[align_matrix + self.cap]
        weights = torch.softmax(align_matrix_b + batch_seq, dim=2)
        s_ = torch.matmul(weights, s.values.transpose(0, 1))
        s_ = ntorch.tensor(s_, ('batch', 'seqlen', 'embedding'))

        return ntorch.cat([s, s_], 'embedding')
Example #25
    def forward(self, a, b, y=None):
        a_bar, b_bar = self.input(a, b)

        # ATTEND
        F_a = self.f(a_bar)
        F_b = self.f(b_bar)

        e_mat = F_a.dot('hidden', F_b)
        alpha = e_mat.softmax(dim='aSeqlen').dot('aSeqlen', a_bar)
        beta = e_mat.softmax(dim='bSeqlen').dot('bSeqlen', b_bar)

        # COMPARE AND AGGREGATE
        v1 = self.g(ntorch.cat([a_bar, beta], 'embedding')).sum('aSeqlen')
        v2 = self.g(ntorch.cat([b_bar, alpha], 'embedding')).sum('bSeqlen')

        # NOTE: a log-softmax follows the final linear layer, so train with NLLLoss
        out = self.h(ntorch.cat([v1, v2], 'hidden'))
        if self.use_labels:
            y = ntorch.tensor(y.values.unsqueeze(1), names=('batch', 'hidden'))
            out = self.y_combine(ntorch.cat([out, y.float()], 'hidden'))
        yhat = self.final(out)
        return yhat
Example #26
    def self_align(self, a):
        # generate a' from a
        fintra_a = self.f_intra(a)

        unnamed_fintra = fintra_a.values.transpose(0, 1)
        fmat = torch.bmm(unnamed_fintra, unnamed_fintra.transpose(1, 2))
        # fmat = batch x seqlen x seqlen

        batches = fmat.shape[0]
        seqlen = fmat.shape[1]
        index_mat = torch.tensor([[(i - j) for j in range(seqlen)]
                                  for i in range(seqlen)])
        index_mat = torch.clamp(index_mat, -self.d_cap, self.d_cap)
        index_mat = index_mat.unsqueeze(0).expand(batches, seqlen, seqlen)

        dmat = self.bias[index_mat + self.d_cap]
        weights = torch.softmax(fmat + dmat, dim=2)
        aprime = torch.matmul(weights, a.values.transpose(0, 1))
        aprime = ntorch.tensor(aprime, ('batch', 'seqlen', 'embedding'))

        abar = ntorch.cat([a, aprime], 'embedding')
        return abar
Example #27
def make_kaggle_predictions(model, use_ks2=False):
    print('Generating Kaggle predictions')
    with open('kaggle_predictions.txt', 'w') as outfile:
        outfile.write('Id,Predicted\n')
        with open('source_test.txt', 'r') as infile:
            for i, line in enumerate(tqdm(list(infile))):
                tokens = [DE.vocab.stoi[w] for w in tokenize_de(line.strip())]
                src = ntorch.tensor(tokens, names="srcSeqlen")

                if use_ks2:
                    preds = kaggle_search2(model, src)
                else:
                    preds = kaggle_search(model, src)

                trigrams = []
                for trigram in preds:
                    assert len(trigram) == 3
                    trigram = escape('|'.join(EN.vocab.itos[i]
                                              for i in trigram))
                    trigrams.append(trigram)

                assert len(trigrams) == 100
                outfile.write(str(i) + ',' + ' '.join(trigrams) + '\n')
Example #28
    def elbo_exact(self, premise, hypothesis, label):
        # computing the q distribution: p(c | a, b, y)
        q = self.q(premise, hypothesis, label).rename('label', 'latent')
        latent_dist = ds.Categorical(logits=q, dim_logit='latent')

        one_hot_label = torch.eye(4).index_select(0, label.values)
        one_hot_label = ntorch.tensor(one_hot_label, names=('batch', 'label'))

        # computing p(y | a, b, c) for every c
        objective = 0
        q = q.exp()
        for c in range(len(self.models)):
            log_probs = self.models[c](premise, hypothesis)
            model_probs = q.get('latent', c)
            objective += (log_probs * one_hot_label).sum('label') * model_probs

        # adding on the KL regularizing term
        ones = ntorch.ones(self.K, names='latent').log_softmax(dim='latent')
        uniform_dist = ds.Categorical(logits=ones, dim_logit='latent')

        kl = ds.kl_divergence(latent_dist, uniform_dist) * self.kl_importance
        loss = -(objective.mean() - kl.mean())
        return loss, loss.detach()
Example #29
    def exact(self, premise, hypothesis, label):
        q = self.q(premise, hypothesis, label).rename('label', 'latent')
        latent_dist = nds.Categorical(logits=q, dim_logit='latent')

        one_hot = torch.eye(4, out=torch.cuda.FloatTensor()).index_select(
            0, label.values)
        one_hot = ntorch.tensor(one_hot, names=('batch', 'label'))

        # Calculate p(y | a, b, c) across all models K
        surrogate = 0
        q = q.exp()
        for c in range(len(self.models)):
            log_probs = self.models[c](premise, hypothesis)
            model_probs = q.get('latent', c)
            surrogate += (log_probs * one_hot).sum('label') * model_probs

        # KL regularization
        ones = ntorch.ones(self.K, names='latent').log_softmax(dim='latent')
        prior = nds.Categorical(logits=ones, dim_logit='latent')

        KLD = nds.kl_divergence(latent_dist, prior) * self.kl_weight
        loss = KLD.mean() - surrogate._tensor.mean()  # -(surrogate.mean() - kl.mean())
        return loss, loss.detach()
Example #30
def joint_ppl_acc(data_iter,
                  model,
                  device,
                  aa_compress,
                  TEXT,
                  mask_tbl,
                  teacher_force=1):
    ''' Calculate perplexity and accuracy on data iter
	
		Args: 
			data_iter: Bucket iter
			model : Model that works over data iter
			device: device
			aa_compress: Helper model to consider dependencies along the model
			TEXT: text object over codons
			mask_tbl: Mask table 
			teacher_force: Whether to use teacher forcing or not. Default yes
	Returns: 
		{"acc": accuracy, "ppl": perplexity}
	'''

    model.to(device)
    model.eval()
    model.teacher_force_prob = teacher_force
    aa_compress.to(device)
    aa_compress.eval()
    ppl = 0
    num_correct = 0
    num_total = 0
    loss_function = ntorch.nn.CrossEntropyLoss(
        reduction="none").spec("vocablen")
    with torch.no_grad():
        for i, batch in enumerate(data_iter):

            # Select for all non zero tensors
            # Use this to find all indices that aren't padding
            seq_len = batch.sequence.shape["seqlen"]
            text = batch.sequence.narrow("seqlen", 0, seq_len - 1)
            target = batch.sequence.narrow("seqlen", 1, seq_len - 1)

            stacked_target = target.stack(dims=("batch", "seqlen"),
                                          name="seqlen")
            mask = (stacked_target != TEXT.vocab.stoi["<pad>"])
            prop_indices = (ntorch.nonzero(mask).get("inputdims", 0)).rename(
                "elements", "seqlen")
            # Forward
            predictions = model(text, aa_compress(target))

            # Mask all outputs that don't work
            # Note: switch the first target codon to the start codon so that all
            #   possible start codons can be predicted
            mask_targets = target.clone()
            mask_targets[{"seqlen": 0}] = TEXT.vocab.stoi["<start>"]
            mask_bad_codons = ntorch.tensor(mask_tbl[mask_targets.values],
                                            names=("seqlen", "batch",
                                                   "vocablen")).float()
            predictions = (mask_bad_codons.double() + predictions.double())

            # Stack the predictions into one long vector and get correct indices
            predictions = (predictions.stack(dims=("batch", "seqlen"),
                                             name="seqlen").index_select(
                                                 "seqlen", prop_indices))

            predictions_hard = predictions.argmax("vocablen")

            # Select correct indices from target
            stacked_target = (stacked_target.index_select(
                "seqlen", prop_indices))
            num_correct += (predictions_hard == stacked_target).sum().item()
            num_total += predictions_hard.shape["seqlen"]

            loss = loss_function(predictions, stacked_target)
            ppl += loss.sum().item()

            # For quick results, toggle this
            # if i == 20:
            #	break
    return {"acc": num_correct / num_total, "ppl": np.exp(ppl / num_total)}