def generate_ngram_naive_bayes_model(training_iter, alpha):
    # Smoothed per-class feature counts and per-class document counts.
    labelCounts = ntorch.zeros(len(LABEL.vocab), names=("class",)).cuda()
    vocabCounts = (
        ntorch.tensor([alpha[f[0]] for f in NGRAMS.vocab.itos],
                      names=("vocab",)).cuda()
        * ntorch.ones(len(LABEL.vocab), names=("class",)).cuda())
    classes = ntorch.tensor(torch.eye(len(LABEL.vocab)),
                            names=("class", "classIndex")).cuda()
    encoding = ntorch.tensor(torch.eye(len(NGRAMS.vocab)),
                             names=("vocab", "vocabIndex")).cuda()

    # Accumulate binarized (set-of-ngrams) counts for each class.
    for batch in training_iter:
        oneHot = encoding.index_select("vocabIndex", batch.text)
        setofwords, _ = oneHot.max("ngramlen")
        classRep = classes.index_select("classIndex", batch.label.long())
        labelCounts += classRep.sum("batch")
        vocabCounts += setofwords.dot("batch", classRep)

    # Log-count ratio weights: r = ((p / |p|) / (q / |q|)).log().
    p = vocabCounts.get("class", 1)
    q = vocabCounts.get("class", 0)
    r = ((p * q.sum()) / (q * p.sum())).log()
    weight = r
    b = (labelCounts.get("class", 1) / labelCounts.get("class", 0)).log()

    def naive_bayes(test_batch):
        oneHotTest = encoding.index_select("vocabIndex", test_batch.cuda())
        setofwords, _ = oneHotTest.max("seqlen")
        y = (weight.dot("vocab", setofwords) + b).sigmoid()
        # Spread the scalar probability into a two-class score vector.
        return (y - 0.5) * ntorch.tensor([-1., 1.], names=("class",)).cuda() + 0.5

    return naive_bayes
def __init__(self, dataset, vocabSize, batchSize, alpha=1):
    super(naiveBayesModel, self).__init__()
    self.vocabSize = vocabSize
    N_p = 0  # number of positive examples seen
    N_m = 0  # number of negative examples seen
    # Smoothed feature counts for the positive (p) and negative (q) classes.
    p = ntorch.tensor(torch.ones(vocabSize) * alpha, ['vocab']).cuda()
    q = ntorch.tensor(torch.ones(vocabSize) * alpha, ['vocab']).cuda()

    for i, batch in enumerate(dataset):
        if i % 100 == 0:
            print(f"iteration {i}")
        # Binarized set-of-words features, ('vocab', 'batch').
        f = self.convertToX(batch.text)
        # Add each example's features to the counts of its gold class.
        p += ntorch.dot("batch", batch.label.float(), f)
        q += ntorch.dot("batch", (batch.label == 0.).float(), f)
        _n = batch.label.sum("batch").item()
        N_p += _n
        N_m += batchSize - _n

    # Log-count ratio weights and class-prior bias.
    r = ntorch.log(p / p.sum('vocab').item()) - ntorch.log(
        q / q.sum('vocab').item())
    self.W = r
    self.b = ntorch.tensor(math.log(N_p / N_m), []).cuda()
    print("b", self.b)
    print("W", self.W)
    print("N_p", N_p)
    print("N_m", N_m)
    print("sum W", self.W.sum("vocab"))
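# A self-contained sketch of the log-count-ratio weights the two naive Bayes
# constructors above compute (r = log(p / |p|) - log(q / |q|)). Plain torch
# with toy counts; `alpha` plays the same smoothing role as the constructor
# argument, and the numbers are illustrative only.
import torch

alpha = 1.0
p = torch.tensor([3., 0., 1.]) + alpha  # feature counts in positive examples
q = torch.tensor([0., 2., 1.]) + alpha  # feature counts in negative examples
r = (p / p.sum()).log() - (q / q.sum()).log()
print(r)  # positive entries vote for the positive class, negative for the other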
def convertToX(self, batchText):
    # Build binarized set-of-words feature vectors with scatter.
    x = ntorch.tensor(
        torch.zeros(self.vocabSize, batchText.shape['batch'], device=device),
        ('vocab', 'batch'))
    y = ntorch.tensor(
        torch.ones(batchText.shape['seqlen'], batchText.shape['batch'],
                   device=device),
        ('seqlen', 'batch'))
    x.scatter_('vocab', batchText, y, 'seqlen')
    return x
def convertToX(self, batchText):
    # Build binarized set-of-words feature vectors with scatter (CUDA variant).
    x = ntorch.tensor(
        torch.zeros(self.vocabSize, batchText.shape['batch']).cuda(),
        ('vocab', 'batch'))
    y = ntorch.tensor(
        torch.ones(batchText.shape['seqlen'], batchText.shape['batch']),
        ('seqlen', 'batch')).cuda()
    x.scatter_('vocab', batchText, y, 'seqlen')
    return x
def _shift_trg(self, trg):
    start_of_sent = [[BOS_IND] * trg.shape['batch']]
    start_of_sent = ntorch.tensor(start_of_sent, names=('trgSeqlen', 'batch'))
    end_of_sent = trg[{'trgSeqlen': slice(0, trg.shape['trgSeqlen'] - 1)}]
    shifted = ntorch.cat((start_of_sent, end_of_sent), 'trgSeqlen')
    return shifted
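# What _shift_trg does, illustrated with plain torch. BOS_IND here is a
# hypothetical <s> index (the real value comes from the target vocab): given
# targets [[w1, w2, w3]] the decoder input becomes [[<s>, w1, w2]].
import torch

BOS_IND = 2
trg = torch.tensor([[4, 5, 6], [7, 8, 9]])                     # (batch, trgSeqlen)
bos = torch.full((trg.shape[0], 1), BOS_IND, dtype=trg.dtype)  # one <s> per row
shifted = torch.cat([bos, trg[:, :-1]], dim=1)                 # drop the last token
print(shifted)  # tensor([[2, 4, 5], [2, 7, 8]])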
def get_prediction_iter(iterator, model, TEXT, aa_compress, mask_tbl, device):
    '''Predict outputs from sequence'''
    model.to(device)
    model.eval()
    output = []
    with torch.no_grad():
        for batch in iterator:
            seq_len = batch.sequence.shape["seqlen"]
            text = batch.sequence.narrow("seqlen", 0, seq_len - 1)
            target = batch.sequence.narrow("seqlen", 1, seq_len - 1)

            # Forward
            predictions = model(text, aa_compress(target))

            # Mask all outputs that don't work.
            # Note: first, we clone the targets and then we switch the first
            # target codon into the start codon to ensure that we allow for
            # all possible start codons to be predicted!
            mask_targets = target.clone()
            mask_targets[{"seqlen": 0}] = TEXT.vocab.stoi["<start>"]
            mask_bad_codons = ntorch.tensor(
                mask_tbl[mask_targets.values],
                names=("seqlen", "batch", "vocablen")).float()
            predictions = (mask_bad_codons + predictions.float())
            predictions = predictions.argmax("vocablen")
            output.append(predictions)
    return output
def forward(self, sent1, sent2, labels=None):
    """Notation straight from paper this time"""
    a_, b_ = self.input(sent1, sent2)

    # Attention
    F_a_ = self.attend(a_)
    F_b_ = self.attend(b_)
    e = F_a_.dot('hidden', F_b_)
    alpha = e.softmax(dim='seqlenA').dot('seqlenA', a_)
    beta = e.softmax(dim='seqlenB').dot('seqlenB', b_)

    # Comparison
    v1 = self.compare(ntorch.cat([a_, beta], 'embedding'))
    v2 = self.compare(ntorch.cat([b_, alpha], 'embedding'))

    # Aggregation
    v1 = v1.sum('seqlenA')
    v2 = v2.sum('seqlenB')
    output = self.aggregate(ntorch.cat([v1, v2], 'hidden'))

    if self.use_labels:
        assert labels is not None
        y = ntorch.tensor(labels.values.unsqueeze(1),
                          names=('batch', 'hidden')).cuda()
        output = self.labelled_output(
            ntorch.cat([output, y.float()], 'hidden'))
    y_hat = self.output(output)
    return y_hat, F_a_, F_b_
def elbo_reinforce(self, premise, hypothesis, label):
    # computing the q distribution: p(c | a, b, y)
    q = self.q(premise, hypothesis, label).rename('label', 'latent')
    latent_dist = ds.Categorical(logits=q, dim_logit='latent')

    # generating some samples
    samples = latent_dist.sample([self.sample_size], names=('samples',))

    # bucketing samples by the sampled model to maximize efficiency
    buckets = defaultdict(list)
    premise_lst = premise.unbind('batch')
    hypothesis_lst = hypothesis.unbind('batch')
    samples_list = samples.transpose('batch', 'samples').tolist()
    for i, batch in enumerate(samples_list):
        p, h = premise_lst[i], hypothesis_lst[i]
        for sample in batch:
            buckets[sample].append((i, p, h))

    # evaluating the sampled models efficiently using batching
    orig_batch_size = premise.shape['batch']
    counts = [0] * orig_batch_size
    res = [None] * (self.sample_size * orig_batch_size)
    correct = label.tolist()
    for c, items in buckets.items():
        # stacking data points into batches
        batch_premise = ntorch.stack([p for _, p, _ in items], 'batch')
        batch_hypothesis = ntorch.stack([h for _, _, h in items], 'batch')
        ids = [i for i, _, _ in items]

        # evaluating the model on that batch
        predictions = self.models[c](batch_premise, batch_hypothesis)

        # updating the result at the appropriate index
        for i, log_probs in zip(ids, predictions.unbind('batch')):
            res[self.sample_size * i + counts[i]] = log_probs.values[correct[i]]
            counts[i] += 1

    # reforming and averaging the results for each sample
    res = torch.stack(res, dim=0).reshape(orig_batch_size, self.sample_size)
    res = ntorch.tensor(res, names=('batch', 'sample'))

    # computing a surrogate objective for REINFORCE
    # https://pyro.ai/examples/svi_part_iii.html
    q_log_prob = latent_dist.log_prob(samples)
    surrogate_objective = (q_log_prob * res.detach() + res).mean('sample')

    # adding on the KL regularizing term
    ones = ntorch.ones(self.K, names='latent').log_softmax(dim='latent')
    uniform_dist = ds.Categorical(logits=ones, dim_logit='latent')
    kl = ds.kl_divergence(latent_dist, uniform_dist) * self.kl_importance

    # reporting the surrogate objective as well as the actual elbo
    loss = -(surrogate_objective - kl).mean()
    elbo = -(res.detach().mean('sample') - kl.detach()).mean()
    return loss, elbo
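# The surrogate objective above relies on the standard score-function
# (REINFORCE) identity, not anything specific to this model:
#   d/dtheta E_q[f(z)] = E_q[f(z) * d/dtheta log q(z)],
# so optimizing `log_prob * f.detach() + f` gives correct gradients for both
# the sampler q and the downstream models. A toy numerical check:
import torch

logits = torch.zeros(3, requires_grad=True)
dist = torch.distributions.Categorical(logits=logits)
z = dist.sample((10000,))
f = (z == 0).float()  # stand-in "reward": 1 when class 0 is sampled
(dist.log_prob(z) * f.detach()).mean().backward()
print(logits.grad)  # Monte Carlo estimate of d/dlogits E[f]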
def reinforce(self, premise, hypothesis, label):
    # REINFORCE
    q = self.q(premise, hypothesis, label).rename('label', 'latent')
    latent_dist = nds.Categorical(logits=q, dim_logit='latent')

    # Sample to approximate E[.]
    samples = latent_dist.sample([self.num_samples], names=('samples',))

    # Batch premises and hypotheses
    batches = defaultdict(list)
    premise_n = premise.unbind('batch')
    hypothesis_n = hypothesis.unbind('batch')
    samples_n = samples.transpose('batch', 'samples').tolist()

    # Idea is to bucket examples by their sampled model
    for i, batch in enumerate(samples_n):
        p = premise_n[i]
        h = hypothesis_n[i]
        for sample in batch:
            batches[sample].append((i, p, h))

    # Can now evaluate sampled models with batching
    batch_size = premise.shape['batch']
    counts = [0] * batch_size
    res = [None] * (self.num_samples * batch_size)
    correct = label.tolist()
    for c, items in batches.items():
        batch_p = ntorch.stack([p for _, p, _ in items], 'batch')
        batch_h = ntorch.stack([h for _, _, h in items], 'batch')
        batch_i = [i for i, _, _ in items]

        # Evaluate model c on its batch, then write results back
        preds = self.models[c](batch_p, batch_h)
        for i, log_probs in zip(batch_i, preds.unbind('batch')):
            res[self.num_samples * i + counts[i]] = log_probs.values[correct[i]]
            counts[i] += 1

    # Finally average the results per sample
    res = torch.stack(res, dim=0).reshape(batch_size, self.num_samples)
    res = ntorch.tensor(res, names=('batch', 'sample'))

    # Onward to estimating the gradient + calculating the loss
    surrogate = (latent_dist.log_prob(samples) * res.detach() + res).mean('sample')
    prior = ntorch.ones(self.K, names='latent').log_softmax(dim='latent')
    prior = nds.Categorical(logits=prior, dim_logit='latent')
    KLD = nds.kl_divergence(latent_dist, prior) * self.kl_weight
    loss = (KLD - surrogate._tensor).mean()  # -(surrogate - KLD)
    elbo = (KLD.detach() - res.detach().mean('sample')._tensor).mean()
    return loss, elbo
def decode_one_step(self, t, output_seq, score, state, enc_out):
    if self.attention:
        def attend(x_t):
            alpha = enc_out.dot("rnnOutput", x_t).softmax("srcSeqlen")
            context = alpha.dot("srcSeqlen", enc_out)
            return context

    h, c = state[-1]
    next_input = output_seq[{"trgSeqlen": slice(t, t + 1)}].long()
    x_t, (h, c) = self.decoder(self.out_embedding(next_input), (h, c))
    if self.attention:
        fc = self.fc(ntorch.cat([attend(x_t), x_t], dim="rnnOutput"))
    else:
        fc = self.fc(x_t)
    fc = fc.sum("trgSeqlen")
    fc = fc.log_softmax("outVocab")
    state = x_t, (h, c)

    # Expand the beam: take the top-k next tokens and score each candidate
    # continuation (could instead use argmax for greedy decoding).
    k = 100
    _, argmax = fc.topk("outVocab", k)
    import copy
    lst = []
    for i in range(k):
        # Copy the sequence and score so candidates do not share state.
        candidate_seq = copy.deepcopy(output_seq)
        candidate_score = copy.deepcopy(score)
        candidate_seq[{"trgSeqlen": t + 1}] = argmax[{"outVocab": i}]
        next_token = candidate_seq[{"trgSeqlen": slice(t + 1, t + 2)}].long()
        indices = next_token.sum("trgSeqlen").rename("batch", "indices")
        batch_indices = ntorch.tensor(
            torch.tensor(np.arange(fc.shape["batch"]), device=device),
            ("batchIndices",))
        # Log-probability of the chosen token for each batch element.
        newsc = fc.index_select("outVocab", indices).index_select(
            "indices", batch_indices).get("batchIndices", 0)
        candidate_score[{"trgSeqlen": t + 1}] = newsc
        lst.append((candidate_seq, candidate_score, state))
    return lst
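# The candidate scoring above (the chained index_select calls) gathers, for
# each batch element, the log-probability of the token just written at t + 1.
# In plain torch this is a gather along the vocab dimension (toy shapes):
import torch

fc = torch.log_softmax(torch.randn(4, 10), dim=1)  # (batch, outVocab)
next_tokens = torch.randint(10, (4,))              # chosen token per batch element
newsc = fc.gather(1, next_tokens.unsqueeze(1)).squeeze(1)  # (batch,)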
def forward(self, x, target):
    assert x.shape[self.dim_classes] == self.size
    # Smoothed target distribution: _off_prob everywhere, _on_prob at the
    # gold class, and zero mass on the padding index.
    target_dist = ntorch.tensor(x.values, names=x.dims).fill_(self._off_prob)
    target_dist[{self.dim_classes: target}] = self._on_prob
    target_dist[{self.dim_classes: self.padding_idx}] = 0
    # Only score positions whose target is not padding.
    on = {self.dim_batch: (target != self.padding_idx).nonzero()}
    return self.criterion(x[on].values, target_dist[on].values)
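# A minimal sketch of the smoothed target distribution built above, in plain
# torch. The split assumed here is the usual label-smoothing convention,
# _on_prob = 1 - smoothing and _off_prob = smoothing / (size - 2); the actual
# values come from this module's constructor, which is not shown.
import torch

size, padding_idx, smoothing = 5, 0, 0.1
target = torch.tensor([3])
dist = torch.full((1, size), smoothing / (size - 2))
dist[0, target] = 1.0 - smoothing
dist[0, padding_idx] = 0.0
print(dist)  # tensor([[0.0000, 0.0333, 0.0333, 0.9000, 0.0333]])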
def forward(self, premise, hypothesis, target):
    prem = self.embedding(premise)
    prem = self.lstm_prem(prem)[0][{'seqlen': -1}]
    hyp = self.embedding(hypothesis)
    hyp = self.lstm_hyp(hyp)[0][{'seqlen': -1}]
    tar = ntorch.tensor(target.values.reshape(1, -1).to(torch.float32),
                        names=('embedding', 'batch'))
    flat = ntorch.cat([hyp, prem, tar], 'embedding')
    out = self.linear(flat).log_softmax('logprob')
    return out
def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    lmod = ntorch.nn.NLLLoss(reduction="none")
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        data = ntorch.tensor(data, ("b", "c", "h", "w"))
        target = ntorch.tensor(target, ("b",))
        optimizer.zero_grad()
        output = model(data)
        loss = lmod(output, target).mean("b")
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print("Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                epoch,
                batch_idx * len(data),
                len(train_loader.dataset),
                100.0 * batch_idx / len(train_loader),
                loss.item(),
            ))
def convertToX(self, batchText):
    # Look up pretrained embeddings for each token instead of building
    # one-hot scatter features (compare the scatter-based convertToX variants).
    pretrained_embeddings = ntorch.tensor(TEXT.vocab.vectors,
                                          ('lookup', 'vocab'))
    x = pretrained_embeddings.index_select('lookup', batchText)
    return x
def forward(self, seq, c0, h0):
    cells = ntorch.cat([h0, c0], ["hdcell", "placecell"], name="cells")
    initial_state = (self.init_cell(cells), self.init_state(cells))
    out, _ = self.rnn(seq, initial_state)
    g = F.dropout(
        self.g(out).transpose("batch", "g", "t").values, 0.5, self.training)
    g = ntorch.tensor(g, names=("batch", "g", "t"))
    return self.head(g), self.place(g), g
def state_to_tensor(self, states):
    inputs, scratchs, committeds, outputs, masks, last_actions = zip(*states)

    inputs = np.stack(inputs)
    input_tensor = ntorch.tensor(inputs, ('batch', 'Examples', 'strLen'))
    scratchs = np.stack(scratchs)
    scratch_tensor = ntorch.tensor(scratchs, ('batch', 'Examples', 'strLen'))
    committeds = np.stack(committeds)
    committed_tensor = ntorch.tensor(committeds, ('batch', 'Examples', 'strLen'))
    outputs = np.stack(outputs)
    output_tensor = ntorch.tensor(outputs, ('batch', 'Examples', 'strLen'))

    chars = ntorch.stack(
        [input_tensor, scratch_tensor, committed_tensor, output_tensor],
        'stateLoc')
    chars = chars.transpose('batch', 'Examples', 'strLen', 'stateLoc').long()

    masks = np.stack(masks)
    masks = ntorch.tensor(masks, ('batch', 'Examples', 'inFeatures', 'strLen'))
    masks = masks.transpose('batch', 'Examples', 'strLen', 'inFeatures').float()

    last_actions = np.stack(last_actions)
    last_actions = ntorch.tensor(last_actions, 'batch').long()

    if self.use_cuda:
        return chars.cuda(), masks.cuda(), last_actions.cuda()
    return chars, masks, last_actions
def forward(self, src, trg, shift_trg=True):
    src = src._force_order(("batch", "srcSeqlen")).values
    if shift_trg:
        # Shift targets right while training.
        trg = self._shift_trg(trg)
    trg = trg._force_order(("batch", "trgSeqlen")).values
    src_mask, trg_mask = self.make_masks(src, trg)
    enc_src = self.encoder(src, src_mask)
    out = self.decoder(trg, enc_src, trg_mask, src_mask)
    out = ntorch.tensor(out, ("batch", "trgSeqlen", "vocab"))
    return out.log_softmax("vocab")
def forward(self, text, aa_info):
    '''Pass in context for the next amino acid'''
    # Reset hidden state for each new batch.
    h_0 = ntorch.zeros(text.shape["batch"], self.num_layers, self.hiddenlen,
                       names=("batch", "layers", "hiddenlen")).to(self.device)
    c_0 = ntorch.zeros(text.shape["batch"], self.num_layers, self.hiddenlen,
                       names=("batch", "layers", "hiddenlen")).to(self.device)

    # If we should use all the sequence as input
    if self.teacher_force_prob == 1:
        text_embedding = self.embedding(text)
        hidden_states, (h_n, c_n) = self.LSTM(text_embedding, (h_0, c_0))
        output = self.linear_dropout(hidden_states)
        output = ntorch.cat([output, aa_info], dim="hiddenlen")
        output = self.linear(output)
    # If we should use some combination of teacher forcing
    else:
        outputs = []
        model_input = text[{"seqlen": slice(0, 1)}]
        h_n, c_n = h_0, c_0
        for position in range(text.shape["seqlen"]):
            text_embedding = self.embedding(model_input)
            hidden_states, (h_n, c_n) = self.LSTM(text_embedding, (h_n, c_n))
            output = self.linear_dropout(hidden_states)
            aa_info_subset = aa_info[{"seqlen": slice(position, position + 1)}]
            output = ntorch.cat([output, aa_info_subset], dim="hiddenlen")
            output = self.linear(output)
            outputs.append(output)

            # Define the next input: either the gold token (teacher forcing)
            # or the model's own masked prediction.
            if random.random() < self.teacher_force_prob:
                model_input = text[{"seqlen": slice(position, position + 1)}]
            else:
                mask_targets = text[{"seqlen": slice(position, position + 1)}].clone()
                if position == 0:
                    mask_targets[{"seqlen": 0}] = TEXT.vocab.stoi["<start>"]
                mask_bad_codons = ntorch.tensor(
                    mask_tbl[mask_targets.values],
                    names=("seqlen", "batch", "vocablen")).float()
                model_input = (output + mask_bad_codons).argmax("vocablen")
        output = ntorch.cat(outputs, dim="seqlen")
    return output
def __init__(self, scale, n, scene, seed=None):
    if isinstance(scene, SquareCage):
        rs = np.random.RandomState(seed)
        place_cells = rs.uniform(-scene.height / 2, scene.height / 2,
                                 size=(n, 2))
    else:
        place_cells = scene.random(n)
    self.centers = ntorch.tensor(place_cells,
                                 names=("placecell", "ax")).float().cuda()
    self.scale = scale
    # Visualize the sampled place-cell centers.
    plt.scatter(*place_cells.T)
    plt.show()
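# How these centers are consumed is not shown in this snippet; a common
# choice (an assumption on our part) is a softmax over negative squared
# distances to the centers, with `scale` controlling the width:
import torch

centers = torch.rand(64, 2)   # hypothetical (placecell, ax) centers
scale = 0.01
pos = torch.rand(8, 2)        # (batch, ax) positions to encode
d2 = ((pos[:, None, :] - centers[None, :, :]) ** 2).sum(-1)  # (batch, placecell)
activations = torch.softmax(-d2 / (2 * scale), dim=1)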
def get_batch(traj, place_cells, hd_cells, dims=None, pos=False):
    if dims is None:
        dims = DIMS
    ntraj = [ntorch.tensor(i, names=n).cuda() for i, n in zip(traj, dims)]
    target_pos, target_hd, ego_vel, init_pos, init_hd = ntraj
    cs, c0 = place_cells(target_pos), place_cells(init_pos)
    hs, h0 = hd_cells(target_hd), hd_cells(init_hd)
    hs = hs[{'hd': 0}]
    h0 = h0[{'hd': 0}]
    if pos:
        return cs, hs, ego_vel, c0, h0, target_pos
    return cs, hs, ego_vel, c0, h0
def get_prediction(batch, model, aa_compress, mask_tbl, device):
    '''Predict outputs from sequence'''
    model.to(device)
    model.eval()
    with torch.no_grad():
        seq_len = batch.sequence.shape["seqlen"]
        text = batch.sequence.narrow("seqlen", 0, seq_len - 1)
        target = batch.sequence.narrow("seqlen", 1, seq_len - 1)

        # Forward
        predictions = model(text, aa_compress(target))
        mask_bad_codons = ntorch.tensor(
            mask_tbl[target.values],
            names=("seqlen", "batch", "vocablen")).float()
        predictions = (mask_bad_codons + predictions.float())
        predictions = predictions.argmax("vocablen")
    return predictions
def test(args, model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    lmod = ntorch.nn.NLLLoss(reduction="none")
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            data = ntorch.tensor(data, ("b", "c", "h", "w"))
            target = ntorch.tensor(target, ("b",))
            output = model(data)
            # Accumulate the summed batch loss.
            test_loss += lmod(output, target).sum("b").item()
            pred = output.max("classes")[1]
            correct += (pred == target).sum("b").item()
    test_loss /= len(test_loader.dataset)
    print("\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
        test_loss,
        correct,
        len(test_loader.dataset),
        100.0 * correct / len(test_loader.dataset),
    ))
def make_translation_predictions(model, use_bs2=False):
    print('Generating translations')
    with open('test_predictions.txt', 'w') as outfile:
        with open('source_test.txt', 'r') as infile:
            for line in tqdm(list(infile)):
                tokens = [DE.vocab.stoi[w] for w in tokenize_de(line.strip())]
                src = ntorch.tensor(tokens, names="srcSeqlen")
                if use_bs2:
                    translation = beam_search2(model, src, beam_size=5,
                                               num_results=10)[0]
                else:
                    translation = beam_search(model, src, beam_size=5,
                                              num_results=10)[0]
                assert translation[0] == BOS_IND
                sent = ' '.join(EN.vocab.itos[i] for i in translation[1:])
                outfile.write(sent + '\n')
def align(self, s):
    intra_s = self.intra_layers(s)
    intra_s_ = intra_s.values.transpose(0, 1)  # formatting
    batch_seq = torch.bmm(intra_s_, intra_s_.transpose(1, 2))
    batches = batch_seq.shape[0]
    seqlen = batch_seq.shape[1]
    align_matrix = torch.tensor([[(i - j) for j in range(seqlen)]
                                 for i in range(seqlen)])
    align_matrix = torch.clamp(align_matrix, -self.cap, self.cap)
    align_matrix = align_matrix.unsqueeze(0).expand(
        batches, seqlen, seqlen)  # batch * seqlen * seqlen
    align_matrix_b = self.bias[align_matrix + self.cap]
    weights = torch.softmax(align_matrix_b + batch_seq, dim=2)
    s_ = torch.matmul(weights, s.values.transpose(0, 1))
    s_ = ntorch.tensor(s_, ('batch', 'seqlen', 'embedding'))
    return ntorch.cat([s, s_], 'embedding')
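# The clamped distance-index lookup above in isolation: offsets i - j are
# clipped to [-cap, cap], so a single learned bias is shared by all token
# pairs at distance >= cap (toy sizes, illustrative only):
import torch

cap, seqlen = 2, 5
idx = torch.arange(seqlen).unsqueeze(1) - torch.arange(seqlen).unsqueeze(0)
idx = torch.clamp(idx, -cap, cap) + cap  # shift into [0, 2*cap] to index a bias table
print(idx)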
def forward(self, a, b, y=None):
    a_bar, b_bar = self.input(a, b)

    # ATTEND
    F_a = self.f(a_bar)
    F_b = self.f(b_bar)
    e_mat = F_a.dot('hidden', F_b)
    alpha = e_mat.softmax(dim='aSeqlen').dot('aSeqlen', a_bar)
    beta = e_mat.softmax(dim='bSeqlen').dot('bSeqlen', b_bar)

    # COMPARE AND AGGREGATE
    v1 = self.g(ntorch.cat([a_bar, beta], 'embedding')).sum('aSeqlen')
    v2 = self.g(ntorch.cat([b_bar, alpha], 'embedding')).sum('bSeqlen')

    # NOTE: currently adds log softmax layer after linear, use nllloss
    out = self.h(ntorch.cat([v1, v2], 'hidden'))
    if self.use_labels:
        y = ntorch.tensor(y.values.unsqueeze(1), names=('batch', 'hidden'))
        out = self.y_combine(ntorch.cat([out, y.float()], 'hidden'))
    yhat = self.final(out)
    return yhat
def self_align(self, a):
    # generate a' from a
    fintra_a = self.f_intra(a)
    unnamed_fintra = fintra_a.values.transpose(0, 1)
    fmat = torch.bmm(unnamed_fintra, unnamed_fintra.transpose(1, 2))
    # fmat = batch x seqlen x seqlen
    batches = fmat.shape[0]
    seqlen = fmat.shape[1]
    index_mat = torch.tensor([[(i - j) for j in range(seqlen)]
                              for i in range(seqlen)])
    index_mat = torch.clamp(index_mat, -self.d_cap, self.d_cap)
    index_mat = index_mat.unsqueeze(0).expand(batches, seqlen, seqlen)
    dmat = self.bias[index_mat + self.d_cap]
    weights = torch.softmax(fmat + dmat, dim=2)
    aprime = torch.matmul(weights, a.values.transpose(0, 1))
    aprime = ntorch.tensor(aprime, ('batch', 'seqlen', 'embedding'))
    abar = ntorch.cat([a, aprime], 'embedding')
    return abar
def make_kaggle_predictions(model, use_ks2=False):
    print('Generating Kaggle predictions')
    with open('kaggle_predictions.txt', 'w') as outfile:
        outfile.write('Id,Predicted\n')
        with open('source_test.txt', 'r') as infile:
            for i, line in enumerate(tqdm(list(infile))):
                tokens = [DE.vocab.stoi[w] for w in tokenize_de(line.strip())]
                src = ntorch.tensor(tokens, names="srcSeqlen")
                if use_ks2:
                    preds = kaggle_search2(model, src)
                else:
                    preds = kaggle_search(model, src)
                trigrams = []
                for trigram in preds:
                    assert len(trigram) == 3
                    trigram = escape('|'.join(EN.vocab.itos[i] for i in trigram))
                    trigrams.append(trigram)
                assert len(trigrams) == 100
                outfile.write(str(i) + ',' + ' '.join(trigrams) + '\n')
def elbo_exact(self, premise, hypothesis, label):
    # computing the q distribution: p(c | a, b, y)
    q = self.q(premise, hypothesis, label).rename('label', 'latent')
    latent_dist = ds.Categorical(logits=q, dim_logit='latent')
    one_hot_label = torch.eye(4).index_select(0, label.values)
    one_hot_label = ntorch.tensor(one_hot_label, names=('batch', 'label'))

    # computing p(y | a, b, c) for every c
    objective = 0
    q = q.exp()
    for c in range(len(self.models)):
        log_probs = self.models[c](premise, hypothesis)
        model_probs = q.get('latent', c)
        objective += (log_probs * one_hot_label).sum('label') * model_probs

    # adding on the KL regularizing term
    ones = ntorch.ones(self.K, names='latent').log_softmax(dim='latent')
    uniform_dist = ds.Categorical(logits=ones, dim_logit='latent')
    kl = ds.kl_divergence(latent_dist, uniform_dist) * self.kl_importance
    loss = -(objective.mean() - kl.mean())
    return loss, loss.detach()
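# The loop above marginalizes over the K latent models exactly:
#   E_{c ~ q}[log p(y | a, b, c)] = sum_c q(c) * log p(y | a, b, c),
# which is what `objective` accumulates before the KL term is subtracted.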
def exact(self, premise, hypothesis, label):
    q = self.q(premise, hypothesis, label).rename('label', 'latent')
    latent_dist = nds.Categorical(logits=q, dim_logit='latent')
    one_hot = torch.eye(4, out=torch.cuda.FloatTensor()).index_select(
        0, label.values)
    one_hot = ntorch.tensor(one_hot, names=('batch', 'label'))

    # Calculate p(y | a, b, c) across all K models
    surrogate = 0
    q = q.exp()
    for c in range(len(self.models)):
        log_probs = self.models[c](premise, hypothesis)
        model_probs = q.get('latent', c)
        surrogate += (log_probs * one_hot).sum('label') * model_probs

    # KL regularization
    ones = ntorch.ones(self.K, names='latent').log_softmax(dim='latent')
    prior = nds.Categorical(logits=ones, dim_logit='latent')
    KLD = nds.kl_divergence(latent_dist, prior) * self.kl_weight
    loss = KLD.mean() - surrogate._tensor.mean()  # -(surrogate.mean() - KLD.mean())
    return loss, loss.detach()
def joint_ppl_acc(data_iter, model, device, aa_compress, TEXT, mask_tbl,
                  teacher_force=1):
    '''Calculate perplexity and accuracy on data iter

    Args:
        data_iter: Bucket iter
        model: Model that works over data iter
        device: device
        aa_compress: Helper model to consider dependencies along the model
        TEXT: text object over codons
        mask_tbl: Mask table
        teacher_force: Whether to use teacher forcing or not. Default yes

    Returns:
        {"acc": accuracy, "ppl": perplexity}
    '''
    model.to(device)
    model.eval()
    model.teacher_force_prob = teacher_force
    aa_compress.to(device)
    aa_compress.eval()

    ppl = 0
    num_correct = 0
    num_total = 0
    loss_function = ntorch.nn.CrossEntropyLoss(
        reduction="none").spec("vocablen")
    with torch.no_grad():
        for i, batch in enumerate(data_iter):
            # Find all indices that aren't padding
            seq_len = batch.sequence.shape["seqlen"]
            text = batch.sequence.narrow("seqlen", 0, seq_len - 1)
            target = batch.sequence.narrow("seqlen", 1, seq_len - 1)
            stacked_target = target.stack(dims=("batch", "seqlen"),
                                          name="seqlen")
            mask = (stacked_target != TEXT.vocab.stoi["<pad>"])
            prop_indices = (ntorch.nonzero(mask).get("inputdims", 0)).rename(
                "elements", "seqlen")

            # Forward
            predictions = model(text, aa_compress(target))

            # Mask all outputs that don't work. Note: we clone the targets and
            # switch the first target codon into the start codon so that all
            # possible start codons can be predicted.
            mask_targets = target.clone()
            mask_targets[{"seqlen": 0}] = TEXT.vocab.stoi["<start>"]
            mask_bad_codons = ntorch.tensor(
                mask_tbl[mask_targets.values],
                names=("seqlen", "batch", "vocablen")).float()
            predictions = (mask_bad_codons.double() + predictions.double())

            # Stack the predictions into one long vector and keep only the
            # non-padding positions
            predictions = (predictions.stack(dims=("batch", "seqlen"),
                                             name="seqlen")
                           .index_select("seqlen", prop_indices))
            predictions_hard = predictions.argmax("vocablen")

            # Select the same indices from the target
            stacked_target = stacked_target.index_select("seqlen", prop_indices)
            num_correct += (predictions_hard == stacked_target).sum().item()
            num_total += predictions_hard.shape["seqlen"]
            loss = loss_function(predictions, stacked_target)
            ppl += loss.sum().item()

            # For quick results, toggle this
            # if i == 20:
            #     break
    return {"acc": num_correct / num_total, "ppl": np.exp(ppl / num_total)}
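# Perplexity above is exp(total token NLL / total tokens); a tiny
# self-contained check with made-up per-token losses:
import math
import torch

nll = torch.tensor([1.2, 0.7, 2.3])  # per-token cross-entropy losses
print(math.exp(nll.sum().item() / nll.numel()))  # == exp(mean NLL)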