def unk_tensor(self, tensor):
    # Replace ids outside the vocabulary (>= vocab.count) with the <UNK> id.
    unk = self.vocab.w2i['<UNK>']
    mask = (tensor >= self.vocab.count).long()
    ones = torch.ones(mask.size()).long()
    ones = to_cuda(ones, self.iscuda)
    tensor = tensor * (ones - mask) + mask * unk
    return tensor
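# to_cuda is used throughout this file but is not defined in this section.
# Below is a minimal sketch consistent with its call sites (two-argument
# calls with an explicit iscuda flag, plus a one-argument call in main());
# the real helper may differ.
def to_cuda(x, iscuda=True):
    # Move a tensor/Variable/module to the GPU only when CUDA is requested
    # and actually available.
    if iscuda and torch.cuda.is_available():
        return x.cuda()
    return x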
def forward(self, sources, src_simil, q_simil, context_len):
    bc, in_seq = sources.size()
    b = len(context_len)
    idx = 0
    similarity_list = []
    sources_list = []
    for i, c in enumerate(context_len):
        # [c x embed] x [embed x 1] -> [c]; the slice is kept 2-D so that
        # torch.mm also works when a context has a single line (c == 1)
        similarities = F.softmax(
            torch.mm(src_simil[idx:idx + c], q_simil[i].view(-1, 1)).squeeze())
        similarity_list.append(
            similarities)  # distribution of each line, for later softmax
        max_idx = similarities.max(0)[1]
        sources_list.append(sources[idx + max_idx.data[0]])  # selected source
        idx += c
    sources = torch.stack(sources_list, 0)  # [b x seq]
    similarity_tensor = to_cuda(Variable(torch.zeros(b, 10)),
                                self.iscuda)  # assumes at most 10 context lines
    for i, sim in enumerate(similarity_list):
        length = len(sim)
        similarity_tensor[i, :length] = similarity_tensor[i, :length] + sim
    return sources, similarity_tensor
def main(args):
    # obtain vocabulary
    vocab = Vocab(args.vocab_size)
    vocab.w2i = np.load(args.word2idx).item()  # .item() recovers the pickled dict
    vocab.i2w = np.load(args.idx2word).item()
    vocab.count = len(vocab.w2i)

    # obtain dataset in batches
    file_list = os.listdir(args.data_dir)
    batch = Batch(file_list, args.max_enc, args.max_dec)

    # load model
    if args.load_model != '':
        model = torch.load(args.load_model)
    else:
        model = Model(args)
    model = to_cuda(model)

    # computation for each epoch
    epoch = 1
    while epoch <= args.epochs:
        random.shuffle(file_list)
        for file in file_list:
            with open(os.path.join(args.data_dir, file)) as f:
                minibatch = f.read()
            stories, summaries = batch.process_minibatch(minibatch, vocab)
            print(stories)
            print(summaries)
        epoch += 1  # advance to the next epoch so the loop terminates
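# Hypothetical minimal Vocab container, written to match how main() uses it
# (w2i/i2w dicts loaded from .npy dumps, count set afterwards); the real
# class in the repo may carry more state, e.g. special-token handling.
class Vocab:
    def __init__(self, vocab_size):
        self.vocab_size = vocab_size  # maximum vocabulary size
        self.w2i = {}   # word -> index, filled from the word2idx dump
        self.i2w = {}   # index -> word, filled from the idx2word dump
        self.count = 0  # number of in-vocabulary words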
def forward(self, sources, queries, context_len):
    bc, in_seq = sources.size()
    b = len(context_len)
    idx = 0
    similarity_list = []
    sources_list = []
    for i, c in enumerate(context_len):
        query = queries[i].tolist()
        # self.lev is assumed to return a similarity (higher = closer),
        # since the argmax below picks the best-matching line
        similarities = [self.lev(line.tolist(), query)
                        for line in sources[idx:idx + c]]
        similarities = [x + 1e-4 for x in similarities]  # avoid all-zero rows
        similarities = torch.Tensor(similarities)
        similarity_list.append(
            similarities)  # distribution of each line, for later softmax
        max_idx = similarities.max(0)[1]
        sources_list.append(sources[idx + max_idx[0]])  # selected source
        idx += c
    sources = torch.stack(sources_list, 0)  # [b x seq]
    similarity_tensor = to_cuda(Variable(torch.zeros(b, 10)), self.iscuda)
    for i, sim in enumerate(similarity_list):
        length = len(sim)
        similarity_tensor[i, :length] = similarity_tensor[i, :length] + \
            to_cuda(Variable(sim), self.iscuda)
    return sources, similarity_tensor
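# Hypothetical implementation of self.lev. It is assumed to return a
# similarity (higher = closer) because forward() takes the argmax; one common
# choice is 1 - edit_distance / max_len, sketched below.
def lev(self, a, b):
    m, n = len(a), len(b)
    # classic dynamic-programming edit distance between token lists a and b
    dist = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(m + 1):
        dist[i][0] = i
    for j in range(n + 1):
        dist[0][j] = j
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            cost = 0 if a[i - 1] == b[j - 1] else 1
            dist[i][j] = min(dist[i - 1][j] + 1,        # deletion
                             dist[i][j - 1] + 1,        # insertion
                             dist[i - 1][j - 1] + cost) # substitution
    # normalize so the score is in [0, 1], 1 meaning identical sequences
    return 1.0 - dist[m][n] / max(m, n, 1)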
def forward(self, sources, queries, lengths, targets):
    """
    sources: [batch*context_lines x seq] OR [batch x seq]
    queries: [batch x seq]
    targets: [batch x seq]
    """
    source_len, query_len, target_len, context_len = lengths

    # use similarity function to get closest lines from source
    if not self.single:
        sources, similarities = self.similarity(sources, queries, context_len)

    # merge sources and queries into one matrix
    source_lens = (sources > 0).long().sum(1)
    query_lens = (queries > 0).long().sum(1)
    max_len = (source_lens + query_lens).max()
    new_sources = torch.zeros(sources.size(0), max_len).long()
    new_sources = to_cuda(new_sources, self.iscuda)
    for i in range(sources.size(0)):
        new_sources[i, :source_lens[i]] += sources[i, :source_lens[i]]
        new_sources[i, source_lens[i]:source_lens[i] + query_lens[i]] += \
            queries[i, :query_lens[i]]
    self.inputs = new_sources  # inputs for the copynet model

    # get target outputs using the copynet model
    outputs = self.copynet(new_sources, targets)
    if self.single:
        return outputs  # no line-selection step, so no similarities to return
    return outputs, similarities
def forward(self, sources, queries, context_len):
    """
    sources: LongTensor, [batch*context x src_seq]
    queries: LongTensor, [batch x qry_seq]
    context_len: [batch]
    """
    bc, in_seq = sources.size()
    b, q_seq = queries.size()
    embedded_sources = self.embedding(
        to_cuda(Variable(self.unk_tensor(sources)), self.iscuda))
    embedded_queries = self.embedding(
        to_cuda(Variable(self.unk_tensor(queries)), self.iscuda))
    src_mask = Variable(
        (sources > 0).float().unsqueeze(2))  # [batch*context x src_seq x 1]
    q_mask = Variable(
        (queries > 0).float().unsqueeze(2))  # [batch x qry_seq x 1]
    q_len = q_mask.squeeze(2).sum(1).data.long().tolist()  # [batch]
    src_mask = to_cuda(src_mask, self.iscuda)
    q_mask = to_cuda(q_mask, self.iscuda)

    c_idx = 0
    source_list = []
    sim_list = []
    for i in range(b):
        # dot-product similarity over the first q_len[i] positions only,
        # so each source line is truncated to its query's length
        tmp1 = embedded_sources[c_idx:c_idx + context_len[i], :q_len[i]]
        tmp2 = embedded_queries[i, :q_len[i]]
        sim = F.softmax((tmp1 * tmp2).sum(2).sum(1))
        sim_list.append(sim)
        top_score = sim.max(0)[1].data[0]  # argmax over context lines
        source_list.append(sources[c_idx + top_score])  # selected line
        c_idx += context_len[i]

    # stacking assumes the same number of context lines per example
    similarities = torch.stack(sim_list, 0)
    sources = torch.stack(source_list, 0)
    return sources, similarities
def forward(self, sources, queries):
    """
    sources: LongTensor, [batch*context x src_seq]
    queries: LongTensor, [batch x qry_seq]
    """
    bc, in_seq = sources.size()
    b, q_seq = queries.size()
    embedded_sources = self.embedding(
        to_cuda(Variable(self.unk_tensor(sources)), self.iscuda))
    embedded_queries = self.embedding(
        to_cuda(Variable(self.unk_tensor(queries)), self.iscuda))
    src_mask = Variable((sources > 0).float().unsqueeze(2))
    q_mask = Variable((queries > 0).float().unsqueeze(2))
    src_mask = to_cuda(src_mask, self.iscuda)
    q_mask = to_cuda(q_mask, self.iscuda)
    sources_out = embedded_sources * to_cuda(
        Variable(self.pos_emb[:in_seq]).unsqueeze(0).expand(
            bc, in_seq, self.embed), self.iscuda)
    queries_out = embedded_queries * to_cuda(
        Variable(self.pos_emb[:q_seq]).unsqueeze(0).expand(
            b, q_seq, self.embed), self.iscuda)

    # get resulting tensors of shape [bc x embed] & [b x embed]
    src_simil = (sources_out * src_mask).sum(1)
    q_simil = (queries_out * q_mask).sum(1)
    return src_simil, q_simil
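# Hypothetical construction of self.pos_emb, assumed to be a fixed
# [max_seq x embed] position-weighting matrix. One common choice is the
# position encoding of End-to-End Memory Networks (Sukhbaatar et al., 2015),
# l_kj = (1 - j/J) - (k/d)(1 - 2j/J); the repo's actual initialization may
# differ.
def build_pos_emb(max_seq, embed):
    pe = torch.zeros(max_seq, embed)
    for j in range(max_seq):    # word position (1-based in the formula)
        for k in range(embed):  # embedding dimension (1-based in the formula)
            pe[j, k] = (1 - (j + 1) / max_seq) \
                - ((k + 1) / embed) * (1 - 2 * (j + 1) / max_seq)
    return pe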
def forward(self, sources, queries, lengths, targets):
    """
    sources: [batch*context_lines x seq] OR [batch x seq]
    queries: [batch x seq]
    targets: [batch x seq]
    """
    source_len, query_len, target_len, context_len = lengths

    # use similarity function to get closest lines from source
    if not self.single:
        # similarity_encode
        # similarity_compute
        if self.args.similarity == 'levenshtein':
            sources, similarities = self.similarity(
                sources, queries, context_len)
        elif self.args.similarity == 'position_cosine':
            sources, similarities = self.position_cosine(
                sources, queries, context_len)
        elif self.args.similarity == 'lstm_cosine':
            sources, similarities = self.lstm_cosine(
                sources, queries, context_len)
        else:
            src_simil, q_simil = self.encoder(sources, queries)
            sources, similarities = self.similarity(
                sources, src_simil, q_simil, context_len)
        # here, 'sources' are the selected lines

    # merge sources and queries into one matrix
    source_lens = (sources > 0).long().sum(1)
    query_lens = (queries > 0).long().sum(1)
    max_len = (source_lens + query_lens).max()
    new_sources = torch.zeros(sources.size(0), max_len).long()
    new_sources = to_cuda(new_sources, self.iscuda)
    for i in range(sources.size(0)):
        try:
            new_sources[i, :source_lens[i]] += sources[i, :source_lens[i]]
        except ValueError:  # skip rows whose source slice is empty
            pass
        new_sources[i, source_lens[i]:source_lens[i] + query_lens[i]] += \
            queries[i, :query_lens[i]]

    # get target outputs using the copynet model
    outputs = self.copynet(new_sources, targets)

    # free references that are no longer needed
    del sources, targets, source_len, query_len, target_len, context_len, lengths
    del new_sources

    if self.single:
        return outputs
    else:
        return outputs, similarities
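# Hypothetical training step consistent with the forward() above. The loss
# construction, the `optimizer` object, and the is_train toggle are
# assumptions for illustration, not the repo's actual training loop.
model.is_train = True
outputs, similarities = model(sources, queries, lengths, targets)
# outputs: [batch x out_seq x (vocab+max_oovs)] word probabilities
loss = F.nll_loss(
    torch.log(outputs.view(-1, outputs.size(2)) + 1e-8),
    to_cuda(Variable(targets.view(-1)), model.iscuda),
    ignore_index=0)  # skip padding positions in the targets
optimizer.zero_grad()
loss.backward()
optimizer.step()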
def forward(self, sources, queries):
    """
    sources: LongTensor, [batch*context x src_seq]
    queries: LongTensor, [batch x qry_seq]
    """
    bc, in_seq = sources.size()
    b, q_seq = queries.size()
    embedded_sources = self.embedding(
        to_cuda(Variable(self.unk_tensor(sources)), self.iscuda))
    embedded_queries = self.embedding(
        to_cuda(Variable(self.unk_tensor(queries)), self.iscuda))
    encoded_sources, _ = self.lstm(embedded_sources)
    encoded_queries, _ = self.lstm(embedded_queries)

    # use the last (non-padding) hidden state of each sequence; source states
    # are additionally truncated to the matching query's length, and the
    # int(i / 10) indexing assumes a fixed 10 context lines per example
    source_len = (sources > 0).long().sum(1)
    query_len = (queries > 0).long().sum(1)
    sources_last = [
        x[min([source_len[i] - 1, query_len[int(i / 10)] - 1])]
        for i, x in enumerate(encoded_sources)
    ]
    queries_last = [
        x[query_len[i] - 1] for i, x in enumerate(encoded_queries)
    ]
    src_simil = torch.stack(sources_last, 0)
    q_simil = torch.stack(queries_last, 0)
    return src_simil, q_simil
def forward(self, encoded_sources, sources, targets=None):
    """
    embedding: embedding function from above
    encoded_sources: Variable, [batch x seq x hidden*2]
    sources, targets: LongTensor, [batch x seq]
    """
    vocab_size = self.vocab_size
    hidden_size = self.hidden
    b, seq, _ = encoded_sources.size()
    source_lens = (sources > 0).long().sum(1)
    if targets is not None:
        self.max_out_seq = targets.size(1)
        target_lens = (targets > 0).long().sum(1)

    # 0. set initial states
    last_step = torch.stack(
        [x[source_lens[i] - 1] for i, x in enumerate(encoded_sources)],
        0)  # [batch x hidden*2]
    state = self.Ws(last_step).unsqueeze(0)  # [1 x batch x hidden]
    weighted = Variable(
        torch.Tensor(b, 1, hidden_size * 2).zero_())  # [b x 1 x hidden*2]
    weighted = to_cuda(weighted, self.iscuda)

    out_list = []
    for i in range(self.max_out_seq):
        # 1. update states (teacher forcing; at test time `inputs` comes from
        # step 4 of the previous iteration and must be seeded for i == 0)
        if self.is_train:
            inputs = self.embedding(
                Variable(self.unk_tensor(targets[:, i])))
        gru_input = torch.cat([inputs.unsqueeze(1), weighted],
                              2)  # [b x 1 x embed+hidden*2]
        _, state = self.gru(gru_input, state)  # [1 x b x hidden]

        # 2. predict next word y_t
        # 2-1) get score_g
        score_g = self.Wo(state.squeeze())  # [b x vocab_size]

        # 2-2) get score_c
        score_c = F.tanh(
            self.Wc(encoded_sources.contiguous().view(-1, hidden_size * 2)))
        score_c = score_c.view(b, -1, hidden_size)  # [b x seq x hid]
        score_c = torch.bmm(score_c,
                            state.view(b, -1, 1)).squeeze()  # [b x seq]
        score_c = F.tanh(score_c)
        encoded_mask = Variable(
            (sources == 0).float() * (-1000))  # causing inplace error
        score_c = score_c + encoded_mask

        # 2-3) get softmax-ed probs
        score = torch.cat([score_g, score_c], 1)  # [b x (vocab+seq)]
        probs = F.softmax(score)
        prob_g = probs[:, :vocab_size]
        prob_c = probs[:, vocab_size:]

        # 2-4) add slots for OOVs to prob_g
        oovs = Variable(torch.Tensor(b, self.max_oovs).zero_()) + 1e-5
        oovs = to_cuda(oovs, self.iscuda)
        prob_g = torch.cat([prob_g, oovs], 1)

        # 2-5) scatter prob_c into prob_g; every occurrence of a repeated
        # source token first receives the summed copy probability of all
        # its occurrences
        numbers = sources.view(-1).tolist()
        set_numbers = list(set(numbers))  # unique tokens that appear
        c = Counter(numbers)
        dup_list = [k for k in set_numbers if (c[k] > 1)]
        dup_attn_sum = Variable(torch.zeros(b, seq))
        masked_idx_sum = Variable(torch.Tensor(b, seq).zero_())
        encoded_idx_var = Variable(sources)
        if self.iscuda:
            dup_attn_sum = dup_attn_sum.cuda()
            masked_idx_sum = masked_idx_sum.cuda()
            encoded_idx_var = encoded_idx_var.cuda()
        for dup in dup_list:
            mask = (encoded_idx_var == dup).float()
            masked_idx_sum += mask
            attn_mask = torch.mul(mask, prob_c)
            attn_sum = attn_mask.sum(1).unsqueeze(1)
            dup_attn_sum += torch.mul(mask, attn_sum)
        attn = torch.mul(prob_c, (1 - masked_idx_sum)) + dup_attn_sum
        batch_indices = torch.arange(start=0, end=b).long()
        batch_indices = batch_indices.expand(
            seq, b).transpose(1, 0).contiguous().view(-1)
        idx_repeat = torch.arange(start=0, end=seq).repeat(b).long()
        prob_c_to_g = Variable(
            torch.zeros(b, self.vocab_size + self.max_oovs))
        word_indices = sources.view(-1)
        if self.iscuda:
            # the index-add below is done on CPU, then moved back to the GPU
            attn = attn.cpu()
            word_indices = word_indices.cpu()
        prob_c_to_g[batch_indices, word_indices] += attn[batch_indices,
                                                         idx_repeat]
        if self.iscuda:
            prob_c_to_g = prob_c_to_g.cuda()
            attn = attn.cuda()

        # 2-6) get final output
        out = prob_g + prob_c_to_g + 1e-6

        # 3. get weighted attention to use for predicting the next word
        # 3-1) mark where the current decoder input appeared in the encoder
        prev_input = (targets[:, i]).unsqueeze(1).expand(b, sources.size(1))
        idx_from_input = (sources == prev_input).float()
        idx_from_input = Variable(idx_from_input)
        for j in range(b):
            if idx_from_input[j].sum().data[0] > 1:
                idx_from_input[j] = idx_from_input[j] / \
                    idx_from_input[j].sum().data[0]

        # 3-2) multiply with prob_c to get the final weighted representation
        weight_attn = prob_c * idx_from_input
        weight_attn = weight_attn.unsqueeze(1)  # [b x 1 x seq]
        weighted = torch.bmm(weight_attn,
                             encoded_sources)  # [b x 1 x hidden*2]

        # 4. get next inputs
        max_vals = self.unk_tensor(out.max(1)[1].data)
        inputs = self.embedding(Variable(max_vals))
        out_list.append(out)  # out_seq @ [batch x vocab+oov]

    # get final outputs
    return torch.stack(out_list, 1)
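# Tiny worked example of the duplicate handling in step 2-5 above, with
# hypothetical numbers: every occurrence of a repeated source token ends up
# carrying the summed copy probability of all its occurrences.
sources = torch.LongTensor([[4, 7, 4]])            # token 4 appears twice
prob_c = Variable(torch.Tensor([[0.2, 0.5, 0.3]]))
mask = Variable((sources == 4).float())            # [[1, 0, 1]]
attn_sum = (mask * prob_c).sum(1).unsqueeze(1)     # 0.2 + 0.3 = 0.5
attn = prob_c * (1 - mask) + mask * attn_sum       # [[0.5, 0.5, 0.5]]
# the unique token 7 keeps its own probability; both copies of token 4 now
# carry 0.5, so the scatter into prob_c_to_g credits the word once in full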
def forward(self, sources, queries, context_len):
    """
    sources: LongTensor, [batch*context x src_seq]
    queries: LongTensor, [batch x qry_seq]
    context_len: LongTensor, [batch]
    """
    bc, in_seq = sources.size()
    b, q_seq = queries.size()
    embedded_sources = self.embedding(
        to_cuda(Variable(self.unk_tensor(sources)), self.iscuda))
    embedded_queries = self.embedding(
        to_cuda(Variable(self.unk_tensor(queries)), self.iscuda))
    src_mask = Variable((sources > 0).float().unsqueeze(2))
    q_mask = Variable((queries > 0).float().unsqueeze(2))
    src_mask = to_cuda(src_mask, self.iscuda)
    q_mask = to_cuda(q_mask, self.iscuda)
    sources_out = embedded_sources * to_cuda(
        Variable(self.pos_emb[:in_seq]).unsqueeze(0).expand(
            bc, in_seq, self.embed), self.iscuda)
    queries_out = embedded_queries * to_cuda(
        Variable(self.pos_emb[:q_seq]).unsqueeze(0).expand(
            b, q_seq, self.embed), self.iscuda)

    # get resulting tensors of shape [bc x embed] & [b x embed]
    src_simil = (sources_out * src_mask).sum(1)
    q_simil = (queries_out * q_mask).sum(1)

    idx = 0
    similarity_list = []
    sources_list = []
    for i, c in enumerate(context_len):
        similarities = F.softmax(
            torch.mm(src_simil[idx:idx + c],
                     q_simil[i].unsqueeze(1)).squeeze())
        similarity_list.append(
            similarities)  # distribution of each line, for later softmax
        max_idx = similarities.max(0)[1]
        sources_list.append(sources[idx + max_idx.data[0]])  # selected source
        idx += c
    sources = torch.stack(sources_list, 0)  # [b x seq]
    similarity_tensor = to_cuda(Variable(torch.zeros(b, 10)), self.iscuda)
    for i, sim in enumerate(similarity_list):
        length = len(sim)
        similarity_tensor[i, :length] = similarity_tensor[i, :length] + sim
    return sources, similarity_tensor


# Tail of a second, LSTM-based selection routine; its header is not in this
# section, so names such as encoded_sources, encoded_queries, query_len, and
# hidden come from the missing part of the function.
    # here we will use the last hidden state
    source_len = (sources > 0).long().sum(1)
    sources_last = [
        x[source_len[i] - 1].unsqueeze(0)
        for i, x in enumerate(encoded_sources)
    ]
    queries_last = [
        x[query_len[i] - 1].unsqueeze(0)
        for i, x in enumerate(encoded_queries)
    ]
    y_list = []
    for i, length in enumerate(context_len):
        y_list.append(queries_last[i].expand(length, hidden))
    x = torch.cat(sources_last, 0)
    y = torch.cat(y_list, 0)  # [batch*context x hidden]
    mul = F.cosine_similarity(x, y)  # [batch*context]

    temp = 0
    idx_list = []
    attn_list = []
    source_list = []
    encoded_list = []
    for i, length in enumerate(context_len):
        attn = F.softmax(mul[temp:temp + length])
        attn_list.append(attn)
        idx = attn.max(0)[1].data[0]
        idx_list.append(idx)
        # attention-weighted sum of the encoded context lines
        out = (encoded_sources[temp:temp + length] *
               attn.unsqueeze(1).unsqueeze(2)).sum(0)
        source_list.append(sources[temp + idx].unsqueeze(0))
        encoded_list.append(out.unsqueeze(0))
        temp += length
    out = torch.cat(encoded_list, 0)
    attns = torch.cat(attn_list, 0)
    sources = torch.stack(source_list, 0)
    return out, sources, attns, idx_list
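# Hypothetical header for the orphaned tail above, reconstructed from the
# names it uses and from the LSTM encoder earlier in this section; the actual
# signature and body in the repo may differ.
def lstm_cosine_forward(self, sources, queries, context_len):
    hidden = self.hidden
    embedded_sources = self.embedding(
        to_cuda(Variable(self.unk_tensor(sources)), self.iscuda))
    embedded_queries = self.embedding(
        to_cuda(Variable(self.unk_tensor(queries)), self.iscuda))
    encoded_sources, _ = self.lstm(embedded_sources)
    encoded_queries, _ = self.lstm(embedded_queries)
    query_len = (queries > 0).long().sum(1)
    # ... continues with the last-hidden-state selection shown in the tail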