def __next__(self):
    if self.cursor < self.sample_num:
        required_batch = self.all_samples[self.cursor:self.cursor + self.batch_size]
        self.cursor += self.batch_size

        # Each sample is a tuple:
        # (l_ctx, mention, r_ctx, l_ctx_len, r_ctx_len, labels, types_str, types)
        l_ctx = [sample[0] for sample in required_batch]
        mentions = [sample[1] for sample in required_batch]
        r_ctx = [sample[2] for sample in required_batch]
        labels = [sample[5] for sample in required_batch]
        types_str = [sample[6] for sample in required_batch]
        types = required_batch[0][7]

        # l_ctx_lens = [l.shape[0] for l in l_ctx]
        l_ctx_lens = [sample[3] for sample in required_batch]
        men_lens = [m.shape[0] for m in mentions]
        # r_ctx_lens = [r.shape[0] for r in r_ctx]
        r_ctx_lens = [sample[4] for sample in required_batch]

        l_ctx_padded = [
            pad_src_seq(l, fg_config['ctx_window_size'])[numpy.newaxis, ...]
            for l in l_ctx
        ]
        mentions_padded = [
            pad_src_seq(m, max(men_lens))[numpy.newaxis, ...]
            for m in mentions
        ]
        r_ctx_padded = [
            pad_src_seq(r, fg_config['ctx_window_size'])[numpy.newaxis, ...]
            for r in r_ctx
        ]

        # (B, S, 1)
        l_ctx_tensor = torch.from_numpy(
            numpy.concatenate(l_ctx_padded, axis=0)).type(torch.LongTensor)
        # (B, S, 1)
        mentions_tensor = torch.from_numpy(
            numpy.concatenate(mentions_padded, axis=0)).type(torch.LongTensor)
        # (B, S, 1)
        r_ctx_tensor = torch.from_numpy(
            numpy.concatenate(r_ctx_padded, axis=0)).type(torch.LongTensor)
        # (B, 89)
        labels_tensor = torch.from_numpy(numpy.concatenate(
            labels, axis=0)).type(torch.FloatTensor)
        # (89, 1)
        types_tensor = torch.from_numpy(types).type(torch.LongTensor)

        return {
            'l_ctx_tensor': l_ctx_tensor,
            'l_ctx_lens': l_ctx_lens,
            'mentions_tensor': mentions_tensor,
            'men_lens': men_lens,
            'r_ctx_tensor': r_ctx_tensor,
            'r_ctx_lens': r_ctx_lens,
            'labels_tensor': labels_tensor,
            'types_tensor': types_tensor,
            'types_str': types_str
        }
    else:
        raise StopIteration("out of list")
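# The padding helper used above is not defined in this snippet. Below is a minimal
# sketch of what a `pad_src_seq(seq, target_len)`-style helper could look like,
# assuming `seq` is a numpy array whose first axis is the sequence dimension and
# that padding is done with zeros; the name `pad_src_seq_sketch`, the `pad_value`
# parameter, and the zero-padding behaviour are assumptions, not the original helper.
def pad_src_seq_sketch(seq, target_len, pad_value=0):
    """Pad `seq` along axis 0 to `target_len` with `pad_value` (hypothetical sketch)."""
    pad_width = [(0, max(0, target_len - seq.shape[0]))] + [(0, 0)] * (seq.ndim - 1)
    return numpy.pad(seq, pad_width, mode='constant', constant_values=pad_value)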
def __next__(self):
    if self.cursor < self.sample_num:
        required_batch = self.questions[self.cursor:self.cursor + self.batch_size]
        self.cursor += self.batch_size

        final_batch = {'ids': [], 'art_ids': [], 'para_ids': []}
        contexts = []
        questions = []
        questions_lens = []
        contexts_lens = []
        ans_starts = []
        ans_ends = []
        for question in required_batch:
            final_batch['ids'].append(question['id'])
            final_batch['art_ids'].append(question['art_id'])
            final_batch['para_ids'].append(question['para_id'])
            questions.append(question['question'])
            context = self.paragraphs[question['art_id']][question['para_id']]
            contexts.append(context)
            questions_lens.append(question['question'].shape[0])
            contexts_lens.append(context.shape[0])
            ans_starts.append(question['ans_start'])
            ans_ends.append(question['ans_end'])

        contexts_padded = [
            pad_src_seq(context, max(contexts_lens))[numpy.newaxis, ...]
            for context in contexts
        ]
        contexts_tensor = torch.from_numpy(
            numpy.concatenate(contexts_padded, axis=0)).type(torch.LongTensor)
        questions_padded = [
            pad_src_seq(s, max(questions_lens))[numpy.newaxis, ...]
            for s in questions
        ]
        questions_tensor = torch.from_numpy(
            numpy.concatenate(questions_padded, axis=0)).type(torch.LongTensor)

        final_batch['contexts'] = contexts_tensor
        final_batch['questions'] = questions_tensor
        final_batch['con_lens'] = contexts_lens
        final_batch['q_lens'] = questions_lens

        # One-hot matrices marking the answer start/end token position in each context.
        start_mat = numpy.zeros((len(required_batch), max(contexts_lens)),
                                dtype='int32')
        for i, num in enumerate(ans_starts):
            start_mat[i, num] = 1
        end_mat = numpy.zeros((len(required_batch), max(contexts_lens)),
                              dtype='int32')
        for i, num in enumerate(ans_ends):
            end_mat[i, num] = 1
        final_batch['start'] = torch.from_numpy(start_mat).type(torch.FloatTensor)
        final_batch['end'] = torch.from_numpy(end_mat).type(torch.FloatTensor)
        final_batch['ans_start'] = ans_starts
        final_batch['ans_end'] = ans_ends

        return final_batch
    else:
        raise StopIteration("out of list")
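# The two `__next__` methods above advance `self.cursor` and raise StopIteration when
# the samples are exhausted, which matches the iterator protocol. A minimal `__iter__`
# sketch that would let such a reader be used directly in a `for` loop; whether the
# original classes define this, and whether they reset the cursor on re-iteration,
# is an assumption.
def __iter__(self):
    self.cursor = 0  # rewind so the reader can be iterated more than once
    return self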
def next(self):
    if self.cursor < self.sample_num:
        required_batch = self.all_samples[self.cursor:self.cursor + self.batch_size]
        # required_batch = self.all_samples[:config['batch_size']]
        self.cursor += self.batch_size

        input_seqs = [seq_label[0] for seq_label in required_batch]
        input_labels = [seq_label[1] for seq_label in required_batch]
        input_seqs_length = [s.shape[0] for s in input_seqs]
        input_labels_length = [s.size for s in input_labels]

        seqs_padded = [
            pad_src_seq(s, max(input_seqs_length))[numpy.newaxis, ...]
            for s in input_seqs
        ]
        labels_padded = [
            pad_tgt_seq(s, max(input_labels_length))[numpy.newaxis, ...]
            for s in input_labels
        ]

        # (batch, max_seq, len(embnames)+len(gazs)+max_char+max_char)
        seq_tensor = torch.from_numpy(
            numpy.concatenate(seqs_padded, axis=0)).type(torch.LongTensor)
        # (batch, max_label)
        label_tensor = torch.from_numpy(
            numpy.concatenate(labels_padded, axis=0)).type(torch.LongTensor)

        # input_seqs_length[-1] = 350
        # input_labels_length[-1] = 350
        return seq_tensor, label_tensor, input_labels_length, input_seqs_length
    else:
        raise StopIteration("out of list")
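# Usage sketch for a reader exposing the explicit `next()` method above: drain it
# until it raises StopIteration. The helper name `iterate_batches` is hypothetical;
# only the four-element return signature comes from the method itself.
def iterate_batches(reader):
    """Yield (seq_tensor, label_tensor, label_lens, seq_lens) batches until exhausted."""
    try:
        while True:
            yield reader.next()
    except StopIteration:
        return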