Code example #1
    def __next__(self):
        if self.cursor < self.sample_num:
            required_batch = self.all_samples[self.cursor:self.cursor +
                                              self.batch_size]

            self.cursor += self.batch_size
            l_ctx = [sample[0] for sample in required_batch]
            mentions = [sample[1] for sample in required_batch]
            r_ctx = [sample[2] for sample in required_batch]
            labels = [sample[5] for sample in required_batch]
            types_str = [sample[6] for sample in required_batch]
            types = required_batch[0][7]
            # l_ctx_lens = [l.shape[0] for l in l_ctx]
            l_ctx_lens = [sample[3] for sample in required_batch]
            men_lens = [m.shape[0] for m in mentions]
            # r_ctx_lens = [r.shape[0] for r in r_ctx]
            r_ctx_lens = [sample[4] for sample in required_batch]
            l_ctx_padded = [
                pad_src_seq(l, fg_config['ctx_window_size'])[numpy.newaxis,
                                                             ...]
                for l in l_ctx
            ]
            mentions_padded = [
                pad_src_seq(m, max(men_lens))[numpy.newaxis, ...]
                for m in mentions
            ]
            r_ctx_padded = [
                pad_src_seq(r, fg_config['ctx_window_size'])[numpy.newaxis,
                                                             ...]
                for r in r_ctx
            ]
            # (B, S, 1)
            l_ctx_tensor = torch.from_numpy(
                numpy.concatenate(l_ctx_padded, axis=0)).type(torch.LongTensor)
            # (B, S, 1)
            mentions_tensor = torch.from_numpy(
                numpy.concatenate(mentions_padded,
                                  axis=0)).type(torch.LongTensor)
            # (B, S, 1)
            r_ctx_tensor = torch.from_numpy(
                numpy.concatenate(r_ctx_padded, axis=0)).type(torch.LongTensor)
            # (B, 89)
            labels_tensor = torch.from_numpy(numpy.concatenate(
                labels, axis=0)).type(torch.FloatTensor)
            # (89, 1)
            types_tensor = torch.from_numpy(types).type(torch.LongTensor)

            return {
                'l_ctx_tensor': l_ctx_tensor,
                'l_ctx_lens': l_ctx_lens,
                'mentions_tensor': mentions_tensor,
                'men_lens': men_lens,
                'r_ctx_tensor': r_ctx_tensor,
                'r_ctx_lens': r_ctx_lens,
                'labels_tensor': labels_tensor,
                'types_tensor': types_tensor,
                'types_str': types_str
            }
        else:
            raise StopIteration("out of list")
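All three code examples call a helper pad_src_seq that is not defined in the snippets. Below is a minimal sketch of what such a helper could look like, assuming it right-pads a numpy array with zeros along its first axis and truncates if the sequence is already longer than the target length; the actual helper behind these iterators may behave differently.

import numpy


def pad_src_seq(seq, max_len, pad_value=0):
    """Right-pad (or truncate) a (seq_len, ...) numpy array to max_len rows.

    Sketch only: the padding value and truncation behaviour are assumptions,
    not taken from the original code.
    """
    if seq.shape[0] >= max_len:
        return seq[:max_len]
    pad_shape = (max_len - seq.shape[0],) + seq.shape[1:]
    padding = numpy.full(pad_shape, pad_value, dtype=seq.dtype)
    return numpy.concatenate([seq, padding], axis=0)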
Code example #2
    def __next__(self):
        if self.cursor < self.sample_num:
            required_batch = self.questions[self.cursor:self.cursor +
                                            self.batch_size]
            self.cursor += self.batch_size
            final_batch = {'ids': [], 'art_ids': [], 'para_ids': []}
            contexts = []
            questions = []
            questions_lens = []
            contexts_lens = []
            ans_starts = []
            ans_ends = []
            # collect ids, token sequences and answer spans for this batch
            for question in required_batch:
                final_batch['ids'].append(question['id'])
                final_batch['art_ids'].append(question['art_id'])
                final_batch['para_ids'].append(question['para_id'])
                questions.append(question['question'])
                context = self.paragraphs[question['art_id']][
                    question['para_id']]
                contexts.append(context)
                questions_lens.append(question['question'].shape[0])
                contexts_lens.append(context.shape[0])
                ans_starts.append(question['ans_start'])
                ans_ends.append(question['ans_end'])
            # pad every context to the longest context in the batch
            contexts_padded = [
                pad_src_seq(context, max(contexts_lens))[numpy.newaxis, ...]
                for context in contexts
            ]
            contexts_tensor = torch.from_numpy(
                numpy.concatenate(contexts_padded,
                                  axis=0)).type(torch.LongTensor)
            # pad every question to the longest question in the batch
            questions_padded = [
                pad_src_seq(s, max(questions_lens))[numpy.newaxis, ...]
                for s in questions
            ]
            questions_tensor = torch.from_numpy(
                numpy.concatenate(questions_padded,
                                  axis=0)).type(torch.LongTensor)
            final_batch['contexts'] = contexts_tensor
            final_batch['questions'] = questions_tensor
            final_batch['con_lens'] = contexts_lens
            final_batch['q_lens'] = questions_lens
            # one-hot matrices marking the answer start/end positions
            start_mat = numpy.zeros((len(required_batch), max(contexts_lens)),
                                    dtype='int32')
            for i, num in enumerate(ans_starts):
                start_mat[i, num] = 1
            end_mat = numpy.zeros((len(required_batch), max(contexts_lens)),
                                  dtype='int32')
            for i, num in enumerate(ans_ends):
                end_mat[i, num] = 1
            final_batch['start'] = torch.from_numpy(start_mat).type(
                torch.FloatTensor)
            final_batch['end'] = torch.from_numpy(end_mat).type(
                torch.FloatTensor)
            final_batch['ans_start'] = ans_starts
            final_batch['ans_end'] = ans_ends
            return final_batch
        else:
            raise StopIteration("out of list")
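As an aside on the loops that fill start_mat and end_mat: the same one-hot matrices can be built with a single fancy-indexing assignment. A small self-contained sketch, with made-up answer indices and context length for illustration:

import numpy
import torch

ans_starts = [3, 0, 7]   # illustrative answer start indices
max_ctx_len = 10         # illustrative padded context length

# row i gets a 1 at column ans_starts[i]
start_mat = numpy.zeros((len(ans_starts), max_ctx_len), dtype='int32')
start_mat[numpy.arange(len(ans_starts)), ans_starts] = 1
start_tensor = torch.from_numpy(start_mat).type(torch.FloatTensor)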
Code example #3
    def next(self):
        if self.cursor < self.sample_num:
            required_batch = self.all_samples[self.cursor:self.cursor +
                                              self.batch_size]
            # required_batch = self.all_samples[:config['batch_size']]
            self.cursor += self.batch_size
            input_seqs = [seq_label[0] for seq_label in required_batch]
            input_labels = [seq_label[1] for seq_label in required_batch]
            input_seqs_length = [s.shape[0] for s in input_seqs]
            input_labels_length = [s.size for s in input_labels]
            seqs_padded = [
                pad_src_seq(s, max(input_seqs_length))[numpy.newaxis, ...]
                for s in input_seqs
            ]
            labels_padded = [
                pad_tgt_seq(s, max(input_labels_length))[numpy.newaxis, ...]
                for s in input_labels
            ]
            # (batch, max_seq, len(embnames)+len(gazs)+max_char+max_char)
            seq_tensor = torch.from_numpy(
                numpy.concatenate(seqs_padded, axis=0)).type(torch.LongTensor)
            # (batch, max_label)
            label_tensor = torch.from_numpy(
                numpy.concatenate(labels_padded,
                                  axis=0)).type(torch.LongTensor)

            # input_seqs_length[-1] = 350
            # input_labels_length[-1] = 350

            return seq_tensor, label_tensor, input_labels_length, input_seqs_length
        else:
            raise StopIteration("out of list")
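All three batching methods assume an enclosing iterator class that exposes cursor, sample_num and batch_size (plus the stored samples). A possible skeleton is sketched below; only the attribute names come from the code above, while the constructor and the per-epoch reset in __iter__ are assumptions.

class BatchIterator:
    """Skeleton inferred from the methods above; constructor and reset are assumed."""

    def __init__(self, all_samples, batch_size):
        self.all_samples = all_samples      # example #2 stores questions/paragraphs instead
        self.batch_size = batch_size
        self.sample_num = len(all_samples)
        self.cursor = 0

    def __iter__(self):
        self.cursor = 0                     # restart from the first sample each epoch
        return self

    def __next__(self):
        ...                                 # one of the batching bodies shown above

    next = __next__                         # Python 2 style alias, as in code example #3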