def _pad_batch_records(self, batch_records):
    """Pad a batch of records that carry one label id sequence each.

    Args:
        batch_records: Records exposing ``token_ids``, ``text_type_ids``,
            ``position_ids`` and ``label_ids``. The collection is iterated
            once per field, so it must be re-iterable (e.g. a list).

    Returns:
        list: ``[padded_token_ids, padded_text_type_ids,
        padded_position_ids, input_mask, padded_label_ids, seq_lens]``,
        in that order. The element types are whatever ``pad_batch_data``
        produces (presumably arrays -- confirm against that helper).
    """
    # Gather each per-record field into its own batch-level list.
    token_seqs = [rec.token_ids for rec in batch_records]
    type_seqs = [rec.text_type_ids for rec in batch_records]
    position_seqs = [rec.position_ids for rec in batch_records]
    label_seqs = [rec.label_ids for rec in batch_records]

    # Only the token ids request the input mask and the sequence lengths;
    # with both flags set the helper's result unpacks into three values.
    token_outputs = pad_batch_data(
        token_seqs,
        pad_idx=self.pad_id,
        return_input_mask=True,
        return_seq_lens=True,
    )
    token_batch, mask, lengths = token_outputs

    # Text-type and position ids are padded with the same pad id as tokens.
    type_batch = pad_batch_data(type_seqs, pad_idx=self.pad_id)
    position_batch = pad_batch_data(position_seqs, pad_idx=self.pad_id)

    # Labels are padded with len(label_map) - 1 rather than pad_id.
    # NOTE(review): presumably the id of the last entry in label_map
    # (e.g. an "outside" tag) -- confirm against the label map file.
    label_pad_value = len(self.label_map) - 1
    label_batch = pad_batch_data(label_seqs, pad_idx=label_pad_value)

    return [
        token_batch,
        type_batch,
        position_batch,
        mask,
        label_batch,
        lengths,
    ]
def _pad_batch_records(self, batch_records):
    """Pad a batch of records that carry a single label id each.

    Args:
        batch_records: Records exposing ``token_ids``, ``text_type_ids``,
            ``position_ids`` and ``label_id``. The collection is iterated
            once per field, so it must be re-iterable (e.g. a list).

    Returns:
        list: ``[padded_token_ids, padded_text_type_ids,
        padded_position_ids, input_mask, labels, seq_lens]``, in that
        order. ``labels`` is an int64 NumPy array reshaped to ``[-1, 1]``
        (one row per record when each ``label_id`` is a scalar); the other
        elements are whatever ``pad_batch_data`` produces.
    """
    # Gather each per-record field into its own batch-level list.
    token_seqs = [rec.token_ids for rec in batch_records]
    type_seqs = [rec.text_type_ids for rec in batch_records]
    position_seqs = [rec.position_ids for rec in batch_records]
    raw_labels = [rec.label_id for rec in batch_records]

    # Labels need no padding: convert to int64 and force a column shape.
    label_array = np.array(raw_labels)
    label_array = label_array.astype("int64")
    label_column = label_array.reshape([-1, 1])

    # Only the token ids request the input mask and the sequence lengths;
    # with both flags set the helper's result unpacks into three values.
    token_outputs = pad_batch_data(
        token_seqs,
        pad_idx=self.pad_id,
        return_input_mask=True,
        return_seq_lens=True,
    )
    token_batch, mask, lengths = token_outputs

    # Text-type and position ids are padded with the same pad id as tokens.
    type_batch = pad_batch_data(type_seqs, pad_idx=self.pad_id)
    position_batch = pad_batch_data(position_seqs, pad_idx=self.pad_id)

    return [
        token_batch,
        type_batch,
        position_batch,
        mask,
        label_column,
        lengths,
    ]