Code example #1
0
 def pad_batch_BERT(self, batch, bert_layers, bert_dims):
     """Pad one BERT-encoded batch so every document holds the same
     number of sentences.

     Returns a 5-tuple mirroring the batched variants of this API:
     (padded_batch, max_sentence_count, None, per-doc original sentence
     counts, None) — the two ``None`` slots are placeholders kept for
     interface compatibility.
     """
     # Longest document (in sentences) decides the padded length.
     sentence_counts = [len(document) for document in batch]
     longest = max(sentence_counts)
     padded, unpadded_counts = hF.pad_batch_with_sentences_BERT(
         batch, longest, bert_layers, bert_dims)
     return padded, longest, None, unpadded_counts, None
Code example #2
0
    def encode_and_pad_BERT(self, data_batches, Bert_model_Path, device,
                            bert_layers, bert_dims):
        """Encode every batch with a pretrained BERT model, then pad each
        batch so all documents in it have the same sentence count.

        Args:
            data_batches: list of batches; each batch is a list of
                documents, each document a list of sentences. Mutated in
                place (encoded/padded batches are written back).
            Bert_model_Path: path or name passed to ``from_pretrained``,
                e.g. an uncased_L-12_H-768_A-12 model directory.
            device: torch device the BERT model is moved to.
            bert_layers: which BERT layers to extract embeddings from.
            bert_dims: embedding dimensionality used for padding.

        Returns:
            (data_batches, max_sentences, None, no_padding_sentences, None)
            where ``max_sentences[i]`` is the padded sentence count of
            batch ``i`` and ``no_padding_sentences[i]`` the per-document
            original counts. The ``None`` slots keep the signature aligned
            with the other padding methods.
        """
        # Local import keeps the heavy BERT dependency optional for
        # callers that never use BERT encoding.
        from pytorch_pretrained_bert import BertTokenizer, BertModel
        tokenizer = BertTokenizer.from_pretrained(Bert_model_Path)
        model = BertModel.from_pretrained(Bert_model_Path)
        model.eval()  # inference only: disable dropout
        model.to(device)

        print('Encoding Data using BERT...')
        max_sentences = []
        no_padding_sentences = []
        # total= is required for a real progress bar: enumerate() yields a
        # generator with no __len__, so tqdm would otherwise show only a
        # bare counter.
        for index, batch in tqdm(enumerate(data_batches),
                                 total=len(data_batches)):
            batch = hF.encode_batch_BERT(batch, model, tokenizer, device,
                                         bert_layers)
            # Pad every document up to the longest one in this batch.
            num_sentences = [len(x) for x in batch]
            max_num_sentences = max(num_sentences)

            batch, no_padding_num_sentences = hF.pad_batch_with_sentences_BERT(
                batch, max_num_sentences, bert_layers, bert_dims)

            max_sentences.append(max_num_sentences)
            no_padding_sentences.append(no_padding_num_sentences)
            data_batches[index] = batch
        return data_batches, max_sentences, None, no_padding_sentences, None
Code example #3
0
    def pad_BERT(self, data_batches, bert_layers, bert_dims):
        """Pad already-encoded BERT batches so all documents in a batch
        have the same sentence count.

        Args:
            data_batches: list of batches; each batch is a list of
                documents, each document a list of sentences. Mutated in
                place (padded batches are written back).
            bert_layers: which BERT layers the embeddings were taken from.
            bert_dims: embedding dimensionality used for padding.

        Returns:
            (data_batches, max_sentences, None, no_padding_sentences, None)
            where ``max_sentences[i]`` is the padded sentence count of
            batch ``i`` and ``no_padding_sentences[i]`` the per-document
            original counts. The ``None`` slots keep the signature aligned
            with ``encode_and_pad_BERT``.
        """
        print('Padding Data using BERT...')
        max_sentences = []
        no_padding_sentences = []
        # total= is required for a real progress bar: enumerate() yields a
        # generator with no __len__, so tqdm would otherwise show only a
        # bare counter.
        for index, batch in tqdm(enumerate(data_batches),
                                 total=len(data_batches)):
            # Pad every document up to the longest one in this batch.
            num_sentences = [len(x) for x in batch]
            max_num_sentences = max(num_sentences)

            batch, no_padding_num_sentences = hF.pad_batch_with_sentences_BERT(
                batch, max_num_sentences, bert_layers, bert_dims)

            max_sentences.append(max_num_sentences)
            no_padding_sentences.append(no_padding_num_sentences)
            data_batches[index] = batch
        return data_batches, max_sentences, None, no_padding_sentences, None