コード例 #1
0
def predictor(sentence1, sentence2):
    """Return the probability that ``sentence2`` follows ``sentence1``.

    The pair is encoded as ``[CLS] sentence1 [SEP] sentence2 [SEP]`` and
    scored with the next-sentence-prediction head of ``bert-base-uncased``.

    Args:
        sentence1: First sentence (segment A) as a plain string.
        sentence2: Candidate continuation (segment B) as a plain string.

    Returns:
        float: Softmax probability of class 0 of the NSP head, which in the
        Hugging Face BERT implementation is the "B continues A" label.
    """
    tokens_a = tokenizer.tokenize(sentence1)
    tokens_b = tokenizer.tokenize(sentence2)

    # Assemble the token list from the per-sentence tokens so that it is
    # guaranteed to line up one-to-one with the segment ids below.
    tokenized_text = ["[CLS]"] + tokens_a + ["[SEP]"] + tokens_b + ["[SEP]"]
    # Segment 0 covers [CLS] + sentence1 + first [SEP];
    # segment 1 covers sentence2 + final [SEP].
    segments_ids = [0] * (len(tokens_a) + 2) + [1] * (len(tokens_b) + 1)

    indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)

    # Use the GPU when one is present instead of failing on CPU-only hosts.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    tokens_tensor = torch.tensor([indexed_tokens]).to(device)
    segments_tensors = torch.tensor([segments_ids]).to(device)

    # NOTE: the weights are reloaded on every call; hoist the model out of
    # this function if it is called repeatedly.
    model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased')
    model.eval()
    model.to(device)

    # Predict the next sentence classification logits. The segment ids are
    # passed by keyword because the positional order of `token_type_ids` and
    # `attention_mask` differs between library versions.
    with torch.no_grad():
        next_sent_classif_logits = model(
            tokens_tensor, token_type_ids=segments_tensors)

    probabilities = torch.softmax(next_sent_classif_logits[0], dim=1)
    return probabilities[0][0].item()
コード例 #2
0
ファイル: bert.py プロジェクト: cbonoz/pytorch19
import torch
from pytorch_transformers import BertForNextSentencePrediction
from pytorch_transformers import BertTokenizer
from torch.nn.functional import cosine_similarity
from torch.nn.functional import softmax
from torch.nn.utils.rnn import pad_sequence

# Checkpoint name shared by the tokenizer and the model below.
BERT_MODEL_VERSION = 'bert-base-uncased'
# Sequence-length limit (presumably BERT's 512-position cap; confirm at the
# call sites).
MAX_SENTENCE_LENGTH = 512

tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_VERSION)

# output_hidden_states=True asks the model to return its hidden states in
# addition to the next-sentence logits.
model = BertForNextSentencePrediction.from_pretrained(
    BERT_MODEL_VERSION, output_hidden_states=True
)

# Run on the GPU when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    model.cuda()

# Inference only: put the model in evaluation mode.
model.eval()


def calculate_similarities(
    query_embedding,
    document_embeddings,
):

    return cosine_similarity(
        query_embedding,
        document_embeddings,
        dim=1,
コード例 #3
0
ファイル: do.py プロジェクト: Lennethe/paper
# Prompt (Japanese): "Please enter sentence 2".
print("文章2を入力してください")
sentence2 = input()

# BERT sentence-pair layout: [CLS] A [SEP] B [SEP].
text = "[CLS] " + sentence1 + " [SEP] " + sentence2 + " [SEP]"

# Segment ids: 0 for [CLS] + sentence1 + first [SEP],
# 1 for sentence2 + final [SEP].
ids1 = [0] * (len(tokenizer.tokenize(sentence1)) + 2)
ids2 = [1] * (len(tokenizer.tokenize(sentence2)) + 1)
ids1.extend(ids2)

tokenized_text = tokenizer.tokenize(text)
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
segments_ids = ids1
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])

model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased')
model.eval()

# Use the GPU when one is present instead of failing on CPU-only hosts.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokens_tensor = tokens_tensor.to(device)
segments_tensors = segments_tensors.to(device)
model.to(device)

# Predict the next sentence classification logits. The segment ids are passed
# by keyword because the positional order of `token_type_ids` and
# `attention_mask` differs between library versions.
with torch.no_grad():
    next_sent_classif_logits = model(
        tokens_tensor, token_type_ids=segments_tensors)

# Row of two class probabilities for the single input pair.
a = torch.softmax(next_sent_classif_logits[0], dim=1)
print(a)
# Same softmax evaluated on the CPU copy of the logits, then the CPU copy of
# the device result, for side-by-side comparison.
print(torch.softmax(next_sent_classif_logits[0].cpu(), dim=1))
print(a.cpu())
コード例 #4
0
                'input_ids': batch[0],
                'attention_mask': batch[1],
                'token_type_ids': batch[2]
            }
            outputs = model(**inputs)
            # logger.info(outputs)
            # logger.info(torch.argmax(outputs[0],dim=-1))
            # 二分类,0代表是下一句
            if torch.argmax(F.softmax(outputs[0], dim=-1), dim=-1).item() == 0:
                logger.info([texts_a, texts_b])
                logger.info(F.softmax(outputs[0], dim=-1))
    return [texts_b, F.softmax(outputs[0], dim=-1)]


if __name__ == "__main__":
    # Script entry point: pair one fixed sentence with every tag read from an
    # Excel sheet, run each pair through `test`, and collect the results.
    import pickle  # NOTE(review): unused in the lines visible here - confirm it is needed
    import pandas as pd
    # The tokenizer is loaded by the name 'bert-base-chinese'; the model
    # weights are loaded from the local 'out/' directory.
    tokenizer = BertTokenizer.from_pretrained('bert-base-chinese',
                                              do_lower_case=True)
    model = BertForNextSentencePrediction.from_pretrained('out/')
    model.to('cuda')  # hard-coded device: requires a CUDA-capable GPU

    # header=None: no row is treated as a header, so columns are labelled
    # 0, 1, ... and the first column is addressed as break_tag[0].
    break_tag = pd.read_excel(
        '/data/jh/notebooks/wanglei/1688/data/break_tag.xlsx', header=None)
    break_tag = list(break_tag[0])  # keep only the first column's values
    ret = []  # one entry per tag, in sheet order
    for tag in break_tag:
        # The first sentence is fixed; each tag is tried as the second one.
        text_a = ['露出精致的锁骨和优美的天鹅颈']
        text_b = [tag]
        ret.append(test(text_a, text_b, tokenizer, model))