def Bert_Sim_Model(db_path, path, resp):
    """
    Match each fuzzy word in *resp* against the menu items of the selected
    restaurant using the WebBert semantic-similarity model.

    Parameters
    ----------
    db_path : database location forwarded to ``select_db``.
    path, resp : forwarded to ``select_rest`` to pick the restaurant;
        *resp* is also the list of fuzzy item names to match.

    Returns
    -------
    pandas.DataFrame with columns ``item`` and ``calories``: one row per
    (response, menu item) pair whose similarity score exceeds 2.
    """
    model = WebBertSimilarity(device='cpu', batch_size=10)
    df = select_db(db_path)
    ind = select_rest(path, resp)

    to_compare = df[df.restaurant == ind]
    matches = []
    for res in resp:
        for food in to_compare.name:
            # predict() returns an array-like holding one score
            # (roughly 1 = unrelated .. 5 = same); > 2 is the
            # "similar enough" threshold used here.
            pred = float(model.predict([(res, food)]))
            if pred > 2:
                # BUG FIX: extract the scalar mean_value with .iloc[0].
                # The original appended the whole boolean-indexed Series,
                # which broke the DataFrame construction below.
                calories = to_compare.mean_value[to_compare.name == food].iloc[0]
                matches.append({'item': res, 'calories': calories})

    # Build the result in one shot instead of concatenating row-by-row
    # (the original also clobbered `df` inside the loop).
    return pd.DataFrame(matches, columns=['item', 'calories'])
 def predict(self):
     """Score the semantic similarity of this instance's sentence pair.

     Returns the raw prediction array produced by ``WebBertSimilarity``
     (a single score for the (sentence1, sentence2) pair).
     """
     similarity_model = WebBertSimilarity()
     pair = (self.sentence1, self.sentence2)
     return similarity_model.predict([pair])
# Example #3
from semantic_text_similarity.models import WebBertSimilarity
from semantic_text_similarity.models import ClinicalBertSimilarity

# Both models score sentence-pair similarity on a ~1 (unrelated) to
# ~5 (same meaning) scale.
web_model = WebBertSimilarity(device='cpu', batch_size=10)

clinical_model = ClinicalBertSimilarity(device='cpu', batch_size=10)

with open('texts/Goldilocks1.txt') as f:
    t1 = f.read()

with open('texts/Goldilocks2.txt') as f:
    t2 = f.read()

# BUG FIX: the original assigned both election texts to t2, so t3 and t4
# were undefined and every predict() call referencing them raised NameError.
with open('texts/US-Elections1.txt') as f:
    t3 = f.read()

with open('texts/US-Elections2.txt') as f:
    t4 = f.read()

# Same-text and related-text pairs score high; unrelated pairs score low.
print(web_model.predict([(t1, t1)]))  #[4.61928]
print(web_model.predict([(t1, t2)]))  #[3.4328978]
print(web_model.predict([(t3, t4)]))  #[1.7933936]
print(web_model.predict([(t1, t3)]))  #[0.17237176]
print('-------')
print(clinical_model.predict([(t1, t1)]))  #[4.939695]
print(clinical_model.predict([(t1, t2)]))  #[3.516664]
print(clinical_model.predict([(t3, t4)]))  #[1.2412066]
print(clinical_model.predict([(t1, t3)]))  #[0.4816986]
import config as cfg
import story_loader
import data_loader
import csv
import numpy as np
from semantic_text_similarity.models import WebBertSimilarity
from semantic_text_similarity.models import ClinicalBertSimilarity

# Both similarity models run on the GPU with large batches for throughput.
web_model = WebBertSimilarity(device='cuda',
                              batch_size=100)  #defaults to GPU prediction
clinical_model = ClinicalBertSimilarity(
    device='cuda', batch_size=100)  #defaults to GPU prediction

# Number of plot sentences considered per item — TODO confirm: the code
# consuming this constant is outside the visible span.
num_sentences = 6
mqa = data_loader.DataLoader()  # project-local loader for the movie QA dataset


def stringReplace(input):
    """Return *input* unchanged, or the literal string 'None' if it is falsy."""
    return input if input else 'None'


## LOAD THE TRAIN DATASET and WEB
movie_list = mqa.get_split_movies(split='train')
story, qa = mqa.get_story_qa_data('train', 'split_plot')
with open('sem_align_eval/train_com.csv', mode='w') as out_file:
    file_writer = csv.writer(out_file,
                             delimiter=',',
                             quotechar='"',
import config as cfg
import story_loader
import data_loader
import csv
import numpy as np
from semantic_text_similarity.models import WebBertSimilarity

# GPU-backed similarity model; smaller batch than the companion script
# (batch_size=10) — presumably memory-driven, confirm.
web_model = WebBertSimilarity(device='cuda',
                              batch_size=10)  #defaults to GPU prediction
# Number of plot sentences considered per item — TODO confirm: the
# consuming code is outside the visible span.
num_sentences = 6
mqa = data_loader.DataLoader()  # project-local loader for the movie QA dataset


def stringReplace(input):
    """Substitute the literal string 'None' for any falsy *input*."""
    if not input:
        return 'None'
    return input


## LOAD THE TRAINING DATASET
movie_list = mqa.get_split_movies(split='train')
story, qa = mqa.get_story_qa_data('train', 'split_plot')

with open('data/sim-train.csv', mode='w') as training_file:
    employee_writer = csv.writer(training_file,
                                 delimiter=',',
                                 quotechar='"',
                                 quoting=csv.QUOTE_MINIMAL)
    employee_writer.writerow([
        '', 'video-id', 'fold-ind', 'startphrase', 'sent1', 'sent2',
    #results = run_unisent(SAME,DIFF,embed,dist_func=angdist,inverse=False)
    results = run_experiment(SAME, DIFF, embed, sqrtdist, inverse=True)
    measures['unisent-sqrtdist'] = results
    print(score(results[0], results[1]))

    #results = run_unisent(SAME,DIFF,embed,dist_func=distance.cosine,inverse=True)
    results = run_experiment(SAME, DIFF, embed, distance.cosine, inverse=True)
    measures['unisent-cosine'] = results
    print(score(results[0], results[1]))

    #Bert
    print("Web BERT")
    #Predicts 1 (unrelated) to 5 (same)
    # It is actually trained on data from this same STS task
    from semantic_text_similarity.models import WebBertSimilarity
    web_bert = web_model = WebBertSimilarity(device='cpu')
    results = run_experiment(
        SAME,
        DIFF,
        lambda x: x,  #<- no prep
        lambda x, y: float(web_bert.predict([(x, y)])),
        inverse=False)
    measures['web-bert'] = results
    print(score(results[0], results[1]))

    print("Clinical BERT")
    from semantic_text_similarity.models import ClinicalBertSimilarity
    clincial_bert = ClinicalBertSimilarity(device='cpu')
    results = run_experiment(
        SAME,
        DIFF,
# Example #7
from semantic_text_similarity.models import WebBertSimilarity

# Single-pair calls only, so batch_size=1 is sufficient.
bert_model = WebBertSimilarity(device='cpu', batch_size=1)

def sentence_semantic_similarity(string1, string2):
    """Return the Bert semantic-similarity score of two strings.

    Scores run from ~1 (unrelated) to ~5 (same meaning). Falls back to
    0.0 instead of raising if the model fails on the input.
    """
    try:
        score = bert_model.predict([(string1, string2)])
        print("SEMANTIC SCORE", score)
        return score.item()
    # BUG FIX: the bare `except:` also swallowed SystemExit and
    # KeyboardInterrupt; catch only ordinary errors.
    except Exception:
        return 0.

# Example #8
import requests
import pandas as pd
from semantic_text_similarity.models import WebBertSimilarity


def sentiment_analyzer_scores(text):
    # Print (not return) VADER polarity scores for *text*.
    # NOTE(review): `analyzer` is never defined in the active code — it only
    # appears inside the commented-out Mongo loop below, so calling this
    # raises NameError. Presumably a module-level
    # SentimentIntensityAnalyzer() was intended; confirm before use.
    score = analyzer.polarity_scores(text)
    print(score)


# Disabled MongoDB wiring kept for reference; code below that references
# `mycol` cannot run without it.
#client = MongoClient("mongodb+srv://writer:[email protected]/test?retryWrites=true&w=majority")
#db = client["News"]

#mycol = db["pravda.com.ua"]
dd = {}  # url -> text mapping filled only by the (disabled) Mongo loop
web_model = WebBertSimilarity(device='cpu',
                              batch_size=10)  # CPU prediction, small batches
# Smoke-test the model on two hand-written pairs
# (scores are on a roughly 1-5 similarity scale).
print(
    web_model.predict([("She won an olympic gold medal",
                        "The women is an olympic champion")]))
print(web_model.predict([("You are a loser", "You are a champion")]))
"""
for document in mycol.find():
    try:
        analyzer = SentimentIntensityAnalyzer()
        dic = analyzer.polarity_scores(document["EnglishText"]);
        document["Sentiment"] = dic['pos']
        dd[document["SourceUrl"]] = dd['Text']
    except:
        print("ERROR")
"""
# NOTE(review): `mycol` is only defined in the commented-out lines above —
# this statement raises NameError as written.
x = mycol.find_one()
from semantic_text_similarity.models import WebBertSimilarity
from semantic_text_similarity.data import load_sts_b_data
from scipy.stats import pearsonr

# STS-B benchmark: score the dev split with the web-trained model and
# report the Pearson correlation against the gold similarity labels.
train, dev, test = load_sts_b_data()

model = WebBertSimilarity()
predictions = model.predict(dev)

gold_scores = [instance["similarity"] for instance in dev]
print(pearsonr(gold_scores, predictions))