def Bert_Sim_Model(db_path, path, resp):
    """Match response phrases against a restaurant's menu via Web BERT similarity.

    Loads the pre-trained Web BERT similarity model, scores every
    (response phrase, menu item) pair for the selected restaurant, and keeps
    the pairs whose similarity exceeds the threshold.

    Args:
        db_path: Passed to ``select_db`` to load the menu DataFrame
            (expected columns include ``restaurant``, ``name``, ``mean_value``).
        path: Passed to ``select_rest`` together with ``resp`` to pick a restaurant.
        resp: Iterable of free-text phrases to match against menu item names.

    Returns:
        pandas.DataFrame with columns ``['item', 'calories']`` — one row per
        phrase/menu-item pair that scored above the threshold.
    """
    model = WebBertSimilarity(device='cpu', batch_size=10)
    menu = select_db(db_path)
    restaurant = select_rest(path, resp)
    to_compare = menu[menu.restaurant == restaurant]

    matches = []
    for phrase in resp:
        for food in to_compare.name:
            score = float(model.predict([(phrase, food)]))
            # BERT similarity is scored 1 (unrelated) to 5 (same); > 2 keeps
            # anything with at least weak similarity.
            if score > 2:
                # BUG FIX: previously appended the whole boolean-indexed Series
                # (not a scalar) and concatenated mismatched frames row by row;
                # extract the scalar calorie value instead.
                calories = to_compare.loc[to_compare.name == food, 'mean_value'].iloc[0]
                matches.append({'item': phrase, 'calories': calories})

    # Build the result in one shot instead of quadratic pd.concat in a loop.
    return pd.DataFrame(matches, columns=['item', 'calories'])
def predict(self):
    """Return the BERT similarity prediction for this instance's sentence pair.

    Instantiates a fresh ``WebBertSimilarity`` model and scores the single
    pair ``(self.sentence1, self.sentence2)``.
    """
    similarity_model = WebBertSimilarity()
    sentence_pairs = [(self.sentence1, self.sentence2)]
    scores = similarity_model.predict(sentence_pairs)
    return scores
from semantic_text_similarity.models import WebBertSimilarity
from semantic_text_similarity.models import ClinicalBertSimilarity

# Compare the web-trained and clinical BERT similarity models on two pairs of
# texts: two Goldilocks passages (related) and two US-Elections passages.
web_model = WebBertSimilarity(device='cpu', batch_size=10)
clinical_model = ClinicalBertSimilarity(device='cpu', batch_size=10)

with open('texts/Goldilocks1.txt') as f:
    t1 = f.read()
with open('texts/Goldilocks2.txt') as f:
    t2 = f.read()
# BUG FIX: the next two reads previously overwrote t2, leaving t3 and t4
# undefined and crashing the predict() calls below with NameError.
with open('texts/US-Elections1.txt') as f:
    t3 = f.read()
with open('texts/US-Elections2.txt') as f:
    t4 = f.read()

# Scores run 1 (unrelated) to 5 (same); expected values noted inline.
print(web_model.predict([(t1, t1)]))  # [4.61928]
print(web_model.predict([(t1, t2)]))  # [3.4328978]
print(web_model.predict([(t3, t4)]))  # [1.7933936]
print(web_model.predict([(t1, t3)]))  # [0.17237176]
print('-------')
print(clinical_model.predict([(t1, t1)]))  # [4.939695]
print(clinical_model.predict([(t1, t2)]))  # [3.516664]
print(clinical_model.predict([(t3, t4)]))  # [1.2412066]
print(clinical_model.predict([(t1, t3)]))  # [0.4816986]
import config as cfg import story_loader import data_loader import csv import numpy as np from semantic_text_similarity.models import WebBertSimilarity from semantic_text_similarity.models import ClinicalBertSimilarity web_model = WebBertSimilarity(device='cuda', batch_size=100) #defaults to GPU prediction clinical_model = ClinicalBertSimilarity( device='cuda', batch_size=100) #defaults to GPU prediction num_sentences = 6 mqa = data_loader.DataLoader() def stringReplace(input): output = input if not output: output = 'None' return output ## LOAD THE TRAIN DATASET and WEB movie_list = mqa.get_split_movies(split='train') story, qa = mqa.get_story_qa_data('train', 'split_plot') with open('sem_align_eval/train_com.csv', mode='w') as out_file: file_writer = csv.writer(out_file, delimiter=',', quotechar='"',
import config as cfg import story_loader import data_loader import csv import numpy as np from semantic_text_similarity.models import WebBertSimilarity web_model = WebBertSimilarity(device='cuda', batch_size=10) #defaults to GPU prediction num_sentences = 6 mqa = data_loader.DataLoader() def stringReplace(input): output = input if not output: output = 'None' return output ## LOAD THE TRAINING DATASET movie_list = mqa.get_split_movies(split='train') story, qa = mqa.get_story_qa_data('train', 'split_plot') with open('data/sim-train.csv', mode='w') as training_file: employee_writer = csv.writer(training_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL) employee_writer.writerow([ '', 'video-id', 'fold-ind', 'startphrase', 'sent1', 'sent2',
#results = run_unisent(SAME,DIFF,embed,dist_func=angdist,inverse=False) results = run_experiment(SAME, DIFF, embed, sqrtdist, inverse=True) measures['unisent-sqrtdist'] = results print(score(results[0], results[1])) #results = run_unisent(SAME,DIFF,embed,dist_func=distance.cosine,inverse=True) results = run_experiment(SAME, DIFF, embed, distance.cosine, inverse=True) measures['unisent-cosine'] = results print(score(results[0], results[1])) #Bert print("Web BERT") #Predicts 1 (unrelated) to 5 (same) # It is actually trained on data from this same STS task from semantic_text_similarity.models import WebBertSimilarity web_bert = web_model = WebBertSimilarity(device='cpu') results = run_experiment( SAME, DIFF, lambda x: x, #<- no prep lambda x, y: float(web_bert.predict([(x, y)])), inverse=False) measures['web-bert'] = results print(score(results[0], results[1])) print("Clinical BERT") from semantic_text_similarity.models import ClinicalBertSimilarity clincial_bert = ClinicalBertSimilarity(device='cpu') results = run_experiment( SAME, DIFF,
from semantic_text_similarity.models import WebBertSimilarity

# Module-level model so it is loaded once, not on every call.
bert_model = WebBertSimilarity(device='cpu', batch_size=1)


def sentence_semantic_similarity(string1, string2):
    """ returns the semantic similarity between two strings using Bert """
    try:
        score = bert_model.predict([(string1, string2)])
        print("SEMANTIC SCORE", score)
        return score.item()
    # BUG FIX: was a bare `except:`, which also swallowed SystemExit and
    # KeyboardInterrupt. Keep the best-effort 0.0 fallback, but only for
    # ordinary runtime errors.
    except Exception:
        return 0.
import requests
import pandas as pd
from semantic_text_similarity.models import WebBertSimilarity


def sentiment_analyzer_scores(text):
    """Print and compute VADER polarity scores for *text*.

    NOTE(review): ``analyzer`` is not defined anywhere visible in this module
    (a SentimentIntensityAnalyzer is only built inside the commented-out loop
    below), so calling this function currently raises NameError — confirm
    where ``analyzer`` is meant to come from.
    """
    score = analyzer.polarity_scores(text)
    print(score)


#client = MongoClient("mongodb+srv://writer:[email protected]/test?retryWrites=true&w=majority")
#db = client["News"]
#mycol = db["pravda.com.ua"]

dd = {}

web_model = WebBertSimilarity(device='cpu', batch_size=10)  #defaults to GPU prediction
print(
    web_model.predict([("She won an olympic gold medal",
                        "The women is an olympic champion")]))
print(web_model.predict([("You are a loser", "You are a champion")]))

"""
for document in mycol.find():
    try:
        analyzer = SentimentIntensityAnalyzer()
        dic = analyzer.polarity_scores(document["EnglishText"]);
        document["Sentiment"] = dic['pos']
        dd[document["SourceUrl"]] = dd['Text']
    except:
        print("ERROR")
"""

# BUG FIX: `mycol` only exists in the commented-out Mongo setup above, so this
# line raised NameError the moment the module was imported. Disabled until the
# MongoClient setup is restored.
# x = mycol.find_one()
from semantic_text_similarity.models import WebBertSimilarity
from semantic_text_similarity.data import load_sts_b_data
from scipy.stats import pearsonr

# Evaluate the pre-trained Web BERT similarity model on the STS-B dev split
# by reporting Pearson correlation between gold and predicted similarities.
train, dev, test = load_sts_b_data()

similarity_model = WebBertSimilarity()
predicted_scores = similarity_model.predict(dev)

gold_scores = [example["similarity"] for example in dev]
print(pearsonr(gold_scores, predicted_scores))