Example #1
import pandas as pd

from semantic_text_similarity.models import WebBertSimilarity


def Bert_Sim_Model(db_path, path, resp):
    """
    Loads the pre-trained WebBertSimilarity model and matches the fuzzy
    response words against the menu of the selected restaurant.
    """
    model = WebBertSimilarity(device='cpu', batch_size=10)
    df = select_db(db_path)
    ind = select_rest(path, resp)

    to_compare = df[df.restaurant == ind]
    m = []
    for res in resp:
        for food in to_compare.name:
            pred = float(model.predict([(res, food)]))
            if pred > 2:
                m.append([res, float(to_compare.mean_value[to_compare.name == food].iloc[0])])

    df_final = pd.DataFrame(columns=['item', 'calories'])
    for row in m:
        row_df = pd.DataFrame([row], columns=['item', 'calories'])
        df_final = pd.concat([df_final, row_df], ignore_index=True)

    return df_final
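
A minimal usage sketch for the function above. The paths, the response list,
and the behaviour of the project helpers select_db and select_rest (defined
elsewhere in the project) are assumptions, not part of the original snippet:

response = ['burger', 'fries', 'cola']  # hypothetical fuzzy food words
calories_df = Bert_Sim_Model('data/menu.db', 'data/restaurants.csv', response)
print(calories_df)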
Example #2

def predict(self):
    model = WebBertSimilarity()
    model_input = [(self.sentence1, self.sentence2)]
    predictions = model.predict(model_input)
    return predictions
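
The method above belongs to a class that stores the two sentences to compare.
A minimal sketch of such a wrapper, with the class name and constructor
assumed purely for illustration:

from semantic_text_similarity.models import WebBertSimilarity

class SentencePair:  # hypothetical wrapper class
    def __init__(self, sentence1, sentence2):
        self.sentence1 = sentence1
        self.sentence2 = sentence2

    def predict(self):
        model = WebBertSimilarity()
        return model.predict([(self.sentence1, self.sentence2)])

print(SentencePair("A dog runs.", "A canine is running.").predict())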
Example #3
from semantic_text_similarity.models import WebBertSimilarity
from semantic_text_similarity.models import ClinicalBertSimilarity

web_model = WebBertSimilarity(device='cpu', batch_size=10)

clinical_model = ClinicalBertSimilarity(device='cpu', batch_size=10)

with open('texts/Goldilocks1.txt') as f:
    t1 = f.read()

with open('texts/Goldilocks2.txt') as f:
    t2 = f.read()

with open('texts/US-Elections1.txt') as f:
    t3 = f.read()

with open('texts/US-Elections2.txt') as f:
    t4 = f.read()

print(web_model.predict([(t1, t1)])) #[4.61928]
print(web_model.predict([(t1, t2)])) #[3.4328978]
print(web_model.predict([(t3, t4)])) #[1.7933936]
print(web_model.predict([(t1, t3)])) #[0.17237176]
print('-------')
print(clinical_model.predict([(t1, t1)])) #[4.939695]
print(clinical_model.predict([(t1, t2)])) #[3.516664]
print(clinical_model.predict([(t3, t4)])) #[1.2412066]
print(clinical_model.predict([(t1, t3)])) #[0.4816986]
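
The four file reads above repeat one pattern, so a small helper makes the
comparison easy to extend. The file names match the example; the helper
itself is an assumption added for illustration:

def read_text(path):
    with open(path) as f:
        return f.read()

t1, t2, t3, t4 = (read_text(f'texts/{name}.txt') for name in
                  ('Goldilocks1', 'Goldilocks2',
                   'US-Elections1', 'US-Elections2'))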
Example #4

    for q in qa:
        movie_idx = q[4]
        question = q[1]
        ans_1 = stringReplace(q[2][0])
        ans_2 = stringReplace(q[2][1])
        ans_3 = stringReplace(q[2][2])
        ans_4 = stringReplace(q[2][3])
        ans_5 = stringReplace(q[2][4])
        index = 0
        movie_align = ''
        sim_mtx = []
        comparison_metric = ' '.join(
            [question, ans_1, ans_2, ans_3, ans_4, ans_5])
        sen_mtx = []
        for sen in story[movie_idx]:
            sen_mtx.append((sen, comparison_metric))
        sim_mtx_w = web_model.predict(sen_mtx)
        sim_mtx_c = clinical_model.predict(sen_mtx)
        sim_mtx = np.divide(sim_mtx_w, np.mean(sim_mtx_w)) + np.divide(
            sim_mtx_c, np.mean(sim_mtx_c))
        top_sentences = np.asarray(sim_mtx).argsort()[-num_sentences:][::-1]
        for a in top_sentences:
            movie_align = movie_align + " ".join(
                story[movie_idx][a].split("\n"))
        combined = " ".join(movie_align.split("\n"))
        print("COMB", counter)
        file_writer.writerow([counter, q[5], top_sentences])
        correct_ans = q[3]
        counter = counter + 1

## LOAD THE TRAIN DATASET and CLINICAL
movie_list = mqa.get_split_movies(split='train')
    for q in qa:
        movie_idx = q[4]
        movie_story = ', '.join(story[movie_idx])
        question = q[1]
        ans_1 = stringReplace(q[2][0])
        ans_2 = stringReplace(q[2][1])
        ans_3 = stringReplace(q[2][2])
        ans_4 = stringReplace(q[2][3])
        ans_5 = stringReplace(q[2][4])
        index = 0
        #story_split = movie_story.splitlines()
        movie_align = ''
        sim_mtx = []
        comparison_metric = ' '.join(
            [question, ans_1, ans_2, ans_3, ans_4, ans_5])
        for sen in story[movie_idx]:
            sim_mtx.append(
                web_model.predict([(sen, comparison_metric)]).item())
        top_sentences = np.asarray(sim_mtx).argsort()[-num_sentences:][::-1]
        for a in top_sentences:
            movie_align = movie_align + " ".join(
                story[movie_idx][a].split("\n"))
        combined = " ".join(movie_align.split("\n")) + question
        correct_ans = q[3]

        employee_writer.writerow([
            counter, movie_idx, counter, combined, movie_align, question,
            movie_align, ans_1, ans_2, ans_3, ans_4, ans_5, correct_ans
        ])
        counter = counter + 1

## LOAD THE VALIDATION DATASET
movie_list = mqa.get_split_movies(split='val')
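
Two idioms in the loops above deserve a tiny standalone illustration: the
two-model ensemble (each model's scores are divided by their own mean before
summing, so neither scale dominates) and the argsort top-k sentence
selection. The scores below are made up:

import numpy as np

sim_mtx_w = np.array([2.0, 4.0, 1.0, 3.0])   # hypothetical web model scores
sim_mtx_c = np.array([1.0, 3.0, 0.5, 2.5])   # hypothetical clinical scores

# Mean-normalize each model's scores, then sum them.
sim_mtx = np.divide(sim_mtx_w, np.mean(sim_mtx_w)) + np.divide(
    sim_mtx_c, np.mean(sim_mtx_c))

# Indices of the num_sentences highest combined scores, best first.
num_sentences = 2
top_sentences = np.asarray(sim_mtx).argsort()[-num_sentences:][::-1]
print(top_sentences)  # [1 3]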
Example #6

# Assuming the analyzer is VADER's SentimentIntensityAnalyzer, as used in the
# commented-out MongoDB block further down.
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

analyzer = SentimentIntensityAnalyzer()


def sentiment_analyzer_scores(text):
    score = analyzer.polarity_scores(text)
    print(score)
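
A quick usage check for the function above; the sample sentence is made up:

sentiment_analyzer_scores("The movie was surprisingly good!")
# prints a dict like {'neg': ..., 'neu': ..., 'pos': ..., 'compound': ...}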


#client = MongoClient("mongodb+srv://writer:[email protected]/test?retryWrites=true&w=majority")
#db = client["News"]

#mycol = db["pravda.com.ua"]
dd = {}
web_model = WebBertSimilarity(device='cpu',
                              batch_size=10)  # CPU prediction; use device='cuda' for GPU
print(
    web_model.predict([("She won an Olympic gold medal",
                        "The woman is an Olympic champion")]))
print(web_model.predict([("You are a loser", "You are a champion")]))
"""
for document in mycol.find():
    try:
        analyzer = SentimentIntensityAnalyzer()
        dic = analyzer.polarity_scores(document["EnglishText"]);
        document["Sentiment"] = dic['pos']
        dd[document["SourceUrl"]] = document["EnglishText"]
    except:
        print("ERROR")
"""
# Disabled: mycol is only defined inside the commented-out MongoDB block above.
#x = mycol.find_one()
#model(x['EnglishText'])
Example #7

from semantic_text_similarity.models import WebBertSimilarity
from semantic_text_similarity.data import load_sts_b_data
from scipy.stats import pearsonr

train, dev, test = load_sts_b_data()

model = WebBertSimilarity()
predictions = model.predict(dev)


print(pearsonr([instance["similarity"] for instance in dev], predictions))
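
pearsonr returns a (correlation, p-value) pair, so unpacking it makes the
headline number explicit. A small follow-up sketch under the same assumptions
as the snippet above:

r, p = pearsonr([instance["similarity"] for instance in dev], predictions)
print(f"Pearson r on STS-B dev: {r:.4f} (p={p:.2e})")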