def bert_sent_benchmark():
    model = load_bert_tone_model()

    start = time.time()
    preds = df_val.text.map(lambda x: model.predict(x))
    print_speed_performance(start, len(df_val))

    # map the English model labels onto the Danish labels used in the gold data
    spellings_map = {'subjective': 'subjektivt', 'objective': 'objektivt',
                     'positive': 'positiv', 'negative': 'negativ', 'neutral': 'neutral'}
    df_val['bert_ana'] = preds.map(lambda x: spellings_map[x['analytic']])
    df_val['bert_pol'] = preds.map(lambda x: spellings_map[x['polarity']])

    f1_report(df_val['polarity'], df_val['bert_pol'], 'BERT_Tone (polarity)',
              "twitter_sentiment(val)")
    f1_report(df_val['sub/obj'], df_val['bert_ana'], 'BERT_Tone (sub/obj)',
              "twitter_sentiment(val)")
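The benchmark above reads a module-level `df_val` DataFrame holding the Twitter sentiment validation split, with a `text` column and gold `polarity` and `sub/obj` columns already in Danish (the same labels `spellings_map` produces). A minimal sketch of that expected layout, using made-up placeholder rows purely for illustration:

import pandas as pd

# Hypothetical illustration of the shape bert_sent_benchmark() expects;
# the real df_val is loaded from the Twitter sentiment validation split.
df_val = pd.DataFrame({
    'text':     ['det er en rigtig god dag', 'mødet starter klokken 9'],
    'polarity': ['positiv', 'neutral'],        # gold polarity labels (Danish)
    'sub/obj':  ['subjektivt', 'objektivt'],   # gold subjectivity labels (Danish)
})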
def bert_sent_benchmark(datasets):
    model = load_bert_tone_model()

    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        if dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()
        df['valence'] = df['valence'].map(to_label)

        # predict with bert sentiment
        df['pred'] = df.text.map(
            lambda x: model.predict(x, analytic=False)['polarity'])

        report(df['valence'], df['pred'], 'BERT_Tone (polarity)', dataset)
def __init__(self, hisia=True):
    try:
        from afinn import Afinn
        self.afinn = Afinn(language='da')
    except:
        print('afinn not installed')
        self.afinn = False
    try:
        from sentida import Sentida
        self.sent = Sentida()
    except:
        print('sentida not loading')
        self.sent = False
    try:
        from danlp.models import load_bert_emotion_model
        self.classifier = load_bert_emotion_model()
    except:
        self.classifier = False
        print('bert emotion not loading')
    try:
        from danlp.models import load_bert_tone_model
        self.classifier_tone = load_bert_tone_model()
    except:
        print('bert tone not working')
        self.classifier_tone = False
    try:
        from danlp.models import load_spacy_model
        # if you get an error saying da.vectors was not found, try setting
        # vectorError=True - it is a temporary fix
        self.nlp = load_spacy_model(textcat='sentiment', vectorError=True)
    except:
        print('spacy sentiment not working')
        self.nlp = False
    if hisia:
        try:
            from hisia import Hisia
            self.hisia = Hisia
        except:
            self.hisia = False
            print('hisia not working')
    else:
        self.hisia = False
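The constructor follows an optional-dependency pattern: each backend is imported inside a try/except, and the corresponding attribute is set to False when the library is missing, so downstream code can test the attribute before using it. A small standalone sketch of that pattern, assuming only the danlp import is real (the class name and `polarity` method are made up for illustration):

class _OptionalBackends:
    """Illustration of the try/except loading pattern used above (hypothetical name)."""

    def __init__(self):
        try:
            from danlp.models import load_bert_tone_model
            self.classifier_tone = load_bert_tone_model()
        except Exception:
            print('bert tone not working')
            self.classifier_tone = False

    def polarity(self, text):
        # fall back gracefully when the backend failed to load
        if not self.classifier_tone:
            return None
        return self.classifier_tone.predict(text, analytic=False)['polarity']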
def test_predictions(self):
    model = load_bert_tone_model()

    # predict() returns a dict with 'analytic' and 'polarity' keys;
    # a task that is switched off comes back as None
    self.assertEqual(model.predict('han er 12 år', polarity=False),
                     {'analytic': 'objective', 'polarity': None})
    self.assertEqual(model.predict('han gør det godt', analytic=False),
                     {'analytic': None, 'polarity': 'positive'})
    self.assertEqual(model.predict('Det er super dårligt'),
                     {'analytic': 'subjective', 'polarity': 'negative'})

    self.assertEqual(model._classes()[0], ['positive', 'neutral', 'negative'])
    # with polarity disabled, only the two analytic (obj/subj) probabilities remain
    self.assertTrue(
        len(model.predict_proba('jeg er meget glad idag', polarity=False)[0]) == 2)
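The test pins down the shape of the BERT Tone API: predict() returns a dict with 'analytic' and 'polarity' keys, predict_proba() returns one probability array per enabled task, and _classes()[0] lists the polarity classes in the order the probabilities are emitted. A short sketch of how the arrays line up with the class names (printed values will depend on the model):

from danlp.models import load_bert_tone_model

model = load_bert_tone_model()

proba = model.predict_proba('han gør det godt')  # both tasks enabled by default
polarity_probs, analytic_probs = proba           # 3 polarity probs, 2 analytic probs

# _classes()[0] is ['positive', 'neutral', 'negative'], so it can be zipped directly
for label, p in zip(model._classes()[0], polarity_probs):
    print(label, round(float(p), 3))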
def bert_sent_benchmark(datasets):
    model = load_bert_tone_model()

    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        if dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()
        df['valence'] = df['valence'].map(sentiment_score_to_label)

        # predict with bert sentiment
        start = time.time()
        df['pred'] = df.text.map(
            lambda x: model.predict(x, analytic=False)['polarity'])
        print_speed_performance(start, len(df))

        # map the English model labels onto the Danish labels used in the gold data
        spellings_map = {'subjective': 'subjektivt', 'objective': 'objektivt',
                         'positive': 'positiv', 'negative': 'negativ', 'neutral': 'neutral'}
        df['pred'] = df['pred'].map(lambda x: spellings_map[x])

        f1_report(df['valence'], df['pred'], 'BERT_Tone (polarity)', dataset)
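In the Europarl and LCC datasets, `valence` is a numeric sentiment score, and sentiment_score_to_label converts it to the same three Danish polarity labels that `spellings_map` produces, so f1_report compares like with like. The helper is not shown in this excerpt; a sketch of what it plausibly looks like, assuming a simple sign-based mapping:

def sentiment_score_to_label(score):
    # assumed sign-based mapping from a numeric valence score
    # to the Danish labels used by f1_report above
    if score == 0:
        return 'neutral'
    if score < 0:
        return 'negativ'
    return 'positiv'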
def __get_sent_danlp_bert_tone(texts, tokenlist):
    from danlp.models import load_bert_tone_model

    classifier = load_bert_tone_model()

    def get_proba(txt):
        # predict_proba returns one array per task:
        # polarity (pos/neu/neg) and analytic (objective/subjective)
        res = classifier.predict_proba(txt)
        polarity, analytic = res
        pos, neu, neg = polarity
        obj, subj = analytic
        return pos, neu, neg, obj, subj

    return pd.DataFrame(
        [get_proba(txt) for txt in texts],
        columns=[
            "polarity_pos",
            "polarity_neu",
            "polarity_neg",
            "analytic_obj",
            "analytic_subj",
        ],
    )
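A quick way to exercise the helper above from within the same module (the leading underscores only mark it as private; note that the tokenlist argument is accepted but unused in this excerpt, so None is passed here for illustration):

texts = ['det er en rigtig god dag', 'det er super dårligt']
scores = __get_sent_danlp_bert_tone(texts, tokenlist=None)
# one row per text, five probability columns
print(scores[['polarity_pos', 'polarity_neu', 'polarity_neg']])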