def spacy_sent_benchmark(datasets):
    nlpS = load_spacy_model(textcat='sentiment', vectorError=True)

    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        if dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()
        df['valence'] = df['valence'].map(to_label)

        # predict with spaCy sentiment
        def predict(x):
            doc = nlpS(x)
            pred = max(doc.cats.items(), key=operator.itemgetter(1))[0]
            # match the labels
            labels = {
                'positiv': 'positive',
                'neutral': 'neutral',
                'negativ': 'negative'
            }
            return labels[pred]

        df['pred'] = df.text.map(lambda x: predict(x))

        report(df['valence'], df['pred'], 'Spacy sentiment (polarity)', dataset)

def spacy_sent_benchmark(datasets):
    nlpS = load_spacy_model(textcat='sentiment', vectorError=True)

    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        if dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()
        df['valence'] = df['valence'].map(sentiment_score_to_label)

        # predict with spaCy sentiment
        def predict(x):
            doc = nlpS(x)
            pred = max(doc.cats.items(), key=operator.itemgetter(1))[0]
            # match the labels
            labels = {'positiv': 'positive', 'neutral': 'neutral', 'negativ': 'negative'}
            return labels[pred]

        spellings_map = {'subjective': 'subjektivt', 'objective': 'objektivt',
                         'positive': 'positiv', 'negative': 'negativ', 'neutral': 'neutral'}

        start = time.time()
        df['pred'] = df.text.map(lambda x: spellings_map[predict(x)])
        print_speed_performance(start, len(df))

        f1_report(df['valence'], df['pred'], 'Spacy sentiment (polarity)', dataset)

def afinn_benchmark(datasets):
    afinn = Afinn(language='da', emoticons=True)

    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        if dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()

        df['pred'] = df.text.map(afinn.score).map(to_label)
        df['valence'] = df['valence'].map(to_label)

        report(df['valence'], df['pred'], 'Afinn', dataset)

def sentida_benchmark(datasets): "The scripts download from github from sentindaV2 and place it in cache folder" DEFAULT_CACHE_DIR = os.path.join(str(Path.home()), '.danlp') print(os.getcwd()) workdir = DEFAULT_CACHE_DIR + '/sentida' print(workdir) if not os.path.isdir(workdir): os.mkdir(workdir) url = "https://raw.githubusercontent.com/esbenkc/emma/master/SentidaV2/" for file in ['SentidaV2.py', 'aarup.csv', 'intensifier.csv']: urllib.request.urlretrieve(url + file, workdir + '/' + file) sys.path.insert(1, workdir) os.chdir(workdir + '/') sys.stdout = open(os.devnull, 'w') from SentidaV2 import sentidaV2 sys.stdout = sys.__stdout__ def sentida_score(sent): return sentidaV2(sent, output='total') for dataset in datasets: if dataset == 'euparlsent': data = EuroparlSentiment() if dataset == 'lccsent': data = LccSentiment() df = data.load_with_pandas() df['pred'] = df.text.map(sentida_score).map(to_label_sentida) df['valence'] = df['valence'].map(to_label) report(df['valence'], df['pred'], 'SentidaV2', dataset)
def bert_sent_benchmark(datasets):
    model = load_bert_tone_model()

    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        if dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()
        df['valence'] = df['valence'].map(to_label)

        # predict with BERT sentiment
        df['pred'] = df.text.map(lambda x: model.predict(x, analytic=False)['polarity'])

        report(df['valence'], df['pred'], 'BERT_Tone (polarity)', dataset)

def afinn_benchmark(datasets):
    afinn = Afinn(language='da', emoticons=True)

    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        if dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()

        start = time.time()
        df['pred'] = df.text.map(afinn.score).map(sentiment_score_to_label)
        print_speed_performance(start, len(df))

        df['valence'] = df['valence'].map(sentiment_score_to_label)

        f1_report(df['valence'], df['pred'], 'Afinn', dataset)

def sentida_benchmark(datasets):
    from sentida import Sentida
    sentida = Sentida()

    def sentida_score(sent):
        return sentida.sentida(sent, output='total')

    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        if dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()

        df['pred'] = df.text.map(sentida_score).map(to_label_sentida)
        df['valence'] = df['valence'].map(to_label)

        report(df['valence'], df['pred'], 'Sentida', dataset)

def bert_sent_benchmark(datasets):
    model = load_bert_tone_model()

    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        if dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()
        df['valence'] = df['valence'].map(sentiment_score_to_label)

        # predict with BERT sentiment
        start = time.time()
        df['pred'] = df.text.map(lambda x: model.predict(x, analytic=False)['polarity'])
        print_speed_performance(start, len(df))

        spellings_map = {'subjective': 'subjektivt', 'objective': 'objektivt',
                         'positive': 'positiv', 'negative': 'negativ', 'neutral': 'neutral'}
        df['pred'] = df['pred'].map(lambda x: spellings_map[x])

        f1_report(df['valence'], df['pred'], 'BERT_Tone (polarity)', dataset)

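# A minimal sketch of how the benchmark functions above could be driven.
# The dataset keys ('euparlsent', 'lccsent') come from the functions themselves,
# but this entry-point layout is an assumption, not the repository's actual runner.
if __name__ == '__main__':
    for benchmark in [afinn_benchmark, sentida_benchmark,
                      spacy_sent_benchmark, bert_sent_benchmark]:
        benchmark(['euparlsent', 'lccsent'])
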
def test_lccsentiment(self):
    sent = LccSentiment()
    df = sent.load_with_pandas()
    self.assertEqual(len(df), 499)
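
# A minimal, self-contained sketch of how the test method above is typically
# hosted in a unittest.TestCase; the class name "TestSentimentDatasets" is an
# assumption, not taken from the original test file.
import unittest

from danlp.datasets import LccSentiment


class TestSentimentDatasets(unittest.TestCase):

    def test_lccsentiment(self):
        sent = LccSentiment()
        df = sent.load_with_pandas()
        self.assertEqual(len(df), 499)


if __name__ == '__main__':
    unittest.main()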