Example #1
def spacy_sent_benchmark(datasets):
    
    nlpS = load_spacy_model(textcat='sentiment', vectorError=True)
   
    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        if dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()
        
        df['valence'] = df['valence'].map(sentiment_score_to_label)
        
        # predict with spacy sentiment 
        def predict(x):
            doc = nlpS(x)
            pred = max(doc.cats.items(), key=operator.itemgetter(1))[0]
            # map the model's Danish labels to English
            labels = {'positiv': 'positive', 'neutral': 'neutral', 'negativ': 'negative'}
            return labels[pred]

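        # map the English predictions back to the Danish spellings used by the gold labels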
        spellings_map = {'subjective': 'subjektivt', 'objective': 'objektivt', 'positive': 'positiv', 'negative': 'negativ', 'neutral': 'neutral'}
        start = time.time()
        df['pred'] = df.text.map(lambda x: spellings_map[predict(x)])
        print_speed_performance(start, len(df))

        f1_report(df['valence'], df['pred'], 'Spacy sentiment (polarity)', dataset)
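These excerpts omit their module-level imports. A plausible preamble for the timed variants, assuming the scoring helpers live in a local utils module next to the benchmarks (the untimed variants use older aliases such as to_label and report), would be:

import operator
import time

from afinn import Afinn
from danlp.datasets import EuroparlSentiment1, LccSentiment
from danlp.models import load_bert_tone_model, load_spacy_model

# Assumed location of the shared helpers; not shown in these excerpts.
from utils import f1_report, print_speed_performance, sentiment_score_to_label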
Example #2
def spacy_sent_benchmark(datasets):

    nlpS = load_spacy_model(textcat='sentiment', vectorError=True)

    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        if dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()

        df['valence'] = df['valence'].map(to_label)

        # predict with spacy sentiment
        def predict(x):
            doc = nlpS(x)
            pred = max(doc.cats.items(), key=operator.itemgetter(1))[0]
            # match the labels
            labels = {
                'positiv': 'positive',
                'neutral': 'neutral',
                'negativ': 'negative'
            }
            return labels[pred]

        df['pred'] = df.text.map(lambda x: predict(x))

        report(df['valence'], df['pred'], 'Spacy sentiment (polarity)',
               dataset)
Example #3
def afinn_benchmark(datasets):
    afinn = Afinn(language='da', emoticons=True)

    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        if dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()

        df['pred'] = df.text.map(afinn.score).map(to_label)
        df['valence'] = df['valence'].map(to_label)

        report(df['valence'], df['pred'], 'Afinn', dataset)
Example #4
def bert_sent_benchmark(datasets):
    model = load_bert_tone_model()

    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        if dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()

        df['valence'] = df['valence'].map(to_label)
        # predict with bert sentiment
        df['pred'] = df.text.map(
            lambda x: model.predict(x, analytic=False)['polarity'])

        report(df['valence'], df['pred'], 'BERT_Tone (polarity)', dataset)
Example #5
def afinn_benchmark(datasets):
    afinn = Afinn(language='da', emoticons=True)
    
    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        if dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()

        start = time.time()
        df['pred'] = df.text.map(afinn.score).map(sentiment_score_to_label)
        print_speed_performance(start, len(df))
        df['valence'] = df['valence'].map(sentiment_score_to_label)

        f1_report(df['valence'], df['pred'], 'Afinn', dataset)
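sentiment_score_to_label itself is not shown. A minimal sketch, assuming a sign-based bucketing into the Danish labels the benchmarks compare against:

def sentiment_score_to_label(score):
    # Hypothetical helper: bucket a signed score (e.g. an Afinn sum)
    # into the Danish polarity labels used as gold labels above.
    if score == 0:
        return 'neutral'
    return 'positiv' if score > 0 else 'negativ'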
Example #6
def sentida_benchmark(datasets):

    from sentida import Sentida
    sentida = Sentida()

    def sentida_score(sent):
        return sentida.sentida(sent, output='total')

    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        if dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()

        df['pred'] = df.text.map(sentida_score).map(to_label_sentida)
        df['valence'] = df['valence'].map(to_label)

        report(df['valence'], df['pred'], 'Sentida', dataset)
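to_label_sentida is likewise not shown; since Sentida returns a continuous total, a plausible sketch buckets it by sign (the helper name and thresholds here are assumptions):

def to_label_sentida(score):
    # Hypothetical helper: Sentida's 'total' output is continuous,
    # so bucket it by sign into the labels that report() compares.
    if score > 0:
        return 'positive'
    if score < 0:
        return 'negative'
    return 'neutral'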
Example #7
def bert_sent_benchmark(datasets):
    model = load_bert_tone_model()
    
    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        if dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()

        df['valence'] = df['valence'].map(sentiment_score_to_label)
        # predict with bert sentiment 
        start = time.time()
        df['pred'] = df.text.map(lambda x: model.predict(x, analytic=False)['polarity'])
        print_speed_performance(start, len(df))
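        # normalise the model's English labels to the Danish spellings of the gold labels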
        spellings_map = {'subjective': 'subjektivt', 'objective': 'objektivt', 'positive': 'positiv', 'negative': 'negativ', 'neutral': 'neutral'}
        df['pred'] = df['pred'].map(lambda x: spellings_map[x])

        f1_report(df['valence'], df['pred'], 'BERT_Tone (polarity)', dataset)
Example #8
def test_europarlsentiment1(self):
    eusent = EuroparlSentiment1()
    df = eusent.load_with_pandas()
    self.assertEqual(len(df), 184)
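A minimal driver for these benchmarks, assuming the two dataset keys the functions check for, might be:

if __name__ == '__main__':
    # 'euparlsent' and 'lccsent' are the only keys the benchmarks handle.
    afinn_benchmark(['euparlsent', 'lccsent'])
    bert_sent_benchmark(['euparlsent', 'lccsent'])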