Example #1
def spacy_sent_benchmark(datasets):

    nlpS = load_spacy_model(textcat='sentiment', vectorError=True)

    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        elif dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()

        df['valence'] = df['valence'].map(to_label)

        # predict with spacy sentiment
        def predict(x):
            doc = nlpS(x)
            pred = max(doc.cats.items(), key=operator.itemgetter(1))[0]
            # match the labels
            labels = {
                'positiv': 'positive',
                'neutral': 'neutral',
                'negativ': 'negative'
            }
            return labels[pred]

        df['pred'] = df.text.map(predict)

        report(df['valence'], df['pred'], 'Spacy sentiment (polarity)',
               dataset)
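These examples rely on a to_label helper that is not shown. A minimal sketch, assuming it simply bins a numeric valence score by its sign (the thresholds in the original helper may differ):

def to_label(score):
    # Assumed mapping: the sign of the score decides the polarity class.
    if score > 0:
        return 'positive'
    elif score < 0:
        return 'negative'
    return 'neutral'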
Example #2
def spacy_sent_benchmark(datasets):
    
    nlpS = load_spacy_model(textcat='sentiment', vectorError=True)
   
    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        elif dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()
        
        df['valence'] = df['valence'].map(sentiment_score_to_label)
        
        # predict with spacy sentiment 
        def predict(x):
            doc = nlpS(x)
            pred = max(doc.cats.items(), key=operator.itemgetter(1))[0]
            # match the labels
            labels = {'positiv': 'positive', 'neutral': 'neutral', 'negativ': 'negative'}
            return labels[pred]

        spellings_map = {'subjective': 'subjektivt', 'objective': 'objektivt', 'positive': 'positiv', 'negative': 'negativ', 'neutral': 'neutral'}
        start = time.time()
        df['pred'] = df.text.map(lambda x: spellings_map[predict(x)])
        print_speed_performance(start, len(df))

        f1_report(df['valence'], df['pred'], 'Spacy sentiment (polarity)', dataset)
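A benchmark like this would presumably be driven by passing it the dataset keys it checks for, e.g.:

if __name__ == '__main__':
    # Run the spaCy sentiment benchmark on both supported datasets.
    spacy_sent_benchmark(['euparlsent', 'lccsent'])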
Example #3
def afinn_benchmark(datasets):
    afinn = Afinn(language='da', emoticons=True)

    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        if dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()

        df['pred'] = df.text.map(afinn.score).map(to_label)
        df['valence'] = df['valence'].map(to_label)

        report(df['valence'], df['pred'], 'Afinn', dataset)
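Afinn.score returns the summed valence of the words in a text as a float, which is why the prediction is converted with to_label. A standalone check (the exact value depends on the Danish AFINN word list):

from afinn import Afinn

afinn = Afinn(language='da', emoticons=True)
# A clearly positive Danish word is expected to score above zero.
print(afinn.score('fantastisk'))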
Example #4
def sentida_benchmark(datasets):
    "The scripts download from github from sentindaV2 and place it in cache folder"
    DEFAULT_CACHE_DIR = os.path.join(str(Path.home()), '.danlp')
    print(os.getcwd())
    workdir = DEFAULT_CACHE_DIR + '/sentida'
    print(workdir)
    if not os.path.isdir(workdir):
        os.mkdir(workdir)
        url = "https://raw.githubusercontent.com/esbenkc/emma/master/SentidaV2/"
        for file in ['SentidaV2.py', 'aarup.csv', 'intensifier.csv']:
            urllib.request.urlretrieve(url + file, workdir + '/' + file)

    sys.path.insert(1, workdir)
    os.chdir(workdir + '/')
    # Silence SentidaV2's import-time prints, then restore stdout.
    sys.stdout = open(os.devnull, 'w')
    from SentidaV2 import sentidaV2
    sys.stdout = sys.__stdout__

    def sentida_score(sent):
        return sentidaV2(sent, output='total')

    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment()
        elif dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()

        df['pred'] = df.text.map(sentida_score).map(to_label_sentida)
        df['valence'] = df['valence'].map(to_label)

        report(df['valence'], df['pred'], 'SentidaV2', dataset)
Example #5
def bert_sent_benchmark(datasets):
    model = load_bert_tone_model()

    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        elif dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()

        df['valence'] = df['valence'].map(to_label)
        # predict with bert sentiment
        df['pred'] = df.text.map(
            lambda x: model.predict(x, analytic=False)['polarity'])

        report(df['valence'], df['pred'], 'BERT_Tone (polarity)', dataset)
Example #6
def afinn_benchmark(datasets):
    afinn = Afinn(language='da', emoticons=True)
    
    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        elif dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()

        start = time.time()
        df['pred'] = df.text.map(afinn.score).map(sentiment_score_to_label)
        print_speed_performance(start, len(df))
        df['valence'] = df['valence'].map(sentiment_score_to_label)

        f1_report(df['valence'], df['pred'], 'Afinn', dataset)
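print_speed_performance is another helper that is not shown. A plausible sketch, assuming it reports throughput from the start time and the number of processed rows:

import time

def print_speed_performance(start, num_sentences):
    # Assumed helper: print elapsed wall time and sentences per second.
    elapsed = time.time() - start
    print('Made predictions on {} sentences in {:.2f}s ({:.1f} sentences/s)'.format(
        num_sentences, elapsed, num_sentences / elapsed))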
Example #7
def sentida_benchmark(datasets):

    from sentida import Sentida
    sentida = Sentida()

    def sentida_score(sent):
        return sentida.sentida(sent, output='total')

    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        elif dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()

        df['pred'] = df.text.map(sentida_score).map(to_label_sentida)
        df['valence'] = df['valence'].map(to_label)

        report(df['valence'], df['pred'], 'Sentida', dataset)
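Example #7 uses the pip-installable sentida package directly; with output='total' the score is the summed valence of the sentence. A quick standalone check, assuming the package is installed:

from sentida import Sentida

# Score a short Danish sentence; the sign indicates the polarity.
print(Sentida().sentida('Det er fantastisk!', output='total'))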
Example #8
def bert_sent_benchmark(datasets):
    model = load_bert_tone_model()
    
    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        elif dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()

        df['valence'] = df['valence'].map(sentiment_score_to_label)
        # predict with bert sentiment 
        start = time.time()
        df['pred'] = df.text.map(lambda x: model.predict(x, analytic=False)['polarity'])
        print_speed_performance(start, len(df))
        spellings_map = {'subjective': 'subjektivt', 'objective': 'objektivt', 'positive': 'positiv', 'negative': 'negativ', 'neutral': 'neutral'}
        df['pred'] = df['pred'].map(lambda x: spellings_map[x])

        f1_report(df['valence'], df['pred'], 'BERT_Tone (polarity)', dataset)
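f1_report is also undefined in these snippets. A plausible sketch built on scikit-learn's classification_report (the original may format or aggregate its output differently):

from sklearn.metrics import classification_report

def f1_report(y_true, y_pred, model_name, dataset_name):
    # Assumed helper: print per-class precision, recall and F1 for a run.
    print('\n{} on {}'.format(model_name, dataset_name))
    print(classification_report(y_true, y_pred))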
Example #9
def test_lccsentiment(self):
    sent = LccSentiment()
    df = sent.load_with_pandas()
    self.assertEqual(len(df), 499)