Exemple #1
0
def calculate_department_scores():
    """Collect per-document sentiment scores grouped by department.

    Changes the working directory to ``../output_500_itemcount`` and reads
    every ``*.jsonl`` file found there.  For each record the ``field``,
    ``url``, ``title`` and ``text`` entries are read and passed through
    ``format``.

    Returns:
        A dict keyed by department (the record's ``field`` value) whose
        values are lists of ``(url, doc_score, doc_length)`` tuples, one
        tuple per document.
    """
    os.chdir("../output_500_itemcount")

    scorer = Afinn()
    lexicon_path = os.path.join(scorer.data_dir(), 'AFINN-111.txt')
    scorer.setup_from_file(lexicon_path, word_boundary=False)

    scores_by_department = {}
    for jsonl_path in glob.glob("*.jsonl"):
        with jsonlines.open(jsonl_path) as records:
            for record in records:
                dep, url, title, text = (
                    format(record[key])
                    for key in ('field', 'url', 'title', 'text')
                )
                score = get_score(title, text, scorer)
                # Length of "title text": both strings plus one separator.
                length = len(title) + 1 + len(text)
                # Start a new list the first time a department is seen.
                scores_by_department.setdefault(dep, []).append(
                    (url, score, length))

    return scores_by_department
Exemple #2
0
def test_data():
    """Test data files for format.

    Every line of every ``.txt`` file in the Afinn data directory must hold
    exactly two tab-separated fields, the second of which parses as an int.
    """
    afinn = Afinn()
    txt_names = [name for name in listdir(afinn.data_dir())
                 if name.endswith('.txt')]
    for txt_name in txt_names:
        path = join(afinn.data_dir(), txt_name)
        with io.open(path, encoding='UTF-8') as lines:
            for line in lines:
                fields = line.split('\t')

                # A phrase and a score, and nothing more.
                assert len(fields) == 2

                # int() raises ValueError when the score is not an integer.
                _, score = fields
                assert isinstance(int(score), int)
Exemple #3
0
def test_data():
    """Test data files for format.

    Checks each ``.txt`` file in the Afinn data directory line by line:
    a line must split on tabs into a phrase and an integer score.
    """
    afinn = Afinn()
    for entry in listdir(afinn.data_dir()):
        if entry.endswith('.txt'):
            data_path = join(afinn.data_dir(), entry)
            with io.open(data_path, encoding='UTF-8') as handle:
                for row in handle:
                    parts = row.split('\t')

                    # Only the phrase and the score may be present.
                    assert len(parts) == 2

                    # The second field has to be convertible to an int.
                    raw_score = parts[1]
                    assert isinstance(int(raw_score), int)
Exemple #4
0
def test_emoticon():
    """Check that texts containing emoticon entries score above zero."""
    afinn = Afinn()
    emoticon_file = join(afinn.data_dir(), 'AFINN-emoticon-8.txt')
    afinn.setup_from_file(emoticon_file, word_boundary=False)

    # A bare emoticon, an emoticon inside a sentence, and a trailing token.
    samples = (':-)', 'This is a :-) smiley', 'Just so XOXO.')
    for sample in samples:
        assert afinn.score(sample) > 0
Exemple #5
0
def test_emoticon():
    """Check that texts containing emoticon entries score above zero."""
    afinn = Afinn()
    emoticon_file = join(afinn.data_dir(), 'AFINN-emoticon-8.txt')
    # This snippet passes the flag under the name ``with_word_boundary``.
    afinn.setup_from_file(emoticon_file, with_word_boundary=False)

    for sample in (':-)', 'This is a :-) smiley', 'Just so XOXO.'):
        assert afinn.score(sample) > 0
Exemple #6
0
def test_emoticon_upper_case():
    """Exercise emoticon scoring for lower- and upper-case smileys."""
    afinn = Afinn()
    emoticon_file = join(afinn.data_dir(), 'AFINN-emoticon-8.txt')
    afinn.setup_from_file(emoticon_file, word_boundary=False)

    # The lower-case variant is expected to score exactly zero.
    assert afinn.score(':d') == 0

    # TODO: the upper-case variants are scored, but nothing is asserted yet.
    afinn.score(':D')
    # assert score > 0

    afinn.score('It is so: :D')
Exemple #7
0
def test_emoticon_upper_case():
    """Score ':d' and ':D' emoticons; only the lower-case one is asserted."""
    afinn = Afinn()
    data_file = join(afinn.data_dir(), 'AFINN-emoticon-8.txt')
    afinn.setup_from_file(data_file, word_boundary=False)

    lower_score = afinn.score(':d')
    assert lower_score == 0

    # TODO: enable an assertion for the upper-case emoticon.
    # assert score > 0
    for text in (':D', 'It is so: :D'):
        afinn.score(text)
Exemple #8
0
from __future__ import print_function

import glob, os
import sys
import jsonlines

from afinn import Afinn
# Module-level scorer instance, created once when this module is imported.
afinn = Afinn()

# Alternative word list (AFINN-165), currently disabled:
# afinn.setup_from_file(os.path.join(afinn.data_dir(), 'AFINN-165.txt'),
#                       word_boundary=False)

# Set the scorer up from the AFINN-111 word list located in the directory
# returned by afinn.data_dir(), passing word_boundary=False to the loader.
afinn.setup_from_file(os.path.join(afinn.data_dir(), 'AFINN-111.txt'),
                      word_boundary=False)
"""
get_score

for now, just return length
we're supposed to do sentiment analysis here and return the text's score
"""


def get_score(field, title, text):

    # title_score = afinn.score(title)  # using regex patterns
    token_score = afinn.score_with_wordlist(text)  # using word_list
    # sum_score = title_score + text_score
    # print(title)

    # if "mystery" in field:
    #     print(field,title,text)