예제 #1
0
def test_positive():
    """Check that a praising Cyrillic comment gets a score above zero."""
    analyzer = Sentimental()
    text = 'Крууто. ты лучший ютубер который снимает приколы. отлично .'

    outcome = analyzer.analyze(text)

    assert outcome['score'] > 0
예제 #2
0
def test_negative():
    """Check that a disparaging Cyrillic comment gets a score below zero."""
    analyzer = Sentimental()
    text = 'Какое жалкое и лицемерное шоу. А вот здесь в комментариях и дизлайках как раз и проявляется настоящее отношение к этому кощею'

    outcome = analyzer.analyze(text)

    assert outcome['score'] < 0
예제 #3
0
def test_negation():
    """Check that 'not bad' yields a zero score and a zero 'negative' value."""
    analyzer = Sentimental()
    text = 'It was not bad!'

    outcome = analyzer.analyze(text)

    # Order matters for the failure message: the overall score is checked first.
    assert outcome['score'] == 0
    assert outcome['negative'] == 0
예제 #4
0
def test_negative():
    """Check that a plainly negative sentence scores below zero with a zero 'positive' value."""
    analyzer = Sentimental()
    text = 'Today is a bad day!'

    outcome = analyzer.analyze(text)

    assert outcome['score'] < 0
    assert outcome['positive'] == 0
예제 #5
0
def test_neutral():
    """Check that a sentence without sentiment words yields zero score and zero 'negative'."""
    analyzer = Sentimental()
    text = 'Nothing special!'

    outcome = analyzer.analyze(text)

    assert outcome['score'] == 0
    assert outcome['negative'] == 0
예제 #6
0
def test_negation():
    """Check that the Cyrillic 'not bad' phrase yields zero score and zero 'negative'."""
    analyzer = Sentimental()
    text = 'Было не плохо!'

    outcome = analyzer.analyze(text)

    assert outcome['score'] == 0
    assert outcome['negative'] == 0
예제 #7
0
def test_neutral():
    """Check that a Cyrillic sentence asserted to be neutral yields zero score and zero 'negative'."""
    analyzer = Sentimental()
    text = 'Ничего такого!'

    outcome = analyzer.analyze(text)

    assert outcome['score'] == 0
    assert outcome['negative'] == 0
예제 #8
0
class SentimentAnalyzer():
    """Score artifacts, and pairs of artifacts, by the sentiment of the text mentioning them.

    All methods are static; the class only serves as a namespace around one
    shared classifier.
    """

    # Shared classifier, created and trained once when the class body executes.
    _sentimental = Sentimental(max_ngrams=2, undersample=True)
    _sentimental.train([get_data_path() + '/sv/ruhburg'])

    @staticmethod
    def _tally(scores, key, senti):
        """Bump the [positive, negative] counter stored under ``key``.

        A neutral ``senti`` (anything other than 1 or -1) still creates the
        entry, so the key shows up in the result with a score of zero.
        """
        counts = scores.setdefault(key, [0, 0])
        if senti == 1:
            counts[0] += 1
        elif senti == -1:
            counts[1] += 1

    @staticmethod
    def _weighted_score(counts):
        """Collapse a [positive, negative] counter into one signed score.

        The sign comes from ``positive - negative``; the magnitude is scaled
        by ``exp(max(counts) / (positive + negative + 1))``.
        """
        positive, negative = counts
        return (positive - negative) * math.exp(
            max(counts) / (positive + negative + 1))

    @staticmethod
    def calculate_scores(marked_tree):
        """Compute sentiment scores for every artifact and artifact pair.

        Args:
            marked_tree: iterable of items supporting ``'text' in item`` and
                ``item['text']``; items without a ``'text'`` entry are skipped.

        Returns:
            A dict with two entries:
            ``'friend_scores'`` maps a sorted 2-tuple of artifact ids that
            occur in the same text to a score, and ``'artifact_scores'`` maps
            a single artifact id to a score.
        """
        # Raw string: same pattern as before, without invalid escape sequences.
        # Captures <id> in `(<id> "GHOSTDOC-TOKEN")`.
        token_re = re.compile(r'\(([\w]+) \"GHOSTDOC-TOKEN\"\)')
        friend_counts = {}
        artifact_counts = {}

        for item in marked_tree:
            if 'text' not in item:
                continue
            text = item['text']
            senti = SentimentAnalyzer.sentiment(text)

            # Each artifact counts once per text, however often it is
            # mentioned; sorting makes the pair tuples order-independent.
            artifacts = sorted(set(token_re.findall(text)))

            for artifact in artifacts:
                SentimentAnalyzer._tally(artifact_counts, artifact, senti)
            for pair in itertools.combinations(artifacts, 2):
                SentimentAnalyzer._tally(friend_counts, pair, senti)

        return {
            'friend_scores': {
                _id: SentimentAnalyzer._weighted_score(vals)
                for _id, vals in friend_counts.items()
            },
            'artifact_scores': {
                _id: SentimentAnalyzer._weighted_score(vals)
                for _id, vals in artifact_counts.items()
            },
        }

    @staticmethod
    def sentiment(text):
        """Classify ``text`` as 1 (positive), -1 (negative) or 0 (anything else)."""
        prediction = SentimentAnalyzer._sentimental.sentiment(text)
        if isinstance(prediction, dict):
            # NOTE(review): assumes the dict maps label -> score - confirm
            # against the classifier. Plain max() on a dict compares the keys
            # alphabetically, so the highest-scored label must be selected
            # through its value.
            label = max(prediction, key=prediction.get)
        else:
            label = max(prediction)

        if label == 'positive':
            return 1
        if label == 'negative':
            return -1
        return 0
def test_empty_string():
    """Check that analysing an empty string reports zero for every checked field."""
    analyzer = Sentimental()
    text = ''

    outcome = analyzer.analyze(text)

    assert outcome['score'] == 0
    assert outcome['positive'] == 0
    assert outcome['negative'] == 0
    assert outcome['comparative'] == 0
예제 #10
0
# Take the tweets from the crawled tweets file and clean each one.
test = df2.Text
results = [tweet_cleaner_updated(t) for t in test]

# Name to filter on; change it to analyse a different person
# (e.g. buhari / atiku).
query = 'buhari'

lastdf = pd.DataFrame()
# spaCy provides the dependency labels used below. pip install -U spacy
nlp = spacy.load('en')
# sentimental scores the sentiment of each tweet.
# pip install -U git+https://github.com/text-machine-lab/sentimental.git
sentiment = Sentimental(word_list='afinn.csv', negation='negations.csv')

tweetset = []
scorelist = []
for r in results:
    doc = nlp(r)
    # Keep only the subject, direct-object and root tokens of the tweet.
    sub_toks = [tok for tok in doc if tok.dep_ in ("nsubj", "dobj", "ROOT")]
    #print(r + " " + str(sub_toks))
    # Skip tweets where the query does not appear among those tokens
    # (substring match on the printed token list).
    if query not in str(sub_toks):
        continue
    tweetset.append(r)
    sentence_sentiment = sentiment.analyze(r)
    scorelist.append(sentence_sentiment['score'])
예제 #11
0
    train_embeddings_df, test_embeddings_df = load_cache('avito_fasttext_300d')

print_step('Importing Data 11/19 2/3')
train_fe = pd.concat([train_fe, train_embeddings_df], axis=1)
print_step('Importing Data 11/19 3/3')
test_fe = pd.concat([test_fe, test_embeddings_df], axis=1)

# Per-row summary statistics over the embedding columns.
train_fe['embedding_mean'] = train_embeddings_df.mean(axis=1)
train_fe['embedding_std'] = train_embeddings_df.std(axis=1)
train_fe['embedding_skew'] = skew(train_embeddings_df, axis=1)
train_fe['embedding_kurtosis'] = kurtosis(train_embeddings_df, axis=1)
test_fe['embedding_mean'] = test_embeddings_df.mean(axis=1)
test_fe['embedding_std'] = test_embeddings_df.std(axis=1)
test_fe['embedding_skew'] = skew(test_embeddings_df, axis=1)
test_fe['embedding_kurtosis'] = kurtosis(test_embeddings_df, axis=1)

sent = Sentimental()
# Analyse every description exactly once and read all three fields from the
# cached result, instead of re-running the analyzer for each output column.
# Non-string descriptions (e.g. missing values) are stored as None and map to 0.
# The intermediate stays a Series so the column assignments align on the
# original index, exactly as a direct `.apply` on the column would.
for _raw_df, _feature_df in ((train, train_fe), (test, test_fe)):
    _analyses = _raw_df['description'].apply(
        lambda s: sent.analyze(s) if isinstance(s, str) else None)
    for _column, _field in (('sentiment_negative', 'negative'),
                            ('sentiment_positive', 'positive'),
                            ('sentiment', 'comparative')):
        # `field=_field` binds the current key; a plain closure would be late-bound.
        _feature_df[_column] = _analyses.apply(
            lambda a, field=_field: a[field] if a is not None else 0)

print('~~~~~~~~~~~~~~~~~~~~~~~~~~~')
print_step('Converting to category')