def test_positive():
    """An enthusiastic Russian comment must receive a score above zero."""
    text = 'Крууто. ты лучший ютубер который снимает приколы. отлично .'
    outcome = Sentimental().analyze(text)
    assert outcome['score'] > 0
# NOTE(review): a second `test_negative` is defined further down in this
# source; if both live in the same module, the later definition replaces this
# one and this test never runs.
def test_negative():
    """A scornful Russian comment must receive a score below zero."""
    text = 'Какое жалкое и лицемерное шоу. А вот здесь в комментариях и дизлайках как раз и проявляется настоящее отношение к этому кощею'
    outcome = Sentimental().analyze(text)
    assert outcome['score'] < 0
# NOTE(review): a second `test_negation` is defined further down in this
# source; if both live in the same module, the later definition replaces this
# one and this test never runs.
def test_negation():
    """'not bad' must yield a zero score and a zero negative component."""
    analyzer = Sentimental()
    outcome = analyzer.analyze('It was not bad!')
    assert outcome['score'] == 0
    assert outcome['negative'] == 0
def test_negative():
    """A plainly negative English sentence scores below zero with no positive part."""
    analyzer = Sentimental()
    outcome = analyzer.analyze('Today is a bad day!')
    assert outcome['score'] < 0
    assert outcome['positive'] == 0
# NOTE(review): a second `test_neutral` is defined further down in this
# source; if both live in the same module, the later definition replaces this
# one and this test never runs.
def test_neutral():
    """A neutral English phrase must yield a zero score and zero negative component."""
    analyzer = Sentimental()
    outcome = analyzer.analyze('Nothing special!')
    assert outcome['score'] == 0
    assert outcome['negative'] == 0
def test_negation():
    """Russian 'не плохо' must yield a zero score and a zero negative component."""
    analyzer = Sentimental()
    outcome = analyzer.analyze('Было не плохо!')
    assert outcome['score'] == 0
    assert outcome['negative'] == 0
def test_neutral():
    """A neutral Russian phrase must yield a zero score and zero negative component."""
    analyzer = Sentimental()
    outcome = analyzer.analyze('Ничего такого!')
    assert outcome['score'] == 0
    assert outcome['negative'] == 0
class SentimentAnalyzer():
    """Score artifacts, and pairs of artifacts, by the sentiment of texts that mention them.

    All functionality is exposed through static methods; the class only serves
    as a namespace around one shared, pre-trained classifier.
    """

    # Shared classifier: built and trained once, when the class body executes.
    _sentimental = Sentimental(max_ngrams=2, undersample=True)
    _sentimental.train([get_data_path() + '/sv/ruhburg'])

    # Captures <id> from every `(<id> "GHOSTDOC-TOKEN")` marker in a text.
    # Raw string: identical pattern text to the previous non-raw literal, but
    # without the invalid escape sequences (`\(`, `\w`, `\)`) that newer
    # Python versions warn about.
    _TOKEN_RE = re.compile(r'\(([\w]+) \"GHOSTDOC-TOKEN\"\)')

    @staticmethod
    def calculate_scores(marked_tree):
        """Aggregate per-text sentiment into artifact and artifact-pair scores.

        Args:
            marked_tree: iterable of mappings. Items without a ``'text'`` key
                are skipped; for the others, ``item['text']`` is classified
                and scanned for ``(<id> "GHOSTDOC-TOKEN")`` markers.

        Returns:
            A dict with two entries:

            * ``'artifact_scores'``: ``{id: score}`` for every id found.
            * ``'friend_scores'``: ``{(id_a, id_b): score}`` for every pair of
              distinct ids found in the same text (ids sorted within a pair).

            With ``pos``/``neg`` being the number of positive/negative texts
            an id or pair occurred in, the score is
            ``(pos - neg) * exp(max(pos, neg) / (pos + neg + 1))``.
            Ids or pairs seen only in neutral texts therefore score ``0.0``.
        """
        friend_counts = {}
        artifact_counts = {}

        for item in marked_tree:
            if 'text' not in item:
                continue

            text = item['text']
            senti = SentimentAnalyzer.sentiment(text)
            # Each id counts once per text, however often it is repeated;
            # sorting makes the pair tuples below order-independent.
            ids = sorted(set(SentimentAnalyzer._TOKEN_RE.findall(text)))

            for artifact in ids:
                SentimentAnalyzer._tally(artifact_counts, artifact, senti)
            for pair in itertools.combinations(ids, 2):
                SentimentAnalyzer._tally(friend_counts, pair, senti)

        return {
            'friend_scores': {
                _id: SentimentAnalyzer._weighted_score(vals)
                for _id, vals in friend_counts.items()
            },
            'artifact_scores': {
                _id: SentimentAnalyzer._weighted_score(vals)
                for _id, vals in artifact_counts.items()
            },
        }

    @staticmethod
    def sentiment(text):
        """Classify ``text`` and return 1 (positive), -1 (negative) or 0 (anything else)."""
        result = SentimentAnalyzer._sentimental.sentiment(text)
        if isinstance(result, dict):
            # NOTE(review): assumes the classifier returns {label: score} --
            # confirm against the library. A bare max() over a dict compares
            # the label *names*, so 'positive' would win regardless of the
            # scores; pick the label with the highest score instead.
            label = max(result, key=result.get)
        else:
            label = max(result)

        if label == 'positive':
            return 1
        if label == 'negative':
            return -1
        return 0

    @staticmethod
    def _tally(counts, key, senti):
        """Ensure ``counts[key]`` exists as ``[pos, neg]`` and bump the side matching ``senti``."""
        tally = counts.setdefault(key, [0, 0])
        if senti == 1:
            tally[0] += 1
        elif senti == -1:
            tally[1] += 1

    @staticmethod
    def _weighted_score(vals):
        """Turn a ``[pos, neg]`` tally into the final weighted score."""
        pos, neg = vals
        return (pos - neg) * math.exp(max(vals) / (pos + neg + 1))
def test_empty_string():
    """Analyzing an empty string must report zero for every metric checked."""
    outcome = Sentimental().analyze('')
    assert outcome['score'] == 0
    assert outcome['positive'] == 0
    assert outcome['negative'] == 0
    assert outcome['comparative'] == 0
# Take the tweets from the crawled-tweets frame and clean each one.
test = df2.Text
results = [tweet_cleaner_updated(raw_tweet) for raw_tweet in test]

# Name to look for among each tweet's key tokens (change it to analyse
# another candidate, e.g. atiku).
query = 'buhari'
lastdf = pd.DataFrame()

# spaCy supplies the dependency parse. pip install -U spacy
nlp = spacy.load('en')
# Sentimental scores the sentiment of each tweet.
# pip install -U git+https://github.com/text-machine-lab/sentimental.git
sentiment = Sentimental(word_list='afinn.csv', negation='negations.csv')

tweetset = []
scorelist = []
for r in results:
    doc = nlp(r)
    # Keep only the noun subjects, direct objects and root of the tweet.
    sub_toks = [tok for tok in doc if tok.dep_ in ("nsubj", "dobj", "ROOT")]

    # The query is searched in the string form of the token list, so this is
    # a case-sensitive substring match rather than an exact token comparison.
    if query not in str(sub_toks):
        continue

    tweetset.append(r)
    sentence_sentiment = sentiment.analyze(r)
    scorelist.append(sentence_sentiment['score'])
def _add_embedding_stats(frame, embeddings):
    """Add the row-wise mean/std/skew/kurtosis of `embeddings` as columns of `frame`."""
    frame['embedding_mean'] = embeddings.mean(axis=1)
    frame['embedding_std'] = embeddings.std(axis=1)
    frame['embedding_skew'] = skew(embeddings, axis=1)
    frame['embedding_kurtosis'] = kurtosis(embeddings, axis=1)


def _add_sentiment_features(frame, descriptions, analyzer):
    """Add the negative / positive / comparative sentiment of each description to `frame`.

    Every description is analysed exactly once and the three fields are read
    from that single result (previously each row was analysed three times).
    Non-string entries get 0 for all three columns.
    """
    no_text = {'negative': 0, 'positive': 0, 'comparative': 0}
    analyses = descriptions.apply(
        lambda s: analyzer.analyze(s) if isinstance(s, str) else no_text)
    # Column order matches the original assignments.
    frame['sentiment_negative'] = analyses.apply(lambda a: a['negative'])
    frame['sentiment_positive'] = analyses.apply(lambda a: a['positive'])
    frame['sentiment'] = analyses.apply(lambda a: a['comparative'])


train_embeddings_df, test_embeddings_df = load_cache('avito_fasttext_300d')

# Append the raw embedding columns to the feature frames.
print_step('Importing Data 11/19 2/3')
train_fe = pd.concat([train_fe, train_embeddings_df], axis=1)
print_step('Importing Data 11/19 3/3')
test_fe = pd.concat([test_fe, test_embeddings_df], axis=1)

# Per-row summary statistics over the embedding columns.
_add_embedding_stats(train_fe, train_embeddings_df)
_add_embedding_stats(test_fe, test_embeddings_df)

# Sentiment features derived from the free-text description.
sent = Sentimental()
_add_sentiment_features(train_fe, train['description'], sent)
_add_sentiment_features(test_fe, test['description'], sent)

print('~~~~~~~~~~~~~~~~~~~~~~~~~~~')
print_step('Converting to category')