Example #1
import re
import string

import pandas as pd
from nltk.stem import PorterStemmer, WordNetLemmatizer
from vaderSentiment import vaderSentiment


def extract_textfeatures_flavor2(comment):
    words = ['f**k', 'bitch', 'shut', 'hate', 'suck', 'gay', 'ugli', 'work', 'beauti', 'sick']
    analyzer = vaderSentiment.SentimentIntensityAnalyzer()
    vader = analyzer.polarity_scores(comment).get('compound')
    # Blocklist terms, read from the column named '0' in the CSV
    f = pd.read_csv('Terms-to-Block.csv')['0'].tolist()
    ps = PorterStemmer()
    wordnet_lemmatizer = WordNetLemmatizer()
    # Stem and lemmatize the cue words so they match the normalized comment below
    words = [ps.stem(wordnet_lemmatizer.lemmatize(w)) for w in words]
    # Drop non-ASCII characters (Python 3 replacement for the old unicode()/decode() pair)
    comment = comment.encode('ascii', errors='ignore').decode()
    punc = sum([comment.count(i) for i in ["!", "?"]])
    url =len(re.findall('https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+', comment))
    user = len(re.findall(r'(?<=^|(?<=[^a-zA-Z0-9-_\.]))@([A-Za-z]+[A-Za-z0-9-_]+)', comment))
    hashtag = len(re.findall(r"#(\w+)", comment))
    comment = ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)"," ",comment).split())
    textlen = len(comment.split())
    if textlen == 0:
        textlen = 1
    # Normalize counts by the number of words in the comment
    punc = punc / textlen
    uppercase = sum(1 for x in comment if x in string.ascii_uppercase) / textlen
    comment = [x.lower() for x in comment.split()]
    comment = [ps.stem(wordnet_lemmatizer.lemmatize(w)) for w in comment]
    comment = " ".join(comment)
    badwords = sum(comment.count(i) for i in f) / textlen
    # One occurrence count per stemmed cue word
    unigrams = [comment.count(x) for x in words]
    all_features = tuple([textlen, punc, uppercase, badwords, vader, hashtag, user, url] + unigrams)
    return all_features
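A minimal usage sketch, assuming the NLTK corpora (wordnet) are installed and Terms-to-Block.csv is on disk; the sample comment is made up:

# Hypothetical call illustrating the shape of the returned feature tuple.
features = extract_textfeatures_flavor2("Check this out! https://example.com #wow @someone")
# Layout: textlen, punc, uppercase, badwords, vader, hashtag, user, url,
# then one count per stemmed cue word.
print(len(features))  # 18: 8 base features + 10 unigram counts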
Example #2
def index():
    form = NameForm()
    if form.validate_on_submit():
        # VADER's compound score lies in [-1, 1]; the library's suggested
        # thresholds are >= 0.05 for positive and <= -0.05 for negative.
        score = vaderSentiment.SentimentIntensityAnalyzer().polarity_scores(
            form.name.data)['compound']
        if score > 0.05:
            sentiment = 'Positive'
            category = 'success'
        elif score < -0.05:
            sentiment = 'Negative'
            category = 'danger'
        else:
            sentiment = 'Neutral'
            category = 'info'
        flash(f'Score = {score} ({sentiment})', category)
        session['name'] = form.name.data
        return redirect(url_for('index'))
    return render_template('index.html', form=form, name=session.get('name'))
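This view relies on scaffolding the snippet does not show. A minimal sketch of those pieces, assuming Flask-WTF and a hypothetical NameForm with a single name field:

from flask import Flask, flash, redirect, render_template, session, url_for
from flask_wtf import FlaskForm
from wtforms import StringField, SubmitField
from wtforms.validators import DataRequired

app = Flask(__name__)
app.config['SECRET_KEY'] = 'change-me'  # required for session and flash

class NameForm(FlaskForm):  # hypothetical; the original form is not shown
    name = StringField('Text to score', validators=[DataRequired()])
    submit = SubmitField('Analyze')

# Register the view above, allowing POST for the form submission
app.add_url_rule('/', view_func=index, methods=['GET', 'POST'])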
Example #3
import time
from alpha_vantage.timeseries import TimeSeries
import json
import re
from vaderSentiment import vaderSentiment

# Credentials are intentionally left blank; supply your own keys
ALPHA_API_KEY = ''
CONSUMER_KEY = ''
CONSUMER_SECRET_KEY = ''
ACCESS_TOKEN = ''
ACCESS_SECRET_TOKEN = ''

STOCK_NAME = 'TSLA'
TWEET_SAMPLING_TIME = 100

analyzer = vaderSentiment.SentimentIntensityAnalyzer()


def generate_stocks_csv(ts):
    # Fetch the full daily price history for the configured ticker
    stock_data, meta_data = ts.get_daily(STOCK_NAME, outputsize='full')
    # Keep only the close and volume columns
    del stock_data['2. high']
    del stock_data['1. open']
    del stock_data['3. low']
    stock_data.to_csv(STOCK_NAME + '.csv', sep=',')
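A short usage sketch, assuming the pandas output format (which the column deletes above imply):

ts = TimeSeries(key=ALPHA_API_KEY, output_format='pandas')
generate_stocks_csv(ts)  # writes TSLA.csv containing the close and volume columns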


def cleanse_tweet(tweet):
    # Process the raw tweet text

    # Convert to lower case