Example #1
    def scrape_site(self):

        marketwatchScraper = MarketwatchNewsScraper()
        all_articles = []

        sentimentAnalyzer = SentimentAnalyzer()

        while True:
            articles = marketwatchScraper.scrape()

            for article in articles:
                if article not in all_articles:
                    headline = article.headline
                    print('headline:', headline)
                    print('url:', article.url)
                    all_articles.append(article)
                    # Optionally route long headlines to the Google-based
                    # analyzer instead (disabled in this example):
                    # if len(headline.split()) >= 20:
                    #     sentimentAnalyzer.google_analyze(headline)
                    sentimentAnalyzer.analyze(headline)

            logger.info("Will get news headlines again in %s sec...",
                        self.frequency)
            print('\n' * 3)  # blank lines between polling rounds
            time.sleep(self.frequency)
Example #2
    def __init__(self):
        self.sz = SentimentAnalyzer()
        with open(os.path.join(os.getcwd(), 'vectorizer.pk'), 'rb') as fin:
            self.vectorizer = pickle.load(fin)
        with open(os.path.join(os.getcwd(), 'question_detection.sav'),
                  'rb') as file:
            self.model = pickle.load(file)
Example #3
    def Extract(self, username, filters, KEY, SECRET):
        authentication = tweepy.OAuthHandler(consumer_key, consumer_secret)
        authentication.set_access_token(KEY, SECRET)

        TwitterAPI = tweepy.API(authentication)

        resp = TwitterAPI.user_timeline(screen_name=username, count=200)
        tweets = [tweet._json for tweet in resp]
        user = tweets[0]['user']
        SentAnaly = SentimentAnalyzer()
        TempAnaly = TemporalAnalyzer()
        MetaAnaly = MetaDataAnalyzer()
        NetAnaly = NetworkAnalyzer()
        # Note: Series.append was removed in pandas 2.0; pd.concat is the
        # modern replacement for these chained appends.
        account = pd.Series()
        account = account.append(SentAnaly.SentimentAnalysis(tweets))
        account = account.append(TempAnaly.TemporalAnalysis(tweets))
        account = account.append(MetaAnaly.MetaDataAnalysis(user))
        account = account.append(NetAnaly.NetworkAnalysis(user))
        DNN = DNNReg()
        temp = list()
        temp.append(account)
        df = pd.DataFrame(temp)
        toints = [
            'Blanguage', 'BdefaultProfile', 'BdefaultImage',
            'BprofileBackgroundImage', 'Bverified', 'Bprotected', 'BgeoEnabled'
        ]
        sentfeat = [
            'positiveNum', 'positivePol', 'neutralNum', 'neutralPol',
            'negativeNum', 'negativePol', 'average', 'standardDeviation'
        ]
        metafeat = [
            'screenNameLength', 'screenNameDigits', 'accountNameLength',
            'ageDays', 'descriptionLength', 'Blanguage', 'BdefaultProfile',
            'BdefaultImage', 'BprofileBackgroundImage', 'Bverified',
            'Bprotected', 'BgeoEnabled'
        ]
        netfeat = [
            'favoritesCount', 'followersCount', 'friendsCount', 'listedCount'
        ]
        tempfeat = ["tweetsPerDay", "tweetsPerMSecPerDay"]
        df[toints] = df[toints].astype(int)
        Final = DNN.run(df, "twee")
        Sent = DNN.run(df[sentfeat], "sent")
        Meta = DNN.run(df[metafeat], "meta")
        Temporal = DNN.run(df[tempfeat], "temp")
        Network = DNN.run(df[netfeat], "net")
        Lang = user['lang']
        ret = {
            'Final': Final,
            'Sentiment': Sent,
            'Meta': Meta,
            'Temporal': Temporal,
            'Network': Network,
            'Lang': Lang
        }
        print(ret, end='')
Example #4
    def __init__(self):
        wordVectors = np.load("IMDBSA/wordVectors.npy")
        wordMap = np.load("IMDBSA/wordMap.npy").item()
        parameterMap = np.load("IMDBSA/paramMap.npy").item()
        MAX_SEQUENCE_LENGTH = parameterMap["MAX_SEQUENCE_LENGTH"]
        BATCH_SIZE = parameterMap["BATCH_SIZE"]
        LSTM_UNITS = parameterMap["LSTM_UNITS"]
        LEARNING_RATE = parameterMap["LEARNING_RATE"]
        self.analyzer = SentimentAnalyzer(MAX_SEQUENCE_LENGTH, BATCH_SIZE,
                                          LSTM_UNITS, LEARNING_RATE, wordMap,
                                          wordVectors)
        self.analyzer.LoadModel("IMDBSA/model")
Example #5
    def __init__(self, parameter, liveView, pieView):
        self.sentimentAnalyzer = SentimentAnalyzer()
        self.writer = Writer()
        self.parameter = parameter
        print('Creating token')
        self.liveView = liveView
        self.pieView = pieView
Example #6
def analyze():
    if request.method == 'POST':
        print(request.data)

        data = json.loads(request.data)

        if "" in data:
            return make_response(jsonify({'error': 'Your request is empty, please send a valid request'}), 400)
        else:
            url = data.get("url", None)

            if url is None:
                return make_response(jsonify({'error': 'URL parameter not available. check your request'}), 400)

            else:
                if not validators.url(url):
                    return make_response(jsonify({'error': 'URL sent was not valid'}), 400)

                else:
                    result = SentimentAnalyzer(url).sentiment_analyze_invoke()

                    if (result["status"] and result["sentiment"]):
                        return make_response(jsonify({'status': True, 'details': 'Successfully Analyzed', 'result': result["sentiment"]}), 200)
                    
                    else:
                        if (result["error"]):
                            return make_response(jsonify({'status': False, 'details': 'Internal Error Occurred', 'error': result["error"]}), 500)
                        else:
                            return make_response(jsonify({'status': False, 'details': 'Internal Error Occurred', 'error': "N/A"}), 500)
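For reference, a client call against this endpoint might look like the sketch below; the host, port, and the "/analyze" route path are assumptions, since the snippet does not show the route decorator or how the app is served.

import requests

# Hypothetical client call; host, port and the "/analyze" path are assumptions.
resp = requests.post(
    "http://localhost:5000/analyze",
    json={"url": "https://example.com/some-article"},
)
print(resp.status_code, resp.json())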
Example #7
class IMDBSentimentAnalyzer:
    def __init__(self):
        wordVectors = np.load("IMDBSA/wordVectors.npy")
        wordMap = np.load("IMDBSA/wordMap.npy").item()
        parameterMap = np.load("IMDBSA/paramMap.npy").item()
        MAX_SEQUENCE_LENGTH = parameterMap["MAX_SEQUENCE_LENGTH"]
        BATCH_SIZE = parameterMap["BATCH_SIZE"]
        LSTM_UNITS = parameterMap["LSTM_UNITS"]
        LEARNING_RATE = parameterMap["LEARNING_RATE"]
        self.analyzer = SentimentAnalyzer(MAX_SEQUENCE_LENGTH, BATCH_SIZE,
                                          LSTM_UNITS, LEARNING_RATE, wordMap,
                                          wordVectors)
        self.analyzer.LoadModel("IMDBSA/model")

    # Returns an array of 1 for positive and 0 for negative in order of the samples array
    def Evaluate(self, textSamples):
        return self.analyzer.Evaluate(textSamples)
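A minimal usage sketch for this class; the sample sentences are invented, and note that on NumPy 1.16.3 and later the np.load calls that load dictionaries (wordMap.npy, paramMap.npy) also need allow_pickle=True.

# Hypothetical usage; the input strings are illustrative only.
analyzer = IMDBSentimentAnalyzer()
labels = analyzer.Evaluate([
    "A beautifully shot film with a moving story.",
    "Two hours of my life I will never get back.",
])
print(labels)  # e.g. [1, 0] -- 1 for positive, 0 for negative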
Example #8
def main():
    parser = argparse.ArgumentParser(
        description='Train a MNB or SVM Sentiment Analyzer.')
    parser.add_argument('training_set',
                        metavar='Training set directory',
                        help='Training set directory.')
    parser.add_argument('test_set',
                        metavar='Test set directory',
                        help='Test set directory.')
    parser.add_argument('-a',
                        '--algorithm',
                        dest='algorithm',
                        default='MNB',
                        choices=['MNB', 'SVM'],
                        help='Algorithm.')
    parser.add_argument(
        '-v',
        '--vectorizer',
        dest='vectorizer',
        default='tf-idf',
        choices=['tf-idf', 'count'],
        help='Vectorizer (feature [count], or fractional [tf-idf] count).')
    parser.add_argument('-s',
                        '--size',
                        '--training_set_size',
                        dest='training_set_size',
                        type=int,
                        help='Training set size.')
    parser.add_argument('-ng',
                        '--ngram',
                        '--ngram_length',
                        dest='ngram_length',
                        default='unigram',
                        help='N-gram length.',
                        choices=['unigram', 'bigram', 'trigram'])
    parser.add_argument('-ne',
                        '--neutral',
                        dest='include_neutral',
                        default=False,
                        help='Include neutral class?',
                        action=argparse.BooleanOptionalAction)
    parser.add_argument('-sw',
                        '--stopwords',
                        dest='use_stopwords',
                        default=True,
                        help='Use stop words?',
                        action=argparse.BooleanOptionalAction)

    args = parser.parse_args()

    print(f'Starting sentiment analysis with: {args}')
    s = SentimentAnalyzer(args)
    print('Starting training.')
    s.train()
    print('Starting testing.')
    s.test()
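Given the flags defined above, a typical invocation could look like this (the script name is an assumption):

    python sentiment.py data/train data/test -a SVM -v count -ng bigram --neutral --no-stopwords

The argparse.BooleanOptionalAction used for -ne and -sw (Python 3.9+) is what provides the --no-neutral / --no-stopwords negations.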
Example #9
class TweetObtainer(StreamListener):
    writer = None
    sentimentAnalyzer = None
    tokens = ''
    parameter = ''
    liveView = None
    pieView = None
    currentNumber = 0
    stream = None

    def __init__(self, parameter, liveView, pieView):
        self.sentimentAnalyzer = SentimentAnalyzer()
        self.writer = Writer()
        self.parameter = parameter
        print('Creating token')
        self.liveView = liveView
        self.pieView = pieView

    def init_stream(self):
        self.writer.setSaveFile('StreamedTweets.txt')

    def start(self):
        print("Setting up tweetobtainer")
        #TwitterAPI authorization
        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_secret)
        self.stream = Stream(auth, self)
        self.stream.filter(track=[self.parameter], languages=['en'])

    '''
    Called every time a tweet comes in.
    Forwards the retrieved tweet to the sentiment analysis and writes the
    analysis + tweet to a file as long as fewer than 10,000 have been
    collected this session. Sleeps for 1 second so there is enough time to
    process the tweet.
    '''
    def on_data(self, data):
        text = json.loads(data)

        #Use only the text field of obtained JSON String
        if 'text' in text:
            text = text['text']
            tweet = self.sentimentAnalyzer.preprocess(text)
            print(tweet)
            sentiment = self.sentimentAnalyzer.analyse(tweet)
            if self.currentNumber <= 10000:
                self.writer.write(sentiment + text)
                self.currentNumber += 1
            self.liveView.update(sentiment)
            self.pieView.update()
            time.sleep(1)
        return True

    def on_error(self, status_code):
        print('Got an error with status code: ' + str(status_code))
        return True # To continue listening

    def on_timeout(self):
        print('Timeout...')
        return True # To continue listening

    def stop_stream(self):
        self.stream.disconnect()
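A minimal sketch of how this listener might be driven; liveView and pieView stand in for real view objects whose classes are not shown here, and the track keyword 'bitcoin' is just an example.

# Hypothetical wiring; liveView and pieView are placeholders for the
# actual view objects expected by the constructor.
obtainer = TweetObtainer('bitcoin', liveView, pieView)
obtainer.init_stream()
try:
    obtainer.start()          # blocks while the filtered stream is running
except KeyboardInterrupt:
    obtainer.stop_stream()    # disconnect cleanly on interrupt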
Example #10
import json
import os

from flask import Flask, render_template, request, abort

from SentimentAnalyzer import SentimentAnalyzer
from SentimentAnalyzer import stemmed_words

app = Flask("SentimentAnalyzerController")
analyzer = SentimentAnalyzer()


@app.route("/", methods=["GET"])
def index():
    return render_template("index.html")


@app.route("/about", methods=["GET"])
def about():
    return render_template("about.html")


@app.route("/api/predict", methods=["GET"])
def predict():
    if "text" not in request.args:
        app.logger.error("There is no 'text' arg")
        abort(400)

    text = request.args.get("text")

    predictions = analyzer.predict(text)
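The example is cut off before the route returns a response. Assuming a reasonably recent Flask and that analyzer.predict() returns something JSON-serializable, a return along these lines would complete it (the response shape is a guess, not part of the original):

    return {"text": text, "predictions": predictions}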
Example #11
class NLUDefault:
    def __init__(self):
        self.sz = SentimentAnalyzer()
        with open(os.path.join(os.getcwd(), 'vectorizer.pk'), 'rb') as fin:
            self.vectorizer = pickle.load(fin)
        with open(os.path.join(os.getcwd(), 'question_detection.sav'),
                  'rb') as file:
            self.model = pickle.load(file)

    def parse(self, input_str):
        sf = SemanticFrame()
        input_str = input_str.lower()
        sf.Intent = 'user_statement'

        for pizza in PizzaMenu.specialty:
            if pizza.lower() in input_str:
                sf.Slots['pizza'] = pizza
                sf.Intent = "order_pizza"

        for topping in PizzaMenu.Toppings:
            if topping.lower() in input_str:
                if 'topping' not in sf.Slots:
                    sf.Slots['topping'] = []
                sf.Slots['topping'].append(topping)
                sf.Intent = "order_pizza"

        for size in PizzaMenu.sizes:
            if size.lower() in input_str:
                sf.Slots['size'] = size
                sf.Intent = "order_pizza"

        for crust in PizzaMenu.crusts:
            if crust.lower() in input_str:
                ifcontain = True
                if crust.lower() == 'thin':
                    # Make sure this 'thin' is not just part of the word 'think'
                    ifcontain = False
                    for m in re.finditer('thin', input_str):
                        if (m.end() == len(input_str)
                                or input_str[m.start():m.end() + 1] != 'think'):
                            ifcontain = True
                            break
                if ifcontain:
                    sf.Slots['crust'] = crust
                    sf.Intent = "order_pizza"

        for side in PizzaMenu.sides:
            if side.lower() in input_str:
                sf.Slots['side'] = side
                sf.Intent = "order_extras"

        for drink in PizzaMenu.drinks:
            if drink.lower() in input_str:
                sf.Slots['drink'] = drink
                sf.Intent = "order_extras"

        phone = re.findall(r"([\dA-Z]{3}-[\dA-Z]{3}-[\dA-Z]{4})", input_str,
                           re.IGNORECASE)
        if phone:
            for num in phone:
                sf.Slots['phone'] = num
                sf.Intent = "provide_contact_information"

        other_contact = re.findall(r"(?:this is|it's) ([\S]+)", input_str,
                                   re.IGNORECASE)
        if other_contact:
            for contact in other_contact:
                sf.Slots['contact'] = contact
                sf.Intent = "provide_contact_information"

        for ele in delivery:
            if ele in input_str:
                if ele == ' delivery' or ele == 'delivery':
                    ele = 'delivery'
                else:
                    ele = 'pick-up'
                sf.Slots['delivery_type'] = ele
                sf.Intent = "inform_delivery"

        for ele in change:
            if ele in input_str:
                sf.Intent = "change_order"

        for ele in reorder:
            if ele in input_str:
                sf.Intent = "reorder_favorite"

        ifquestion = self.model.predict(
            self.vectorizer.transform([input_str]))[0]
        if ifquestion in ('whQuestion', 'ynQuestion'):
            if 'recommend' in input_str:
                sf.Intent = 'ask_for_recommend'
                sf.Slots['recommend'] = []
                for item in recommend:
                    if item in input_str:
                        sf.Slots['recommend'].append(item)
            elif 'order' in input_str:
                sf.Intent = "query_pizza_status"

        for ele in confirm:
            if ele in input_str:
                if ele in ('yes', 'yeah', 'right', 'sure', 'exactly', 'yep', 'why not'):
                    sf.Slots['confirm'] = 'yes'
                else:
                    sf.Slots['confirm'] = 'no'
                sf.Intent = "confirm_previous"
        return sf, self.sz.compound_sentiment_score(input_str)
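A usage sketch for this parser; the utterance is invented, and it assumes vectorizer.pk and question_detection.sav are present in the working directory as required by __init__.

# Hypothetical usage; the utterance is illustrative only.
nlu = NLUDefault()
frame, sentiment = nlu.parse("Can I get a large thin crust pizza for delivery?")
print(frame.Intent)   # e.g. "inform_delivery", depending on which patterns matched last
print(frame.Slots)    # e.g. size/crust/delivery_type slots filled from the menu definitions
print(sentiment)      # compound sentiment score for the utterance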
Example #12
# init consumer
consumer = KafkaConsumer(source_topic_name,
                         bootstrap_servers=['localhost:9092'],
                         auto_offset_reset='earliest',
                         enable_auto_commit=True,
                         group_id=consumer_group_id,
                         value_deserializer=lambda x: loads(x.decode('utf-8')))

# init producer
producer = KafkaProducer(bootstrap_servers=['localhost:9092'],
                         value_serializer=lambda x: dumps(x).encode('utf-8'),
                         api_version=(0, 10, 1))

# init sentiment analyzer
sa = SentimentAnalyzer()
tokenizer = sa.token()

# start consuming
for message in consumer:

    # overwrite message with its value and preprocess text
    message = message.value.copy()

    # extract hashtags
    hashtags = []
    if len(message['hashtags']) != 0:
        for hashtag_data in message['hashtags']:
            hashtags.append(hashtag_data["text"])

    # overwrite hashtags data structure with plain hashtags text
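The example is truncated at this point. Presumably the loop goes on to replace the hashtag objects with the collected text, score the tweet, and publish the result to another topic; a rough sketch under those assumptions (the field names, the analyze() call and target_topic_name are not from the original):

    # Hypothetical continuation -- field names, the analyze() call and
    # target_topic_name are assumptions, not part of the original example.
    message['hashtags'] = hashtags
    message['sentiment'] = sa.analyze(message['text'])
    producer.send(target_topic_name, value=message)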
Example #13
from SentimentAnalyzer import SentimentAnalyzer

senti_obj = SentimentAnalyzer()
senti_obj.get_sentiments()
Example #14
    def Extract(self, handle):
        USER = TwitterAPI.get_user(handle)
        TWEETS = TwitterAPI.user_timeline(screen_name=handle, count=200)
        SentAnalyzer = SentimentAnalyzer()
        SentimentReport = SentAnalyzer.SentimentAnalysis(TWEETS)
        return SentimentReport
Example #15
# Load and save the word vectors and index map
model = gensim.models.KeyedVectors.load_word2vec_format("WordVectors/gensim_glove_wiki_vectors.txt", binary=False)
# Note: in gensim >= 4.0 these attributes are model.vectors and model.index_to_key.
wordVectors = model.syn0
wordsList = model.index2word
wordMap = {wordsList[i]: i for i in range(len(wordsList))}
np.save("IMDBSA/wordMap", wordMap)
np.save("IMDBSA/wordVectors", wordVectors)

# Find the training data
positiveFiles = ['./Data/IMDBData/train/pos/' + f for f in os.listdir('./Data/IMDBData/train/pos/') if os.path.isfile(os.path.join('./Data/IMDBData/train/pos/', f))]
negativeFiles = ['./Data/IMDBData/train/neg/' + f for f in os.listdir('./Data/IMDBData/train/neg/') if os.path.isfile(os.path.join('./Data/IMDBData/train/neg/', f))]

# Initialize the pre-processor and sentiment analyzer
processor = PreProcessor()
analyzer = SentimentAnalyzer(MAX_SEQUENCE_LENGTH, BATCH_SIZE, LSTM_UNITS, LEARNING_RATE, wordMap, wordVectors)

# Load and process the training data
negativeSamples = []
positiveSamples = []
for pf in positiveFiles:
    with open(pf, "r", encoding="utf8") as f:
        lines = f.readlines()
        positiveSamples.extend(processor.cleanTextList(lines))
        print("Cleaned positive document: " + pf)

for nf in negativeFiles:
    with open(nf, "r", encoding="utf8") as f:
        lines = f.readlines()
        negativeSamples.extend(processor.cleanTextList(lines))
        print("Cleaned negative document: " + nf)
Example #16
from nltk.corpus import brown
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.parse.corenlp import CoreNLPDependencyParser
from MyCorpusReader import MyCorpusReader
from AspectDetector import AspectDetector
from MyWordNetSimilarity import wup_similarity
from SentimentAnalyzer import SentimentAnalyzer

POSITIVE_KEY = "POSITIVE"
NEGATIVE_KEY = "NEGATIVE"
NEUTRAL_KEY = "NEUTRAL"

corpus = MyCorpusReader("reviews")
# corpus = MyCorpusReader("_samplereview3")
a = AspectDetector(brown, corpus)
sentimentAnalyzer = SentimentAnalyzer()
parser = CoreNLPDependencyParser(url='http://localhost:9000')

raw = corpus.raw()
sents = sent_tokenize(raw)

# Retrieve the initial list of aspects
potentialAspects = a.run()

# Only consider the top 20% of aspects
ndx = int(0.2 * len(potentialAspects))
potentialAspects = potentialAspects[:ndx]

# Setup variables for calculating average similarity
wordSimilarity = dict()
for w in potentialAspects: