def scrape_site(self):
    marketwatchScraper = MarketwatchNewsScraper()
    all_articles = []
    sentimentAnalyzer = SentimentAnalyzer()

    while True:
        articles = marketwatchScraper.scrape()
        for article in articles:
            if article not in all_articles:
                headline = article.headline
                print('headline:', headline)
                print('url', article.url)
                all_articles.append(article)
                # if len(headline.split()) >= 20:
                #     sentimentAnalyzer.google_analyze(headline)
                # else:
                sentimentAnalyzer.analyze(headline)

        # for headline in newsHeadlines:
        #     # print('headline: ', headline)
        #     sentimentAnalyzer.google_analyze(headline)
        # # print('headline: ', newsHeadlines[0])
        # # sentimentAnalyzer.google_analyze(newsHeadlines[0])

        logger.info("Will get news headlines again in %s sec..." % self.frequency)
        print()
        print()
        print()
        print()
        time.sleep(self.frequency)
def Extract(self, username, filters, KEY, SECRET):
    # Authenticate against the Twitter API and pull the user's recent timeline
    authentication = tweepy.OAuthHandler(consumer_key, consumer_secret)
    authentication.set_access_token(KEY, SECRET)
    TwitterAPI = tweepy.API(authentication)
    resp = TwitterAPI.user_timeline(screen_name=username, count=200)
    tweets = [tweet._json for tweet in resp]
    user = tweets[0]['user']

    # Run each analyzer and concatenate the resulting features into one Series
    SentAnaly = SentimentAnalyzer()
    TempAnaly = TemporalAnalyzer()
    MetaAnaly = MetaDataAnalyzer()
    NetAnaly = NetworkAnalyzer()
    account = pd.Series()
    account = account.append(SentAnaly.SentimentAnalysis(tweets))
    account = account.append(TempAnaly.TemporalAnalysis(tweets))
    account = account.append(MetaAnaly.MetaDataAnalysis(user))
    account = account.append(NetAnaly.NetworkAnalysis(user))

    # Build a single-row DataFrame for the regressor
    DNN = DNNReg()
    temp = list()
    temp.append(account)
    df = pd.DataFrame(temp)

    # Feature groups: boolean flags to cast, plus sentiment, metadata, network and temporal features
    toints = [
        'Blanguage', 'BdefaultProfile', 'BdefaultImage',
        'BprofileBackgroundImage', 'Bverified', 'Bprotected', 'BgeoEnabled'
    ]
    sentfeat = [
        'positiveNum', 'positivePol', 'neutralNum', 'neutralPol',
        'negativeNum', 'negativePol', 'average', 'standardDeviation'
    ]
    metafeat = [
        'screenNameLength', 'screenNameDigits', 'accountNameLength',
        'ageDays', 'descriptionLength', 'Blanguage', 'BdefaultProfile',
        'BdefaultImage', 'BprofileBackgroundImage', 'Bverified',
        'Bprotected', 'BgeoEnabled'
    ]
    netfeat = [
        'favoritesCount', 'followersCount', 'friendsCount', 'listedCount'
    ]
    tempfeat = ['tweetsPerDay', 'tweetsPerMSecPerDay']

    df[toints] = df[toints].astype(int)

    # Score the full feature set and each feature subset separately
    Final = DNN.run(df, "twee")
    Sent = DNN.run(df[sentfeat], "sent")
    Meta = DNN.run(df[metafeat], "meta")
    Temporal = DNN.run(df[tempfeat], "temp")
    Network = DNN.run(df[netfeat], "net")
    Lang = user['lang']

    ret = {
        'Final': Final,
        'Sentiment': Sent,
        'Meta': Meta,
        'Temporal': Temporal,
        'Network': Network,
        'Lang': Lang
    }
    print(ret, end='')
def analyze():
    if request.method == 'POST':
        print(request.data)
        data = json.loads(request.data)
        if "" in data:
            return make_response(
                jsonify({'error': 'Your request is empty, please send a valid request'}), 400)
        else:
            url = data.get("url", None)
            if url is None:
                return make_response(
                    jsonify({'error': 'URL parameter not available. Check your request'}), 400)
            else:
                if not validators.url(url):
                    return make_response(jsonify({'error': 'URL sent was not valid'}), 400)
                else:
                    result = SentimentAnalyzer(url).sentiment_analyze_invoke()
                    if result["status"] and result["sentiment"]:
                        return make_response(
                            jsonify({'status': True,
                                     'details': 'Successfully Analyzed',
                                     'result': result["sentiment"]}), 200)
                    else:
                        if result["error"]:
                            return make_response(
                                jsonify({'status': False,
                                         'details': 'Internal Error Occurred',
                                         'error': result["error"]}), 500)
                        else:
                            return make_response(
                                jsonify({'status': False,
                                         'details': 'Internal Error Occurred',
                                         'error': "N/A"}), 500)
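# A minimal client-side sketch (separate script, not from the original project) for
# exercising the handler above. The route path '/analyze' and the localhost:5000
# address are assumptions; the {"url": ...} payload shape matches what the handler reads.
import json
import requests

resp = requests.post('http://localhost:5000/analyze',
                     data=json.dumps({'url': 'https://example.com/article'}),
                     headers={'Content-Type': 'application/json'})
print(resp.status_code, resp.json())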
class IMDBSentimentAnalyzer:
    def __init__(self):
        # Note: wordMap/paramMap are pickled dicts; newer NumPy releases need
        # allow_pickle=True to load them.
        wordVectors = np.load("IMDBSA/wordVectors.npy")
        wordMap = np.load("IMDBSA/wordMap.npy").item()
        parameterMap = np.load("IMDBSA/paramMap.npy").item()

        MAX_SEQUENCE_LENGTH = parameterMap["MAX_SEQUENCE_LENGTH"]
        BATCH_SIZE = parameterMap["BATCH_SIZE"]
        LSTM_UNITS = parameterMap["LSTM_UNITS"]
        LEARNING_RATE = parameterMap["LEARNING_RATE"]

        self.analyzer = SentimentAnalyzer(MAX_SEQUENCE_LENGTH, BATCH_SIZE,
                                          LSTM_UNITS, LEARNING_RATE,
                                          wordMap, wordVectors)
        self.analyzer.LoadModel("IMDBSA/model")

    # Returns an array of 1 for positive and 0 for negative in order of the samples array
    def Evaluate(self, textSamples):
        return self.analyzer.Evaluate(textSamples)
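# A small usage sketch (not part of the original file). The Evaluate call and its
# 1/0 output convention come from the class above; the sample strings are made up,
# and the IMDBSA/ model files are assumed to be present.
if __name__ == "__main__":
    imdb_analyzer = IMDBSentimentAnalyzer()
    samples = ["A genuinely moving film with terrific performances.",
               "Two hours of my life I will never get back."]
    labels = imdb_analyzer.Evaluate(samples)  # e.g. [1, 0]: 1 = positive, 0 = negative
    for text, label in zip(samples, labels):
        print(label, text)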
def main():
    parser = argparse.ArgumentParser(
        description='Train a MNB or SVM Sentiment Analyzer.')
    parser.add_argument('training_set', metavar='Training set directory',
                        help='Training set directory.')
    parser.add_argument('test_set', metavar='Test set directory',
                        help='Test set directory.')
    parser.add_argument('-a', '--algorithm', dest='algorithm', default='MNB',
                        choices=['MNB', 'SVM'], help='Algorithm.')
    parser.add_argument(
        '-v', '--vectorizer', dest='vectorizer', default='tf-idf',
        choices=['tf-idf', 'count'],
        help='Vectorizer (feature [count], or fractional [tf-idf] count).')
    parser.add_argument('-s', '--size', '--training_set_size',
                        dest='training_set_size', type=int,
                        help='Training set size.')
    parser.add_argument('-ng', '--ngram', '--ngram_length',
                        dest='ngram_length', default='unigram',
                        help='N-gram length.',
                        choices=['unigram', 'bigram', 'trigram'])
    parser.add_argument('-ne', '--neutral', dest='include_neutral',
                        default=False, help='Include neutral class?',
                        action=argparse.BooleanOptionalAction)
    parser.add_argument('-sw', '--stopwords', dest='use_stopwords',
                        default=True, help='Use stop words?',
                        action=argparse.BooleanOptionalAction)
    args = parser.parse_args()
    print(f'Starting sentiment analysis with: {args}')

    s = SentimentAnalyzer(args)
    print('Starting training.')
    s.train()
    print('Starting testing.')
    s.test()
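# Hedged example invocations (the script name is assumed; the flags come from the
# parser above, and argparse.BooleanOptionalAction requires Python 3.9+):
#   python sentiment_trainer.py ./data/train ./data/test
#   python sentiment_trainer.py ./data/train ./data/test -a SVM -v count -ng bigram --neutral --no-stopwords
# If the original script does not already provide an entry point, a standard one would be:
if __name__ == '__main__':
    main()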
class TweetObtainer(StreamListener):
    writer = None
    sentimentAnalyzer = None
    tokens = ''
    parameter = ''
    liveView = None
    pieView = None
    currentNumber = 0
    stream = None

    def __init__(self, parameter, liveView, pieView):
        self.sentimentAnalyzer = SentimentAnalyzer()
        self.writer = Writer()
        self.parameter = parameter
        print('Creating token')
        self.liveView = liveView
        self.pieView = pieView

    def init_stream(self):
        self.writer.setSaveFile('StreamedTweets.txt')

    def start(self):
        print("Setting up tweetobtainer")
        # Twitter API authorization
        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_secret)
        self.stream = Stream(auth, self)
        self.stream.filter(track=[self.parameter], languages=['en'])

    '''
    Called every time a tweet comes in.
    Forwards the retrieved tweet to the analysis and writes the analysis plus the
    tweet to a file if fewer than 10,000 have been retrieved this session.
    Sleeps for 1 second so there is enough time to process the tweet.
    '''
    def on_data(self, data):
        text = json.loads(data)
        # Use only the text field of the obtained JSON string
        if 'text' in text:
            text = text['text']
            tweet = self.sentimentAnalyzer.preprocess(text)
            print(tweet)
            sentiment = self.sentimentAnalyzer.analyse(tweet)
            if self.currentNumber <= 10000:
                self.writer.write(sentiment + text)
                self.currentNumber += 1
            self.liveView.update(sentiment)
            self.pieView.update()
        time.sleep(1)
        return True

    def on_error(self, status_code):
        print('Got an error with status code: ' + str(status_code))
        return True  # To continue listening

    def on_timeout(self):
        print('Timeout...')
        return True  # To continue listening

    def stop_stream(self):
        self.stream.disconnect()
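# A rough usage sketch, not from the original project. 'DummyView' stands in for
# whatever liveView/pieView objects the GUI normally supplies; they only need an
# update() method, as used in on_data() above. 'bitcoin' is an arbitrary track keyword,
# and the Twitter credentials are assumed to be defined at module level as in start().
class DummyView:
    def update(self, *args):
        pass

if __name__ == '__main__':
    obtainer = TweetObtainer('bitcoin', DummyView(), DummyView())
    obtainer.init_stream()
    obtainer.start()  # runs the stream; matching tweets are delivered to on_data()
    # ... later, e.g. from another thread:
    # obtainer.stop_stream()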
import json
import os

from flask import Flask, render_template, request, abort

from SentimentAnalyzer import SentimentAnalyzer
from SentimentAnalyzer import stemmed_words

app = Flask("SentimentAnalyzerController")
analyzer = SentimentAnalyzer()


@app.route("/", methods=["GET"])
def index():
    return render_template("index.html")


@app.route("/about", methods=["GET"])
def about():
    return render_template("about.html")


@app.route("/api/predict", methods=["GET"])
def predict():
    if "text" not in request.args:
        app.logger.error("There is no 'text' arg")
        abort(400)
    text = request.args.get("text")
    predictions = analyzer.predict(text)
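# A hedged way to exercise the endpoint above without running a server, using Flask's
# built-in test client. Note the predict() excerpt ends before it builds a response,
# so what comes back here depends on code not shown in this excerpt.
if __name__ == "__main__":
    with app.test_client() as client:
        resp = client.get("/api/predict", query_string={"text": "I loved this product"})
        print(resp.status_code, resp.get_data(as_text=True))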
class NLUDefault:
    def __init__(self):
        self.sz = SentimentAnalyzer()
        with open(os.path.join(os.getcwd(), 'vectorizer.pk'), 'rb') as fin:
            self.vectorizer = pickle.load(fin)
        with open(os.path.join(os.getcwd(), 'question_detection.sav'), 'rb') as file:
            self.model = pickle.load(file)

    def parse(self, input_str):
        sf = SemanticFrame()
        input_str = input_str.lower()
        sf.Intent = 'user_statement'

        for pizza in PizzaMenu.specialty:
            if pizza.lower() in input_str:
                sf.Slots['pizza'] = pizza
                sf.Intent = "order_pizza"

        for topping in PizzaMenu.Toppings:
            if topping.lower() in input_str:
                if 'topping' not in sf.Slots:
                    sf.Slots['topping'] = []
                sf.Slots['topping'].append(topping)
                sf.Intent = "order_pizza"

        for size in PizzaMenu.sizes:
            if size.lower() in input_str:
                sf.Slots['size'] = size
                sf.Intent = "order_pizza"

        for crust in PizzaMenu.crusts:
            if crust.lower() in input_str:
                ifcontain = True
                if crust.lower() == 'thin':
                    # Only accept 'thin' when the match is not just part of the word 'think'
                    ifcontain = False
                    for m in re.finditer('thin', input_str):
                        if len(input_str) == m.end() or input_str[m.start():m.end() + 1] != 'think':
                            ifcontain = True
                            break
                if ifcontain:
                    sf.Slots['crust'] = crust
                    sf.Intent = "order_pizza"

        for side in PizzaMenu.sides:
            if side.lower() in input_str:
                sf.Slots['side'] = side
                sf.Intent = "order_extras"

        for drink in PizzaMenu.drinks:
            if drink.lower() in input_str:
                sf.Slots['drink'] = drink
                sf.Intent = "order_extras"

        phone = re.findall(r"([\dA-Z]{3}-[\dA-Z]{3}-[\dA-Z]{4})", input_str, re.IGNORECASE)
        if phone:
            for num in phone:
                sf.Slots['phone'] = num
                sf.Intent = "provide_contact_information"

        other_contact = re.findall(r"(?:this is|it's) ([\S]+)", input_str, re.IGNORECASE)
        if other_contact:
            for contact in other_contact:
                sf.Slots['contact'] = contact
                sf.Intent = "provide_contact_information"

        for ele in delivery:
            if ele in input_str:
                if ele == ' delivery' or ele == 'delivery':
                    ele = 'delivery'
                else:
                    ele = 'pick-up'
                sf.Slots['delivery_type'] = ele
                sf.Intent = "inform_delivery"

        for ele in change:
            if ele in input_str:
                sf.Intent = "change_order"

        for ele in reorder:
            if ele in input_str:
                sf.Intent = "reorder_favorite"

        ifquestion = self.model.predict(self.vectorizer.transform([input_str]))[0]
        if ifquestion == 'whQuestion' or ifquestion == 'ynQuestion':
            if 'recommend' in input_str:
                sf.Intent = 'ask_for_recommend'
                sf.Slots['recommend'] = []
                for item in recommend:
                    if item in input_str:
                        sf.Slots['recommend'].append(item)
            elif 'order' in input_str:
                sf.Intent = "query_pizza_status"

        for ele in confirm:
            if ele in input_str:
                if ele in ('yes', 'yeah', 'right', 'sure', 'exactly', 'yep', 'why not'):
                    sf.Slots['confirm'] = 'yes'
                else:
                    sf.Slots['confirm'] = 'no'
                sf.Intent = "confirm_previous"

        return sf, self.sz.compound_sentiment_score(input_str)
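# A small usage sketch (not part of the original file). It assumes vectorizer.pk and
# question_detection.sav exist in the working directory, as __init__ above requires,
# and that PizzaMenu defines the menus the parser scans; the utterance is made up.
if __name__ == '__main__':
    nlu = NLUDefault()
    frame, sentiment = nlu.parse("Can I get a large thin crust pizza with mushrooms?")
    print(frame.Intent, frame.Slots, sentiment)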
# init consumer
consumer = KafkaConsumer(source_topic_name,
                         bootstrap_servers=['localhost:9092'],
                         auto_offset_reset='earliest',
                         enable_auto_commit=True,
                         group_id=consumer_group_id,
                         value_deserializer=lambda x: loads(x.decode('utf-8')))

# init producer
producer = KafkaProducer(bootstrap_servers=['localhost:9092'],
                         value_serializer=lambda x: dumps(x).encode('utf-8'),
                         api_version=(0, 10, 1))

# init sentiment analyzer
sa = SentimentAnalyzer()
tokenizer = sa.token()

# start consuming
for message in consumer:
    # overwrite message with its value and preprocess text
    message = message.value.copy()

    # extract hashtags
    hashtags = []
    if len(message['hashtags']) != 0:
        for hashtag_data in message['hashtags']:
            hashtags.append(hashtag_data["text"])
    # overwrite hashtags data structure with plain hashtags text
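# A hedged, standalone sketch (run as a separate script, not appended to the consumer
# loop above) for feeding this pipeline a test record. The 'hashtags' layout with
# {"text": ...} entries mirrors what the consumer reads; the 'text' field and the
# topic name are assumptions about the rest of the pipeline.
from json import dumps
from kafka import KafkaProducer

test_producer = KafkaProducer(bootstrap_servers=['localhost:9092'],
                              value_serializer=lambda x: dumps(x).encode('utf-8'))
test_producer.send('source_topic', value={
    'text': 'Loving the new release! #python #nlp',
    'hashtags': [{'text': 'python'}, {'text': 'nlp'}],
})
test_producer.flush()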
from SentimentAnalyzer import SentimentAnalyzer

senti_obj = SentimentAnalyzer()
senti_obj.get_sentiments()
def Extract(self, handle):
    USER = TwitterAPI.get_user(handle)
    TWEETS = TwitterAPI.user_timeline(screen_name=handle, count=200)
    SentAnalyzer = SentimentAnalyzer()
    SentimentReport = SentAnalyzer.SentimentAnalysis(TWEETS)
    return SentimentReport
# Load and save the word vectors and index map
# (note: in gensim >= 4.0 these attributes are model.vectors and model.index_to_key)
model = gensim.models.KeyedVectors.load_word2vec_format(
    "WordVectors/gensim_glove_wiki_vectors.txt", binary=False)
wordVectors = model.syn0
wordsList = model.index2word
wordMap = {wordsList[i]: i for i in range(len(wordsList))}
np.save("IMDBSA/wordMap", wordMap)
np.save("IMDBSA/wordVectors", wordVectors)

# Find the training data
positiveFiles = ['./Data/IMDBData/train/pos/' + f
                 for f in os.listdir('./Data/IMDBData/train/pos/')
                 if os.path.isfile(os.path.join('./Data/IMDBData/train/pos/', f))]
negativeFiles = ['./Data/IMDBData/train/neg/' + f
                 for f in os.listdir('./Data/IMDBData/train/neg/')
                 if os.path.isfile(os.path.join('./Data/IMDBData/train/neg/', f))]

# Initialize the pre-processor and sentiment analyzer
processor = PreProcessor()
analyzer = SentimentAnalyzer(MAX_SEQUENCE_LENGTH, BATCH_SIZE, LSTM_UNITS,
                             LEARNING_RATE, wordMap, wordVectors)

# Load and process the training data
negativeSamples = []
positiveSamples = []
for pf in positiveFiles:
    with open(pf, "r", encoding="utf8") as f:
        lines = f.readlines()
        positiveSamples.extend(processor.cleanTextList(lines))
        print("Cleaned positive document: " + pf)
for nf in negativeFiles:
    with open(nf, "r", encoding="utf8") as f:
        lines = f.readlines()
        negativeSamples.extend(processor.cleanTextList(lines))
        print("Cleaned negative document: " + nf)
from nltk.corpus import brown
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.parse.corenlp import CoreNLPDependencyParser

from MyCorpusReader import MyCorpusReader
from AspectDetector import AspectDetector
from MyWordNetSimilarity import wup_similarity
from SentimentAnalyzer import SentimentAnalyzer

POSITIVE_KEY = "POSITIVE"
NEGATIVE_KEY = "NEGATIVE"
NEUTRAL_KEY = "NEUTRAL"

corpus = MyCorpusReader("reviews")
# corpus = MyCorpusReader("_samplereview3")
a = AspectDetector(brown, corpus)
sentimentAnalyzer = SentimentAnalyzer()
parser = CoreNLPDependencyParser(url='http://localhost:9000')

raw = corpus.raw()
sents = sent_tokenize(raw)

# Retrieve the initial list of aspects
potentialAspects = a.run()

# Only consider the top 20% of aspects
ndx = int(0.2 * len(potentialAspects))
potentialAspects = potentialAspects[:ndx]

# Setup variables for calculating average similarity
wordSimilarity = dict()
for w in potentialAspects: