def main():
    """Rank universities by their ratio of positive to negative tweets.

    Reads ``tweets.csv`` (first column: university, second column: tweet
    text), predicts a sentiment label for every tweet, prints the raw
    per-university label counts, and then prints each university with its
    "Positive"/"Negative" ratio, highest ratio first.

    A university without any "Negative" tweet has no finite ratio: it is
    reported as ``inf`` when it has at least one "Positive" tweet and as
    ``0.0`` when it has neither, instead of aborting the whole run with a
    ``ZeroDivisionError``.
    """
    tweets = pd.read_csv('tweets.csv')
    label_path = "labels/"
    model_path = "model/"
    analyzer = SentimentAnalyzer(model_path, label_path)

    data = defaultdict(lambda: defaultdict(int))
    universities = tweets.iloc[:, 0]
    tweets_text = tweets.iloc[:, 1]

    print("Predicting sentiments...")
    sentiments = analyzer.batch_predict_sentiment(tweets_text)

    # Pair rows positionally; indexing the Series with ``[i]`` is a label
    # lookup and only works while the frame keeps its default index.
    for university, sentiment in zip(universities, sentiments):
        data[university][sentiment] += 1
    print(data)

    # calculate the ratio of pos/neg
    university_ratio_posneg = []
    for university, counts in data.items():
        positive = counts["Positive"]
        negative = counts["Negative"]
        if negative:
            pos_neg_ratio = positive / negative
        else:
            # No negative tweets: avoid dividing by zero.
            pos_neg_ratio = float("inf") if positive else 0.0
        university_ratio_posneg.append((university, pos_neg_ratio))

    university_ratio_posneg.sort(key=lambda x: x[1], reverse=True)
    for university, pos_neg_ratio in university_ratio_posneg:
        print(university, pos_neg_ratio)
class SentenceCompleter():
    """Complete sentence prefixes, rejecting completions judged negative."""

    # Number of generation attempts before giving up on a prefix.
    MAX_TRIALS = 20

    def __init__(self):
        """Create the generator and the sentiment analyzer and initialize both."""
        self.generator = Generator()
        self.sentiment_analyzer = SentimentAnalyzer()
        # Initialize everything (takes time)
        self.generator.initialize()
        self.sentiment_analyzer.initialize()

    def complete_sentence(self, prefix):
        """Return the first non-negative completion of ``prefix``.

        The generator is asked for a completion up to ``MAX_TRIALS`` times;
        every completion the sentiment analyzer flags as negative is
        discarded.

        :param prefix: text handed to the generator.
        :return: the accepted sentence, or ``None`` when every trial was
            rejected.
        """
        for iteration in range(self.MAX_TRIALS):
            sentence = self.generator.complete(prefix)
            logging.info("Generated %s, trial %d", sentence, iteration)
            if not self.sentiment_analyzer.is_negative(sentence):
                return sentence
            logging.warning("Negative sentence generated: %s, trial %d",
                            sentence, iteration)
            # Try again
        return None

    def complete_prettify_shorten_sentence(self, prefix, length):
        """Complete ``prefix``, tidy apostrophes and cut to ``length`` characters.

        After truncation the text is cut back to its last full stop, if it
        contains one, so the result does not end mid-sentence.

        :param prefix: text handed to the generator.
        :param length: maximum number of characters to keep.
        :return: the shortened sentence, or ``None`` when no acceptable
            completion could be generated.
        """
        sentence = self.complete_sentence(prefix)
        if sentence is None:
            # Every trial was rejected; there is nothing to prettify.
            return None
        sentence = sentence.replace("' ", "'")[:length]
        if "." in sentence:
            sentence = sentence[:sentence.rfind(".") + 1]
        return sentence
def build_report(self):
    """Ask the user for a report file, score its records and display the result.

    The chosen path is stored in the module-level ``active_file`` and the
    part of its base name before the first underscore in ``sub_name``.
    When the selected method is 'quick' the data is scored with
    ``apply_sentiment_intensity``; otherwise a ``SentimentAnalyzer`` predicts
    on every 20th record. A ``FileNotFoundError`` is reported in an error
    dialog and printed.
    """
    global active_file, sub_name
    try:
        file_types = (("json gz files", "*.gz"), ("all files", "*.*"))
        active_file = filedialog.askopenfilename(initialdir=os.getcwd(),
                                                 title='Open Report',
                                                 filetypes=file_types)
        print(active_file)
        sub_name = os.path.basename(active_file).partition('_')[0]

        frame = self.data_source.load_from_file(active_file)
        if self.method_selection.get() != 'quick':
            # every Nth record - it is still too slow to process all records
            sampled = frame[::20]
            frame = SentimentAnalyzer().predict(sampled)
        else:
            frame = apply_sentiment_intensity(frame)
        self.show_report(frame)
    except FileNotFoundError as err:
        messagebox.showerror(
            "Error",
            "Invalid file loaded. Please try gathering data again or selecting another dataset."
        )
        print(err)
def __init__(self):
    """Build the generator and the sentiment analyzer, then initialize each."""
    self.generator = Generator()
    self.sentiment_analyzer = SentimentAnalyzer()
    # Initialization takes time; the generator goes first, then the analyzer.
    for component in (self.generator, self.sentiment_analyzer):
        component.initialize()
def demo_subjectivity(trainer, save_analyzer=False, n_instances=None, output=None):
    """
    Train and test a classifier on the Subjectivity Dataset by Pang and Lee.

    The dataset holds 5000 subjective and 5000 objective sentences whose
    tokens (words and punctuation marks) are separated by whitespace, so the
    basic WhitespaceTokenizer is enough to parse it.

    :param trainer: `train` method of a classifier.
    :param save_analyzer: if `True`, store the SentimentAnalyzer in a pickle file.
    :param n_instances: the total number of sentences to use for training and
        testing. Half of them are taken from the subjective category and half
        from the objective one.
    :param output: the output file where results have to be reported.
    :return: the SentimentAnalyzer that was trained and evaluated.
    """
    from sentiment_analyzer import SentimentAnalyzer
    from nltk.corpus import subjectivity

    if n_instances is not None:
        # The requested total is shared equally between the two categories.
        n_instances = int(n_instances/2)

    def _labelled_sentences(category):
        sentences = subjectivity.sents(categories=category)[:n_instances]
        return [(sentence, category) for sentence in sentences]

    subj_docs = _labelled_sentences('subj')
    obj_docs = _labelled_sentences('obj')

    # Each category is split on its own so that the train and test sets keep
    # the same balanced class distribution.
    train_subj_docs, test_subj_docs = split_train_test(subj_docs)
    train_obj_docs, test_obj_docs = split_train_test(obj_docs)
    training_docs = train_subj_docs + train_obj_docs
    testing_docs = test_subj_docs + test_obj_docs

    sentim_analyzer = SentimentAnalyzer()
    negation_marked_docs = [mark_negation(doc) for doc in training_docs]
    all_words_neg = sentim_analyzer.all_words(negation_marked_docs)

    # Unigram word features computed on the negation-marked words
    unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4)
    sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)

    # Turn both document sets into their feature-value representation
    training_set = sentim_analyzer.apply_features(training_docs)
    test_set = sentim_analyzer.apply_features(testing_docs)

    classifier = sentim_analyzer.train(trainer, training_set)
    try:
        classifier.show_most_informative_features()
    except AttributeError:
        print('Your classifier does not provide a show_most_informative_features() method.')
    results = sentim_analyzer.evaluate(test_set)

    if save_analyzer == True:
        save_file(sentim_analyzer, 'sa_subjectivity.pickle')

    if output:
        extr = [extractor.__name__ for extractor in sentim_analyzer.feat_extractors]
        output_markdown(output,
                        Dataset='subjectivity',
                        Classifier=type(classifier).__name__,
                        Tokenizer='WhitespaceTokenizer',
                        Feats=extr,
                        Instances=n_instances,
                        Results=results)

    return sentim_analyzer
def test_init_negative01(self):
    """The constructor must raise TypeError when either argument is the int 1."""
    invalid_arguments = (
        {'feature_extractor': 1, 'classifier': self.classifier},
        {'feature_extractor': self.feature_extractor, 'classifier': 1},
    )
    for kwargs in invalid_arguments:
        with self.assertRaises(TypeError):
            SentimentAnalyzer(**kwargs)
class Utility:
    """Reporting helpers built on top of a ``SentimentAnalyzer``."""

    def __init__(self):
        """Create the analyzer and keep a reference to its classifier collection."""
        self.sentiment = SentimentAnalyzer()
        self.clf = self.sentiment.clf

    @staticmethod
    def _load_pickle(path):
        """Return the object stored in the pickle file at ``path``."""
        # NOTE: unpickling runs arbitrary code; only load files this project
        # produced itself.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    def classifiersVsFeatures(self):
        """Collect, per classifier, one metric value for each feature-count setting.

        Loads the pickled train/test features and labels, then for every
        feature count trains, predicts and evaluates through the analyzer.
        For each classifier index ``j`` the value ``clf_metrics[0][j]`` is
        printed and recorded.

        :return: a list with one ``{'x': feature_counts, 'y': values}`` dict
            per classifier.
        """
        X_train = self._load_pickle('pickled/features_train.pickle')
        X_test = self._load_pickle('pickled/features_test.pickle')
        y_train = self._load_pickle('pickled/labels_train.pickle')
        y_test = self._load_pickle('pickled/labels_test.pickle')

        num_features = [10000, 50000, 100000, 500000, 1000000]
        n_classifiers = len(self.clf)
        acc = [[] for _ in range(n_classifiers)]

        for k in num_features:
            # The fitted pipeline is not needed here, only the model.
            _, model = self.sentiment.trainData(X_train, y_train, self.clf, k)
            prediction = self.sentiment.predictData(X_test, model)
            clf_metrics = self.sentiment.evaluate(y_test, prediction)

            for j in range(n_classifiers):
                print(clf_metrics[0][j])
                # Append the accuracy of the classifier for each k
                acc[j].append(clf_metrics[0][j])

        return [{'x': num_features, 'y': values} for values in acc]

    def showTopFeatures(self, pipeline, n=20):
        """Format the ``n`` highest and ``n`` lowest weighted features side by side.

        :param pipeline: object whose ``named_steps`` has a 'vect' step that
            provides the feature names and a 'clf' step that provides ``coef_``.
        :param n: number of features to show at each end of the ranking.
        :return: one line per rank, joined with newlines; the highest
            coefficient/feature pair on the left, the lowest on the right.
        """
        vectorizer = pipeline.named_steps['vect']
        clf = pipeline.named_steps['clf']

        # scikit-learn 1.2 removed get_feature_names(); prefer the
        # replacement and fall back for older releases.
        get_names = getattr(vectorizer, 'get_feature_names_out', None)
        if get_names is None:
            get_names = vectorizer.get_feature_names
        feature_names = get_names()

        coefs = sorted(zip(clf.coef_[0], feature_names), reverse=True)
        # Walk the ranking from both ends at once.
        topn = zip(coefs[:n], coefs[:-(n + 1):-1])

        return '\n'.join(
            '{:0.4f}{: >25} {:0.4f}{: >25}'.format(coef_p, feature_p,
                                                 coef_n, feature_n)
            for (coef_p, feature_p), (coef_n, feature_n) in topn)
def demo_movie_reviews(trainer, n_instances=None, output=None):
    """
    Train and evaluate a classifier on the Movie Reviews dataset.

    The corpus has been preprocessed using the default sentence tokenizer and
    WordPunctTokenizer. Features are composed of:
        - most frequent unigrams

    :param trainer: `train` method of a classifier.
    :param n_instances: the total number of reviews to use for training and
        testing. Half of them are positive reviews and half negative ones.
    :param output: the output file where results have to be reported.
    """
    from nltk.corpus import movie_reviews
    from sentiment_analyzer import SentimentAnalyzer

    if n_instances is not None:
        # The requested total is shared equally between the two classes.
        n_instances = int(n_instances/2)

    def _labelled_reviews(label):
        file_ids = movie_reviews.fileids(label)[:n_instances]
        return [(list(movie_reviews.words(file_id)), label) for file_id in file_ids]

    pos_docs = _labelled_reviews('pos')
    neg_docs = _labelled_reviews('neg')

    # Each class is split on its own so that the train and test sets keep the
    # same balanced class distribution.
    train_pos_docs, test_pos_docs = split_train_test(pos_docs)
    train_neg_docs, test_neg_docs = split_train_test(neg_docs)
    training_docs = train_pos_docs + train_neg_docs
    testing_docs = test_pos_docs + test_neg_docs

    sentim_analyzer = SentimentAnalyzer()
    all_words = sentim_analyzer.all_words(training_docs)

    # Unigram word features
    unigram_feats = sentim_analyzer.unigram_word_feats(all_words, min_freq=4)
    sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)

    # Turn both document sets into their feature-value representation
    training_set = sentim_analyzer.apply_features(training_docs)
    test_set = sentim_analyzer.apply_features(testing_docs)

    classifier = sentim_analyzer.train(trainer, training_set)
    try:
        classifier.show_most_informative_features()
    except AttributeError:
        print('Your classifier does not provide a show_most_informative_features() method.')
    results = sentim_analyzer.evaluate(test_set)

    if output:
        extr = [extractor.__name__ for extractor in sentim_analyzer.feat_extractors]
        output_markdown(output,
                        Dataset='Movie_reviews',
                        Classifier=type(classifier).__name__,
                        Tokenizer='WordPunctTokenizer',
                        Feats=extr,
                        Results=results,
                        Instances=n_instances)
def test_analyze_positive01(self):
    """analyze() must return a tuple of three ints that add up to the number of texts."""
    X = OrderedDict([
        ('Первый сайт', ['Ваш банк полный ацтой!', 'Ваш магаз — нормас']),
        ('Второй сайт', ['Мне пофиг на ваш ресторан']),
    ])
    len_X = sum(len(texts) for texts in X.values())

    sent = SentimentAnalyzer(feature_extractor=self.feature_extractor,
                             classifier=self.classifier)
    output = sent.analyze(X)

    self.assertIsInstance(output, tuple)
    self.assertEqual(len(output), 3)
    for count in output:
        self.assertIsInstance(count, int)
    self.assertEqual(sum(output), len_X)
    del sent
def sentiment_analysis(translated_articles_path):
    '''
    Add a 'sentiment' column to every Excel file in a directory.

    Each file listed in ``translated_articles_path`` is read, its
    'content_translated' column is scored by the SentimentAnalyzer, and the
    file is written back in place. Progress is printed on a single line.

    Files are read from and written to ``translated_articles_path`` itself;
    previously they were read from a hard-coded 'translated/' directory and
    written to ``translated_articles_path + file``, which only worked when
    the argument was exactly 'translated/'.

    :param translated_articles_path: directory holding the translated
        article spreadsheets
    :return: None
    '''
    sa = SentimentAnalyzer()
    file_names = os.listdir(translated_articles_path)
    total = len(file_names)
    for position, file_name in enumerate(file_names, start=1):
        print('\r generate sentiment for {}, {}/{}'.format(
            file_name, position, total), end='')
        # os.path.join copes with a directory given with or without a
        # trailing separator.
        file_path = os.path.join(translated_articles_path, file_name)
        df = pd.read_excel(file_path)
        df['sentiment'] = sa.analise_texts(df['content_translated'])
        df.to_excel(file_path)
    print()
def test_pickle_unpickle_positive01(self):
    """An analyzer restored from a pickle file must give the same analysis.

    The pickle is written inside a temporary directory that is removed when
    the round trip is done, so the test no longer leaves ``sent.pkl`` behind
    in the current working directory.
    """
    import os
    import tempfile

    X = OrderedDict({
        'Первый сайт': ['Ваш банк полный ацтой!', 'Ваш магаз — нормас'],
        'Второй сайт': ['Мне пофиг на ваш ресторан']
    })
    sent = SentimentAnalyzer(feature_extractor=self.feature_extractor,
                             classifier=self.classifier)
    output1 = sent.analyze(X)

    with tempfile.TemporaryDirectory() as tmp_dir:
        pickle_path = os.path.join(tmp_dir, 'sent.pkl')
        with open(pickle_path, 'wb') as f:
            pickle.dump(sent, f)
        # Drop the original so only the unpickled copy is exercised below.
        del sent
        with open(pickle_path, 'rb') as f:
            sent = pickle.load(f)

    output2 = sent.analyze(X)
    self.assertEqual(output1, output2)
    del sent
def set_up():
    """Initialize the analyzer, create fresh data files and load the user ids.

    Sets the module-level ``sentiment_analyzer``. If either the raw or the
    processed data file already has content, a notice is printed and the
    process exits so existing data is never overwritten. Otherwise the raw
    file is started with ``[`` and the processed file with its CSV header.
    Finally the first whitespace-separated field of every line in the user
    ids file is appended, as an int, to the module-level ``user_ids``.
    Blank lines in that file are skipped.
    """
    global sentiment_analyzer
    global user_ids

    # before doing anything else, initialize the sentiment analyzer
    sentiment_analyzer = SentimentAnalyzer()
    print("initialized sentiment analyzer")

    # first check to see whether or not the data files are empty or not
    # if they are nonempty, then notify, and quit
    if is_non_zero_file(setup.RAW_DATA_PATH) or is_non_zero_file(
            setup.PROCESSED_DATA_PATH):
        print("Data already exists, either at " + setup.RAW_DATA_PATH +
              " or at " + setup.PROCESSED_DATA_PATH + ".")
        print(
            "Delete these files (after having copied its contents, perhaps), and try again."
        )
        print("Measure.py will now self destruct.")
        import sys
        sys.exit()

    # Set up the two data files
    # the raw data file should be in json format
    # thus, write to it an opening square bracket
    with open(setup.RAW_DATA_PATH, "w") as raw_file:
        raw_file.write("[")

    # the processed data file should be a csv file
    with open(setup.PROCESSED_DATA_PATH, "w") as processed_file:
        processed_file.write(
            "Date,Hate Speech Tally,Offensive But Not Hate Speech Tally,Not Offensive Tally,Combined Score,Total Tally\n"
        )

    # load the user ids
    with open(setup.USER_IDS_PATH, "r") as user_ids_file:
        for line in user_ids_file:
            fields = line.split()
            if not fields:
                # A blank line carries no id; indexing it would raise
                # IndexError.
                continue
            user_ids.append(int(fields[0]))
def courses_by_popularity():
    """Return, as JSON, every commented course ordered by average sentiment.

    For each course from the tracker, the comments containing the course
    name are scored and averaged. Courses without comments are left out.
    The list is sorted by ``avg_sentiment``, highest first.
    """
    course_tracker = CourseTracker()
    db = DatabaseManager()
    analyzer = SentimentAnalyzer()

    ranking = []
    for course in course_tracker.get_all_courses():
        comments = db.comments_containing(course.name)
        if len(comments) == 0:
            # Nothing to average for this course.
            continue
        scores = [analyzer.analyze_sentiment(comment.content)
                  for comment in comments]
        average = sum(scores) / len(scores)
        ranking.append({"course": course.name, "avg_sentiment": average})

    ranking.sort(key=lambda entry: entry["avg_sentiment"], reverse=True)
    return jsonify(ranking)
def prepareSentimentAnalyzer():
    """Return a trained SentimentAnalyzer, reusing the serialized copy if any.

    The analyzer is loaded from ``SA_SERIAL_FILE`` when that file can be
    read and unpickled. Otherwise (missing, empty or corrupt file) a new
    analyzer is trained from scratch and serialized to that file for the
    next run.

    :return: the loaded or freshly trained SentimentAnalyzer.
    """
    # import serializer. try cPickle if available.
    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    # try loading a previously serialized sentiment analyzer instance, if possible.
    # otherwise, train from scratch.
    try:
        # Pickle data is binary: always use 'rb'/'wb' so the stream is not
        # altered by newline translation or text decoding.
        # NOTE: unpickling runs arbitrary code; SA_SERIAL_FILE must only ever
        # be a file written by this function.
        with open(SA_SERIAL_FILE, 'rb') as f:
            sa = pickle.load(f)
    except (IOError, EOFError, pickle.PickleError):
        # EOFError covers an empty or truncated cache file.
        print('Preparing sentiment analyzer for the first time. Please wait...')
        sa = SentimentAnalyzer()
        sa.train()
        print('Done.\n')
        with open(SA_SERIAL_FILE, 'wb') as f:
            pickle.dump(sa, f)
    return sa
def initializeAnalyzer():
    """Load every lexicon file and return a SentimentAnalyzer built from them.

    Emoticons, neutrality signs, polarized nouns/verbs/adjectives/conjunctions,
    foma dividers and reversers, meaning reversers and punctuation signs are
    read from ``../source/lexicon``; the emoticon dictionary is additionally
    passed through ``organizedListOfEmoties`` before use.
    """
    emoticons = getEmotiDictionary(
        file_path="../source/lexicon/EmoticonSentimentLexicon.txt")
    neutral_signs = getSetOfWordsFromFile(
        file_path="../source/lexicon/NeutralitySigns.txt")

    nouns = getPolarValues(file_path="../source/lexicon/polarized_nouns.txt")
    verbs = getPolarValues(file_path="../source/lexicon/polarazed_verbs.txt")
    adjectives = getPolarValues(
        file_path="../source/lexicon/polarized_adjectives.txt")
    conjunctions = getPolarValues(
        file_path="../source/lexicon/polarized_conjunctions.txt")

    dividers = getSetOfWordsFromFile(
        file_path="../source/lexicon/foma_features_dividers.txt")
    reversers = getSetOfWordsFromFile(
        file_path="../source/lexicon/foma_features_reversers.txt")
    meaning_reversers = getSetOfWordsFromFile(
        file_path="../source/lexicon/meaning_reverser.txt")
    punctuation = getSetOfWordsFromFile(
        file_path="../source/lexicon/punctuation_signs.txt")

    emoticons = organizedListOfEmoties(emoticons)

    return SentimentAnalyzer(emoti_dict=emoticons,
                             neutral_signs=neutral_signs,
                             polar_nouns=nouns,
                             polar_verbs=verbs,
                             polar_adjectives=adjectives,
                             polar_conjunctions=conjunctions,
                             foma_dividers=dividers,
                             foma_reversers=reversers,
                             punctuation_signs=punctuation,
                             meaning_reversers=meaning_reversers,
                             vrb_prob_coef=3,
                             sent_coef_decr=.2,
                             coef_of_postg_change=.1)
def main():
    """Initialize shared state and start the abort and streamer threads.

    Sets the module-level ``sentiment_analyzer``, makes sure the user ids
    file exists, appends one ``(user_id, hatefulness_score)`` tuple per line
    of that file to the module-level ``user_ids``, and then starts one thread
    running ``user_abort`` and one running ``setup_streamer``. Blank lines in
    the user ids file are skipped.
    """
    global sentiment_analyzer
    global user_ids

    # before doing anything else, initialize the sentiment analyzer
    sentiment_analyzer = SentimentAnalyzer()
    print("initialized sentiment analyzer")

    # also initialize the user ids array to all the elements currently in the user ids data file
    # first create the file (append mode leaves existing content untouched)
    open(setup.USER_IDS_PATH, "a").close()

    # then open it, and load all of its contents into the user_ids list
    with open(setup.USER_IDS_PATH, "r") as user_ids_file:
        for line in user_ids_file:
            fields = line.split()
            if not fields:
                # A blank line carries no record; indexing it would raise
                # IndexError.
                continue
            user_id = int(fields[0])
            hatefulness_score = float(fields[1])
            user_ids.append((user_id, hatefulness_score))

    # define two threads: one user_abort, and one setup_streamer (which also, incidentally, starts the streamer)
    abort_thread = Thread(target=user_abort)
    streamer_thread = Thread(target=setup_streamer)

    # start them both
    abort_thread.start()
    streamer_thread.start()
except ImportError: pass print('async_mode is ' + async_mode) import eventlet eventlet.monkey_patch() app = Flask(__name__) app.config['SECRET_KEY'] = 'secret!' socketio = SocketIO(app, async_mode=async_mode) thread = None cursor = db.cursor() cursor.execute("USE sp_data") analyzer = SentimentAnalyzer() analyzer.set_data(positive_tweets, negative_tweets) #analyzer.train_data() analyzer.get_training_data() tweet_query = "SELECT text, ST_X(coordinates) AS lat, ST_Y(coordinates) AS lon, created_at, country_code, lang FROM test_cases" try: cursor.execute(tweet_query) test_tweets = cursor.fetchall() print "Executing SQL statement" except: print "Error: cannot fetch data!" def do_analysis():
def analysisSearch():
    """Handle the search form: fetch tweets, score them and render the analysis.

    Only POST requests are processed. The form supplies ``term`` and
    ``retweets`` (1: exclude retweets, 2: include them). Up to 10 Spanish
    tweets are fetched and scored, the texts are grouped into positive,
    negative and neutral, a distribution plot of the scores is shown, and
    'showAnalysis.html' is rendered with the collected data. A
    ``tweepy.TweepError`` is printed and nothing is returned.
    """
    if request.method != 'POST':
        return None

    try:
        term = request.form['term']  # term typed by the user
        retweets = int(request.form['retweets'])  # 1 or 2, see below

        query_search = ''
        if retweets == 1:
            # original tweets only
            query_search = str(term + ' -filter:retweets')
        elif retweets == 2:
            # original tweets and retweets
            query_search = str(term)

        # fetch at most 10 tweets
        cursor = tweepy.Cursor(api.search,
                               q=query_search,
                               lang='es',
                               tweet_mode='extended')
        tweets = cursor.items(10)

        # The analyzer calls below are kept in their original order.
        sentiment_analyzer = SentimentAnalyzer()
        scores_list = sentiment_analyzer.get_scores_list(tweets)  # scores without zeros
        sentiment_analyzer.array_of_tweets_and_score_method()  # result is not used here
        arrays_ordered = sentiment_analyzer.order_arrays_list()

        text_positive = sentiment_analyzer.get_postive_text(arrays_ordered)
        text_negative = sentiment_analyzer.get_negative_text(arrays_ordered)
        text_neutral = sentiment_analyzer.get_neutral_text(arrays_ordered)
        percentages = sentiment_analyzer.get_percentages()

        # positive, negative, neutral and total tweet counts
        counts = [len(text_positive), len(text_negative), len(text_neutral)]
        quantities = counts + [sum(counts)]

        # payload handed to the template
        data = [
            {"text_positive": text_positive},
            {"text_negative": text_negative},
            {"text_neutral": text_neutral},
            {"percentages": percentages},
            {"quantities": quantities},
        ]

        # plot the score distribution
        scores_array = np.array(scores_list)
        sns.set()
        sns.distplot(scores_array)
        plt.show()

        return render_template('showAnalysis.html', data=data)
    except tweepy.TweepError as error:
        print(error.reason)
""" token_list = [] for x in range(len(df)): tokens = BagOfWords().get_tokens(df['body'][x][0]) token_list.append(tokens) df['body_tokens'] = token_list # pp.pprint(df) """ SentimentAnalyzer """ sa = SentimentAnalyzer() # print(sa.do_pos_sentiment_analysis(df['body_tokens'][0])) # print(sa.do_neg_sentiment_analysis(df['body_tokens'][0])) pos_sent_list = [] for x in range(len(df)): pos_sent_result = sa.do_pos_sentiment_analysis(df['body_tokens'][x]) pos_sent_list.append(pos_sent_result[0]) df['% Positive'] = pos_sent_list neg_sent_list = [] for x in range(len(df)): neg_sent_result = sa.do_neg_sentiment_analysis(df['body_tokens'][x]) neg_sent_list.append(neg_sent_result[0])
# connect to kafka producer kafka_producer = KafkaProducer(bootstrap_servers=[kafka_host], value_serializer=lambda x: dumps(x).encode('utf-8')) tr = TweetRetriever() # shared object between consumer and producer. queue = Queue(maxsize=20000) producer = Producer_And_Consume.Producer("producer", queue) trends_consumer = [] sentiment_analyzers = [] for i in range(no_of_config_key): sentiment_analyzer = SentimentAnalyzer(tr, [twitter_keys[i]['streamConsumerKey'], twitter_keys[i]['streamConsumerSecret'], twitter_keys[i]['streamAccessTokenKey'], twitter_keys[i]['streamAccessTokenSecret']]) try: log.info("Starting Producer_And_Consume.ConsumerThread") cr = Producer_And_Consume.ConsumerThread(str(i), queue, kafka_producer, sentiments, sentiment_analyzer, consumer_sleep_time) trends_consumer.append(cr) sentiment_analyzers.append(sentiment_analyzer) cr.start() except Exception as ex: log.error("failed to start consumer threads." + str(ex)) aggregator = Aggregator(paths[0], paths[1]) # Elastic Beanstalk application setup # EB looks for an 'application' callable by default
def demo_tweets(trainer, n_instances=None, output=None):
    """
    Train and test a classifier on the labelled tweet samples, tokenized
    using TweetTokenizer.

    Features are composed of:
        - 1000 most frequent unigrams
        - 100 top bigrams (using BigramAssocMeasures.pmi)

    :param trainer: `train` method of a classifier.
    :param n_instances: the total number of tweets to use for training and
        testing. Half of them are positive tweets and half negative ones.
    :param output: the output file where results have to be reported.
    """
    from nltk.tokenize import TweetTokenizer
    from sentiment_analyzer import SentimentAnalyzer
    from nltk.corpus import twitter_samples, stopwords

    # Different customizations for the TweetTokenizer
    tokenizer = TweetTokenizer(preserve_case=False)
    # tokenizer = TweetTokenizer(preserve_case=True, strip_handles=True)
    # tokenizer = TweetTokenizer(reduce_len=True, strip_handles=True)

    if n_instances is not None:
        # The requested total is shared equally between the two classes.
        n_instances = int(n_instances/2)

    fields = ['id', 'text']
    positive_csv = 'positive_tweets.csv'
    negative_csv = 'negative_tweets.csv'

    # Export the positive corpus first, then the negative one.
    for json_name, csv_name in (("positive_tweets.json", positive_csv),
                                ("negative_tweets.json", negative_csv)):
        json_path = twitter_samples.abspath(json_name)
        json2csv_preprocess(json_path, csv_name, fields, limit=n_instances)

    neg_docs = parse_tweets_set(negative_csv, label='neg', word_tokenizer=tokenizer)
    pos_docs = parse_tweets_set(positive_csv, label='pos', word_tokenizer=tokenizer)

    # Each class is split on its own so that the train and test sets keep the
    # same balanced class distribution.
    train_pos_docs, test_pos_docs = split_train_test(pos_docs)
    train_neg_docs, test_neg_docs = split_train_test(neg_docs)
    training_tweets = train_pos_docs + train_neg_docs
    testing_tweets = test_pos_docs + test_neg_docs

    sentim_analyzer = SentimentAnalyzer()
    # stopwords = stopwords.words('english')
    # all_words = [word for word in sentim_analyzer.all_words(training_tweets) if word.lower() not in stopwords]
    all_words = list(sentim_analyzer.all_words(training_tweets))

    # Unigram word features
    unigram_feats = sentim_analyzer.unigram_word_feats(all_words, top_n=1000)
    sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)

    # Bigram collocation features, computed on the token lists only
    training_tokens = [tweet[0] for tweet in training_tweets]
    bigram_collocs_feats = sentim_analyzer.bigram_collocation_feats(
        training_tokens, top_n=100, min_freq=12)
    sentim_analyzer.add_feat_extractor(extract_bigram_feats, bigrams=bigram_collocs_feats)

    training_set = sentim_analyzer.apply_features(training_tweets)
    test_set = sentim_analyzer.apply_features(testing_tweets)

    classifier = sentim_analyzer.train(trainer, training_set)
    # classifier = sentim_analyzer.train(trainer, training_set, max_iter=4)
    try:
        classifier.show_most_informative_features()
    except AttributeError:
        print('Your classifier does not provide a show_most_informative_features() method.')
    results = sentim_analyzer.evaluate(test_set)

    if output:
        extr = [extractor.__name__ for extractor in sentim_analyzer.feat_extractors]
        output_markdown(output,
                        Dataset='labeled_tweets',
                        Classifier=type(classifier).__name__,
                        Tokenizer=tokenizer.__class__.__name__,
                        Feats=extr,
                        Results=results,
                        Instances=n_instances)
from sentiment_analyzer import SentimentAnalyzer
import warnings

# Small manual check: run the analyzer on two sample sentences and print each
# sentence followed by the analyzer's output for it.
if __name__ == '__main__':
    #warnings.simplefilter(action='ignore', category=FutureWarning)
    test_list = [
        "This is a test example, which is very happy and joyous and I am glad that this works",
        "This is a second test case example that is sad and in fact, sucks.",
    ]

    ob = SentimentAnalyzer()
    output_sentences = ob.get_string(test_list)

    for i, sentence in enumerate(test_list):
        print(sentence)
        print(output_sentences[i])
        print("\n\n\n")
# Trainer for the El Nino Tracker
# This file is used to train data for the El Nino Tracker application
from sentiment_analyzer import SentimentAnalyzer
from training_data import positive_tweets, negative_tweets

# Hand the labelled tweets to the analyzer, then train on them.
analyzer = SentimentAnalyzer()
analyzer.set_data(positive_tweets, negative_tweets)

# print(...) with a single string argument behaves the same on Python 2 and 3,
# unlike the Python 2-only print statement used before.
print("data set")
print("training data...")
analyzer.train_data()
print("Finished training data. Training data is saved in sp_classifier.pickle file")
from sentiment_analyzer import SentimentAnalyzer
import json
from flask import Flask
from flask import render_template
from flask import request

# Flask application configured from config.py, with one shared analyzer.
app = Flask(__name__)
app.config.from_pyfile('config.py')
movies_list = []
sa = SentimentAnalyzer()


@app.route('/')
def movies():
    """Render the index page with the movies read from data/movies_list.json."""
    with open('data/movies_list.json') as source:
        catalog = json.load(source)["movies"]
    return render_template('index.html', movies=catalog)


@app.route('/send/<movie_id>', methods=['POST'])
def send(movie_id):
    """Predict on the posted feedback and return the first result as JSON text."""
    prediction = sa.predict(request.form['feedback'])
    reply = {'status': 'OK', 'res': str(prediction[0])}
    return json.dumps(reply)


if __name__ == '__main__':
    train_model = app.config['TRAIN']
def __init__(self):
    """Create the analyzer and expose its ``clf`` attribute on this object."""
    analyzer = SentimentAnalyzer()
    self.sentiment = analyzer
    self.clf = analyzer.clf
if __name__ == "__main__": do_pickle = False do_train_data = False do_fetch_data = False do_preprocess_data = False do_cross_validation_strategy = False do_holdout_strategy = False do_analyze_visualize = False # Create 'pickled' and 'plots' directories if not exists Path('./pickled').mkdir(exist_ok=True) Path('./plots').mkdir(exist_ok=True) if do_fetch_data or do_preprocess_data or do_cross_validation_strategy or do_holdout_strategy or do_analyze_visualize: sentiment = SentimentAnalyzer() if do_fetch_data: sentiment.getInitialData('datasets/product_reviews.json', do_pickle) if do_preprocess_data: reviews_df = pd.read_pickle('pickled/product_reviews.pickle') sentiment.preprocessData(reviews_df, do_pickle) if do_cross_validation_strategy or do_holdout_strategy: reviews_df_preprocessed = pd.read_pickle( 'pickled/product_reviews_preprocessed.pickle') print(reviews_df_preprocessed.isnull().values.sum() ) # Check for any null values if do_cross_validation_strategy:
except ImportError: pass print('async_mode is ' + async_mode) import eventlet eventlet.monkey_patch() app = Flask(__name__) app.config['SECRET_KEY'] = 'secret!' socketio = SocketIO(app, async_mode=async_mode) thread = None cursor = db.cursor() cursor.execute("USE sp_data") analyzer = SentimentAnalyzer() analyzer.set_data(positive_tweets,negative_tweets) #analyzer.train_data() analyzer.get_training_data() tweet_query = "SELECT text, ST_X(coordinates) AS lat, ST_Y(coordinates) AS lon, created_at, country_code, lang FROM test_cases" try: cursor.execute(tweet_query) test_tweets = cursor.fetchall() print "Executing SQL statement" except: print "Error: cannot fetch data!" def do_analysis(): tweet = { "text": "", "lat": 0, "lon": 0, "created_at": "", "country_code": 0, "lang": "" }
def test_init_positive01(self):
    """Constructing the analyzer from the fixture extractor and classifier must not raise."""
    analyzer = SentimentAnalyzer(classifier=self.classifier,
                                 feature_extractor=self.feature_extractor)
    del analyzer
from sentiment_analyzer import SentimentAnalyzer
from symbol_scanner import SymbolScanner
from twitter_scanner import TwitterScanner
from notifier import Notifier
import time

# For every scanned company symbol: collect its tweets, score them and print
# the average sentiment.
symbols = SymbolScanner().company_list
for symbol in symbols:
    # Pause 5 seconds before each Twitter scan.
    time.sleep(5)
    twitter = TwitterScanner(symbol)
    tweets = twitter.tweets
    sentiment = SentimentAnalyzer(tweets)
    # print(...) with one argument behaves the same on Python 2 and 3, unlike
    # the Python 2-only print statement used before.
    print("Symbol: " + symbol + " Avg Sentiment: " +
          str(sentiment.average_sentiment))
    #Notifier("YOUR_TWITTER_USERNAME", symbol, sentiment.average_sentiment)
    sentiment.reset()
    twitter.reset()