def scan_post(post):
    # check if there are any tracked symbols in the post
    syms = []
    for s in TRACKED_SYMBOLS:
        if scan_post_for_symbol(post, s):
            syms.append(s['ticker'])
    # if any symbols are present, save them
    if syms:
        post['syms'] = syms
    # scan for any profanity
    if profanity.is_profane(post['fulltext']) or profanity.is_profane(post['title']):
        post['profanity'] = True
    # scan for sentiment, prioritising the full text over the title
    sen = 0.5
    # wrapped in try/except: if classification fails we just mark the post
    # neutral, since posts are limited
    try:
        if len(post['fulltext']):
            sen = sentiment.classify(post['fulltext'])
        elif len(post['title']):
            sen = sentiment.classify(post['title'])
        else:
            # if both fields are empty, scanning is pointless anyway
            return
    except Exception:
        traceback.print_exc()
        L.error("Failed to get sentiment")
    L.info("Found sentiment: {}".format(sen))
    post['sentiment'] = sen
    OUTBOUND_QUEUE.put(post)
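# Hypothetical usage sketch for scan_post; TRACKED_SYMBOLS, OUTBOUND_QUEUE and
# the scanner/classifier modules are assumed to be set up elsewhere, and the
# post shape below is inferred from the field accesses above.
example_post = {
    'title': 'AAPL earnings beat expectations',
    'fulltext': 'Apple posted record revenue this quarter.',
}
scan_post(example_post)
# On success the dict gains 'syms' (matched tickers), possibly 'profanity',
# and 'sentiment', and is pushed onto OUTBOUND_QUEUE for downstream workers.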
def buttonClicked(self):
    # the line edit is prefixed with "You: ", so [5:] strips that prefix
    user_text = self.le.text()[5:]
    self.te.insertPlainText(" " + user_text)
    if General.getLastSentence() == "":
        Chat.Chat(user_text)
        self.te.setAlignment(Qt.AlignLeft)
        self.te.append(Chat.popStack())
    else:
        print(General.getLastSentence())
        if len(General.getLastSentence().split("!!")) != 1:
            # the last bot sentence carries a "!!<topic>" tag; pick a canned
            # opener matching the topic and the sentiment of the user's reply
            if General.getLastSentence().split("!!")[1] == "place":
                if sentiment.classify(user_text) == "Negative":
                    out = random.choice(negativePlace)
                else:
                    out = random.choice(positivePlace)
            elif sentiment.classify(user_text) == "Negative":
                out = random.choice(negativePerson)
            else:
                out = random.choice(positivePerson)
            Chat.Chat(user_text)
            self.te.append(out + Chat.popStack().lower())
        else:
            Chat.Chat(user_text)
            self.te.append(Chat.popStack())
    self.le.setText("You: ")
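# Hypothetical illustration of the "!!" tagging convention buttonClicked
# relies on: the bot's previous sentence may carry a "!!<topic>" suffix that
# selects which canned-response pool to draw from.
last = "Where do you work?!!place"
sentence, topic = last.split("!!")  # -> ("Where do you work?", "place")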
def on_data(self, data):
    # TODO figure out encoding problems
    try:
        data = simplejson.loads(data, "utf-8")
        tweetID = data["id_str"]
        text = data["text"]
        coordinates = data["coordinates"]
        # construct a Parse GeoPoint from the tweet's coordinates, if any
        location = None
        print("\n\nReceived tweet {}: {}".format(tweetID, text))
        if coordinates:
            location = GeoPoint(latitude=coordinates["coordinates"][1],
                                longitude=coordinates["coordinates"][0])
        polarity = sentiment.classify(text)
        daily_averages = DailyAverage.Query.all().order_by("-createdAt")
        calls_made = list(daily_averages)[0].totalTweetsYaks
        topics = []
        emotions = []
        if calls_made < 1000:
            # run the taxonomy classifier on the tweet text
            topics = taxonomy.classify(text)
        if calls_made < 300:
            emotions = emotion.get_emotions(text)
        tweet = Tweet(tweetID=tweetID, polarity=polarity, topics=topics,
                      emotions=emotions, text=text, location=location)
        try:
            tweet.save()
            print("Saved tweet object ({})".format(tweet.objectId))
        except Exception:
            print("Tweet has already been saved.")
    except UnicodeEncodeError as e:
        # need to handle weird characters somehow
        print("UnicodeEncodeError: {}".format(e))
    except TypeError as e:
        print("TypeError: {}".format(e))
    except Exception:
        print("Unexpected error: {}".format(sys.exc_info()[0]))
    return True
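# Note on the index order above: Twitter's "coordinates" field is a GeoJSON
# Point, which stores [longitude, latitude], so index 1 is the latitude.
# Illustrative payload (values made up):
coordinates = {"type": "Point", "coordinates": [-0.1276, 51.5072]}  # London
longitude, latitude = coordinates["coordinates"]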
def write_predictions(filedir, inqtabs_dict, swn_dict, output_file_name):
    testfiledir = os.path.join(filedir, 'test')
    with open(output_file_name, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, delimiter=',',
                                fieldnames=['FileIndex', 'Category'])
        writer.writeheader()
        for filename in sorted(os.listdir(testfiledir),
                               key=lambda x: int(os.path.splitext(x)[0])):
            with open(os.path.join(testfiledir, filename)) as reviewfile:
                review = reviewfile.read()
            prediction = dict()
            prediction['FileIndex'] = os.path.splitext(filename)[0]
            prediction['Category'] = sentiment.classify(
                review, inqtabs_dict, swn_dict)
            writer.writerow(prediction)
def get_training_accuracy(data, inqtabs_dict, swn_dict):
    num_correct = 0
    etype_files = {}
    for etype in ["fp", "fn", "tp", "tn"]:
        etype_files[etype] = open(etype + '.txt', 'w')
    for row in data:
        sentiment_prediction = sentiment.classify(row['Review'],
                                                  inqtabs_dict, swn_dict)
        sentiment_label = int(row['Category'])
        if sentiment_prediction == sentiment_label:
            num_correct += 1
        etype = sentiment.get_error_type(sentiment_prediction, sentiment_label)
        etype_files[etype].write("%s\t%s\n" % (row['FileIndex'], row['Review']))
    accuracy = num_correct * 1.0 / len(data)
    for etype in ["fp", "fn", "tp", "tn"]:
        etype_files[etype].close()
    print("Accuracy: " + str(accuracy))
    return accuracy
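# Hedged sketch of the error-type mapping get_training_accuracy relies on.
# sentiment.get_error_type is not shown above; this assumes binary labels
# where 1 means positive and 0 means negative.
def get_error_type_sketch(prediction, label):
    if prediction == 1:
        return "tp" if label == 1 else "fp"
    return "tn" if label == 0 else "fn"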
def get_save_Twits():
    # URL for the StockTwits AAPL stream
    url = "http://stocktwits.com/symbol/AAPL?q=%24AAPL"
    content = urllib2.urlopen(url).read()
    soup = BeautifulSoup(content, "lxml")
    ol = soup.find(
        "ol", {"class": "stream-list show-conversation stream-poller"}
    ).find_all(attrs={"data-src": True})
    # acquire and save the Twits embedded in each element's data-src payload
    for i in ol:
        d = json.loads(i["data-src"].encode('ascii', 'ignore').decode('ascii'))
        # Twit attributes
        twitID = d["id"]
        twitBody = d["body"]
        sentimentTwit = ""
        liked = d["liked"]
        total_likes = d["total_likes"]
        username = d["user"]["username"]
        if d["sentiment"] is not None:
            sentimentTwit = d["sentiment"]["name"]
        print("\n\nReceived Twit {}: {}".format(twitID, twitBody))
        print(sentimentTwit)
        polarity = sentiment.classify(twitBody)
        twit = Twit(liked=liked, total_likes=total_likes, twitID=twitID,
                    sentiment=sentimentTwit, messageText=twitBody,
                    username=username, Polarity=polarity)
        try:
            twit.save()
            print("Saved twit object ({})".format(twit.objectId))
        except Exception:
            print("Twit has already been saved.")
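# Hypothetical shape of one "data-src" JSON payload the loop above expects;
# field names come from the dictionary accesses, values are illustrative.
sample_twit = {
    "id": 123456,
    "body": "$AAPL looking strong into earnings",
    "liked": False,
    "total_likes": 3,
    "user": {"username": "trader42"},
    "sentiment": {"name": "Bullish"},  # may be null for untagged Twits
}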
def index():
    request = session.get("request", None)
    tweets = []
    mood = []
    query_mood = ""
    if request is not None:
        for tweet in Twitter.query(request, 40)["statuses"]:
            tweets.append(tweet["text"])
        mood = classify(_classifier, _featx, tweets)
        # Then evaluate the "global mood"
        if mood.count("pos") > mood.count("neg"):
            query_mood = "pos"
        elif mood.count("pos") < mood.count("neg"):
            query_mood = "neg"
        else:
            query_mood = "neu"
        # Clear session after processing
        session["request"] = None
    return render_template(
        "index.html", entries=tweets, mood=mood,
        query_mood=query_mood, keywords=request)
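# Hypothetical worked example of the majority-vote logic above; classify is
# assumed to return one "pos"/"neg"/"neu" label per tweet.
labels = ["pos", "neg", "pos", "neu"]  # 2 pos vs 1 neg
# labels.count("pos") > labels.count("neg"), so the global mood is "pos".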
def mood(bot, update, args):
    # Abort if args is empty
    if not args:
        return None
    logger.info(
        "/mood request from %s, query: %s" % (update.message.chat_id, args))
    # Let the user know that the bot is "typing..."
    bot.sendChatAction(
        chat_id=update.message.chat_id, action=ChatAction.TYPING)
    # Read the query, submit it via python-twitter-tools, then store tweets
    query = " ".join(args)
    tweets = []
    for tweet in Twitter.query(query, 40)["statuses"]:
        tweets.append(tweet["text"])
    # Request a classification
    mood = classify(_classifier, _featx, tweets)
    # Then evaluate the "global mood"
    if mood.count("pos") > mood.count("neg"):
        query_mood = "positive"
        EMOJI = "😃"
    elif mood.count("pos") < mood.count("neg"):
        query_mood = "negative"
        EMOJI = "😔"
    else:
        query_mood = "neutral"
        EMOJI = "😌"
    bot.sendMessage(
        chat_id=update.message.chat_id,
        text="%s People are now feeling %s about %s"
             % (EMOJI, query_mood, query))
def run_service():
    n = len(urls)
    logger.info(f"Fetching from {n} RSS feeds\n")
    for i, (key, url) in enumerate(urls.items(), 1):
        logger.info(f"{i}/{n} Fetching {key} feed...")
        try:
            f = feedparser.parse(url)
            feed_metadata = f.feed
            feed = Feed.objects(href=url).first()
            title = feed_metadata.title
            if feed_metadata.title_detail.type == "text/html":
                title = " ".join(
                    BeautifulSoup(title, features="lxml").stripped_strings)
            subtitle = feed_metadata.subtitle
            if feed_metadata.subtitle_detail.type == "text/html":
                subtitle = " ".join(
                    BeautifulSoup(subtitle, features="lxml").stripped_strings)
            generator = feed_metadata.generator
            updated_struct_time = f.updated_parsed
            last_updated = (datetime.fromtimestamp(mktime(updated_struct_time))
                            if updated_struct_time else None)
            if feed:
                if feed.last_updated < last_updated:
                    feed.update(set__title=title)
                    feed.update(set__subtitle=subtitle)
                    feed.update(set__generator=generator)
                    feed.update(set__last_updated=last_updated)
                else:
                    # feed unchanged since the last run; skip its entries
                    continue
            else:
                feed = Feed(
                    href=url,
                    title=title,
                    subtitle=subtitle,
                    generator=generator,
                    last_updated=last_updated,
                )
            feed_ref = feed.to_dbref()
        except Exception as e:
            logger.error(f"Error while getting {key} feed. {e}")
            continue
        for e in tqdm(f.entries,
                      desc=f"Fetching articles from {key} feed.",
                      unit="article"):
            try:
                link = e.link
                if entry := Entry.objects(link=link).first():
                    # entry already stored; skip it
                    continue
                title = e.title
                if e.title_detail.type == "text/html":
                    title = " ".join(
                        BeautifulSoup(title, features="lxml").stripped_strings)
                summary = e.summary
                if e.summary_detail.type == "text/html":
                    summary = " ".join(
                        BeautifulSoup(summary, features="lxml").stripped_strings)
                published_struct_time = e.published_parsed
                published = (datetime.fromtimestamp(mktime(published_struct_time))
                             if published_struct_time else None)
                a = newspaper.Article(link)
                a.download()
                a.parse()
                a.nlp()
                sentiment = classify(a.text)
                if a.summary == "you are here:":
                    continue
                article = Article(
                    title=a.title,
                    html=a.html,
                    tags=a.tags,
                    keywords=a.keywords,
                    summary=a.summary,
                    text=a.text,
                )
                entry = Entry(
                    link=link,
                    title=title,
                    summary=summary,
                    published=published,
                    feed=feed_ref,
                    article=article,
                    sentiment=sentiment,
                )
                feed.entries.append(entry.to_dbref())
                entry.save()
                feed.save()
            except Exception as e:
                logger.error(
                    f"Error while getting an article from {key} feed. {e}")
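# Hedged sketch of the mongoengine documents run_service appears to assume;
# field names mirror the attribute and keyword usage above, and the types are
# guesses rather than the project's actual schema.
import mongoengine as me

class Article(me.EmbeddedDocument):
    title = me.StringField()
    html = me.StringField()
    tags = me.ListField(me.StringField())
    keywords = me.ListField(me.StringField())
    summary = me.StringField()
    text = me.StringField()

class Entry(me.Document):
    link = me.URLField(unique=True)
    title = me.StringField()
    summary = me.StringField()
    published = me.DateTimeField()
    feed = me.ReferenceField("Feed")
    article = me.EmbeddedDocumentField(Article)
    sentiment = me.DynamicField()

class Feed(me.Document):
    href = me.URLField(unique=True)
    title = me.StringField()
    subtitle = me.StringField()
    generator = me.StringField()
    last_updated = me.DateTimeField()
    entries = me.ListField(me.ReferenceField(Entry))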
def classify_positive(n):
    if 'declined to make' in n or len(n.strip()) < 1:
        return "N/A"
    return sentiment.classify(n, clf)['pos']
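# Hypothetical usage, assuming sentiment.classify(text, clf) returns a dict
# of class probabilities such as {'pos': 0.8, 'neg': 0.2}:
print(classify_positive("Great quarter with strong guidance."))       # e.g. 0.8
print(classify_positive("The spokesman declined to make a comment.")) # "N/A"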
#!/usr/local/bin/python
# encoding=utf8
import sentiment

# classify a Chinese sentence ("So happy today!")
sentence = "今天开心啊!"
print(sentiment.classify(sentence))