def get_stories():
    """Fetch top business stories from RSS feeds and score their sentiment.

    Pulls entries from the MarketWatch and Google News business feeds,
    extracts each article's text, and runs it through the sentiment service.

    Returns:
        list of (url, title, summary, pos, neg, neu) tuples — one per story
        whose sentiment call succeeded; pos/neg/neu are float probabilities.
    """
    stories = []
    feed_urls = [
        "http://feeds.marketwatch.com/marketwatch/topstories/",
        "http://news.google.com/news?pz=1&cf=all&ned=us&hl=en&topic=b&output=rss",
    ]
    for feed_url in feed_urls:
        feed = feedparser.parse(feed_url)
        for entry in feed['entries']:
            title = entry['title']
            # Renamed from `url` (the original shadowed the outer loop
            # variable, which was confusing even though harmless here).
            link = entry['link']
            summary = html_extract.text_from_url(link)
            results = sentiment(summary)
            # The sentiment service signals failure via label == "Error";
            # skip those stories rather than record bogus probabilities.
            if results["label"] != "Error":
                pos = float(results["probability"]["pos"])
                neg = float(results["probability"]["neg"])
                neu = float(results["probability"]["neutral"])
                stories.append((link, title, summary, pos, neg, neu))
    return stories
def main():
    """Backfill `article_text` for every row in the `articles` table.

    Fetches each stored URL, extracts the article text, and writes it back
    via a parameterized UPDATE. URLs that fail to download are logged to the
    "bad_urls" file (row id + exception info) and skipped.
    """
    import sys  # hoisted: the original imported inside the except clause

    dbinfo = recover()
    conn = MySQLdb.connect(**dbinfo)
    cur = conn.cursor()
    cur.execute("SELECT id,url FROM articles")
    rows = cur.fetchall()
    # Open the log ONCE before the loop. The original re-opened it with
    # mode "w" on every iteration, truncating it so only the final
    # failure was ever recorded.
    with open("bad_urls", "w") as bad_log:
        for aid, url in rows:
            aid = int(aid)
            try:
                article_text = text_from_url(url)
            except Exception:
                # Log and SKIP this row. The original had no `continue`,
                # so on failure it either re-wrote the previous row's text
                # into this row (silent corruption) or raised NameError on
                # the very first row. Bare `except:` narrowed to Exception
                # so Ctrl-C / SystemExit still propagate.
                bad_log.write(str(aid) + " " + str(sys.exc_info()) + "\n")
                continue
            cur.execute(
                "UPDATE articles SET article_text=%s WHERE id=%s",
                [article_text, aid],
            )
            conn.commit()
def main():
    """Backfill `article_text` for every row in the `articles` table.

    NOTE(review): this is a near-identical redefinition of the `main`
    earlier in the file — the second definition silently replaces the
    first. Confirm which copy is intended and delete the other.

    Fetches each stored URL, extracts the article text, and writes it back
    via a parameterized UPDATE. URLs that fail to download are logged to the
    "bad_urls" file (row id + exception info) and skipped.
    """
    import sys  # hoisted: the original imported inside the except clause

    dbinfo = recover()
    conn = MySQLdb.connect(**dbinfo)
    cur = conn.cursor()
    cur.execute("SELECT id,url FROM articles")
    rows = cur.fetchall()
    # Open the log ONCE before the loop. The original re-opened it with
    # mode "w" on every iteration, truncating it so only the final
    # failure was ever recorded.
    with open("bad_urls", "w") as bad_log:
        for aid, url in rows:
            aid = int(aid)
            try:
                article_text = text_from_url(url)
            except Exception:
                # Log and SKIP this row. The original had no `continue`,
                # so on failure it either re-wrote the previous row's text
                # into this row (silent corruption) or raised NameError on
                # the very first row. Bare `except:` narrowed to Exception
                # so Ctrl-C / SystemExit still propagate.
                bad_log.write(str(aid) + " " + str(sys.exc_info()) + "\n")
                continue
            cur.execute(
                "UPDATE articles SET article_text=%s WHERE id=%s",
                [article_text, aid],
            )
            conn.commit()