예제 #1
0
파일: crawler.py 프로젝트: jgera/DaBuzz
def get_stories():
  """Collect stories from the configured RSS feeds along with sentiment scores.

  Two feeds are read (a MarketWatch top-stories feed and a Google News RSS
  feed). For every feed entry, the text of the linked page is extracted with
  ``html_extract.text_from_url`` and passed to ``sentiment``. Entries whose
  sentiment result carries the label "Error" are left out.

  Returns:
    A list of ``(link, title, text, pos, neg, neutral)`` tuples, where the
    last three items are the reported probabilities converted to ``float``.
  """
  feed_urls = (
      "http://feeds.marketwatch.com/marketwatch/topstories/",
      "http://news.google.com/news?pz=1&cf=all&ned=us&hl=en&topic=b&output=rss",
  )

  collected = []
  for feed_url in feed_urls:
    parsed = feedparser.parse(feed_url)
    for item in parsed['entries']:
      headline = item['title']
      link = item['link']
      body = html_extract.text_from_url(link)
      verdict = sentiment(body)

      # Skip entries the sentiment step could not score.
      if verdict["label"] == "Error":
        continue

      positive = float(verdict["probability"]["pos"])
      negative = float(verdict["probability"]["neg"])
      neutral = float(verdict["probability"]["neutral"])
      collected.append((link, headline, body, positive, negative, neutral))
  return collected
예제 #2
0
파일: fix_old.py 프로젝트: jgera/DaBuzz
def main():
    """Re-fetch the text of every article and store it back in the database.

    Reads ``id`` and ``url`` for every row of ``articles``, extracts the page
    text with ``text_from_url`` and writes it to the row's ``article_text``
    column. All updates are committed together at the end.

    Rows whose URL cannot be fetched are skipped and recorded in the file
    ``bad_urls`` (one line per failure: the article id followed by the
    exception info). The file is rewritten from scratch on each run.
    """
    import sys

    dbinfo = recover()
    conn = MySQLdb.connect(**dbinfo)
    try:
        cur = conn.cursor()
        cur.execute("SELECT id,url FROM articles")
        rows = cur.fetchall()

        update_sql = "UPDATE articles SET article_text=%s WHERE id=%s"

        # Open the log once for the whole run: opening it in "w" mode inside
        # the loop would truncate it on every row and keep only the last
        # failure.
        with open("bad_urls", "w") as bad_urls:
            for aid, url in rows:
                aid = int(aid)
                try:
                    article_text = text_from_url(url)
                except Exception:
                    # Exception (not a bare except) so Ctrl-C / SystemExit
                    # still abort the run.
                    bad_urls.write(str(aid) + " " + str(sys.exc_info()) + "\n")
                    # Skip the UPDATE: there is no fresh text for this row,
                    # and reusing the previous row's text would corrupt it.
                    continue

                cur.execute(update_sql, [article_text, aid])

        conn.commit()
    finally:
        # Release the connection whether or not the run succeeded.
        conn.close()
예제 #3
0
def main():
  """Re-fetch the text of every article and store it back in the database.

  Reads ``id`` and ``url`` for every row of ``articles``, extracts the page
  text with ``text_from_url`` and writes it to the row's ``article_text``
  column. All updates are committed together at the end.

  Rows whose URL cannot be fetched are skipped and recorded in the file
  ``bad_urls`` (one line per failure: the article id followed by the
  exception info). The file is rewritten from scratch on each run.
  """
  import sys

  dbinfo = recover()
  conn = MySQLdb.connect(**dbinfo)
  try:
    cur = conn.cursor()
    cur.execute("SELECT id,url FROM articles")
    rows = cur.fetchall()

    update_sql = "UPDATE articles SET article_text=%s WHERE id=%s"

    # Open the log once for the whole run: opening it in "w" mode inside the
    # loop would truncate it on every row and keep only the last failure.
    with open("bad_urls", "w") as bad_urls:
      for aid, url in rows:
        aid = int(aid)
        try:
          article_text = text_from_url(url)
        except Exception:
          # Exception (not a bare except) so Ctrl-C / SystemExit still abort
          # the run.
          bad_urls.write(str(aid) + " " + str(sys.exc_info()) + "\n")
          # Skip the UPDATE: there is no fresh text for this row, and reusing
          # the previous row's text would corrupt it.
          continue

        cur.execute(update_sql, [article_text, aid])

    conn.commit()
  finally:
    # Release the connection whether or not the run succeeded.
    conn.close()