# Parse the feed at ``source_url`` and prepare a NewsItem for every entry
# whose link has not been stored yet (de-duplicated by an MD5 of the link).
# NOTE(review): ``source_url`` is not defined in this fragment and nothing
# here saves the item -- presumably both live in the surrounding file;
# confirm.
feed = feedparser.parse(source_url)
entries = feed.entries

for entry in entries:
    link = entry.link

    # Hash explicit UTF-8 bytes.  Passing a unicode link straight to md5()
    # triggers an implicit ASCII encode on Python 2 (UnicodeEncodeError for
    # any non-ASCII URL) and a TypeError on Python 3.  ASCII links produce
    # exactly the same digest as before.
    link_bytes = link if isinstance(link, bytes) else link.encode('utf-8')
    url_hash = hashlib.md5(link_bytes).hexdigest()

    date = entry.published_parsed
    # Convert once and derive both values from the same Arrow object.
    published_pacific = arrow.get(date).to('US/Pacific')
    # Calendar date as seen in US/Pacific, formatted YYYY-MM-DD.
    published_date = published_pacific.date().strftime('%Y-%m-%d')
    # Not consumed within this fragment; kept for code that may follow.
    # NOTE(review): ``.timestamp`` is read as an attribute here; newer arrow
    # releases expose it as a method -- confirm the pinned arrow version.
    published_ts = published_pacific.to('UTC').timestamp

    # See if we already have this story
    try:
        NewsItem.get(NewsItem.url_hash == url_hash)
    except peewee.DoesNotExist:
        print('Creating item.')
    else:
        print('Item exists. Skipping.')
        continue

    item = NewsItem()
    headline = entry.title
    summary = entry.summary
    item.url_hash = url_hash
    item.link = link
    item.source = 'Seattle Times'
    item.title = headline
    item.summary = summary
    item.published_date = published_date
# Parse the Seattle Times weather-beat feed and prepare a NewsItem for every
# entry whose link has not been stored yet (de-duplicated by an MD5 of the
# link).
# NOTE(review): nothing in this fragment saves the item -- presumably the
# save follows in the surrounding file; confirm.
source_url = 'http://blogs.seattletimes.com/today/category/weather-beat/feed/'
feed = feedparser.parse(source_url)
entries = feed.entries

for entry in entries:
    link = entry.link

    # Hash explicit UTF-8 bytes.  Passing a unicode link straight to md5()
    # triggers an implicit ASCII encode on Python 2 (UnicodeEncodeError for
    # any non-ASCII URL) and a TypeError on Python 3.  ASCII links produce
    # exactly the same digest as before.
    link_bytes = link if isinstance(link, bytes) else link.encode('utf-8')
    url_hash = hashlib.md5(link_bytes).hexdigest()

    date = entry.published_parsed
    # Convert once and derive both values from the same Arrow object.
    published_pacific = arrow.get(date).to('US/Pacific')
    # Calendar date as seen in US/Pacific, formatted YYYY-MM-DD.
    published_date = published_pacific.date().strftime('%Y-%m-%d')
    # Not consumed within this fragment; kept for code that may follow.
    # NOTE(review): ``.timestamp`` is read as an attribute here; newer arrow
    # releases expose it as a method -- confirm the pinned arrow version.
    published_ts = published_pacific.to('UTC').timestamp

    # See if we already have this story
    try:
        NewsItem.get(NewsItem.url_hash == url_hash)
    except peewee.DoesNotExist:
        print('Creating item.')
    else:
        print('Item exists. Skipping.')
        continue

    item = NewsItem()
    headline = entry.title
    summary = entry.summary
    item.url_hash = url_hash
    item.link = link
    item.source = 'Seattle Times'
    item.title = headline
    item.summary = summary
    item.published_date = published_date
import sys

import peewee

from models import NewsItem

# Command-line helper: look up one NewsItem by the ID given as the first
# argument, show its headline, and set ``hidden = True`` once the user
# confirms.  Exits with a message on a missing/invalid ID or unknown story.

if len(sys.argv) < 2:
    sys.exit('Usage: $ python hide_story.py <story id>')

# ``story_id`` rather than ``id`` so the builtin is not shadowed.
try:
    story_id = int(sys.argv[1])
except ValueError:
    sys.exit("Invalid Story ID")

# Try to fetch the item
try:
    item = NewsItem.get(NewsItem.id == story_id)
except peewee.DoesNotExist:
    sys.exit("Error! News Item with ID %d not found" % story_id)

print('You are attempting to hide story id %d' % story_id)
print('Headline: %s' % item.title)

confirm = raw_input("Are you sure? Y/n: ")
# Accept the usual affirmative spellings ("Y", "y", "yes", with stray
# whitespace) instead of only an exact "Y", which silently ignored "y".
# An empty answer still counts as "no", so a stray Enter never hides a story.
if confirm.strip().lower() in ('y', 'yes'):
    item.hidden = True
    item.save()
import sys

import peewee

from models import NewsItem

# Command-line helper: look up one NewsItem by the ID given as the first
# argument, show its headline, and set ``hidden = True`` once the user
# confirms.  Exits with a message on a missing/invalid ID or unknown story.

if len(sys.argv) < 2:
    sys.exit('Usage: $ python hide_story.py <story id>')

# ``story_id`` rather than ``id`` so the builtin is not shadowed.
try:
    story_id = int(sys.argv[1])
except ValueError:
    sys.exit("Invalid Story ID")

# Try to fetch the item
try:
    item = NewsItem.get(NewsItem.id == story_id)
except peewee.DoesNotExist:
    sys.exit("Error! News Item with ID %d not found" % story_id)

print('You are attempting to hide story id %d' % story_id)
print('Headline: %s' % item.title)

confirm = raw_input("Are you sure? Y/n: ")
# Accept the usual affirmative spellings ("Y", "y", "yes", with stray
# whitespace) instead of only an exact "Y", which silently ignored "y".
# An empty answer still counts as "no", so a stray Enter never hides a story.
if confirm.strip().lower() in ('y', 'yes'):
    item.hidden = True
    item.save()
from datetime import datetime

from models import NewsItem

# Sync scraped news items into the NewsItem table: insert items whose slug
# is not stored yet, and refresh stored rows whose content hash has changed.
# NOTE(review): ``Scrap`` is not imported in this fragment -- confirm where
# it comes from.
scrapper = Scrap()
news_items = scrapper.get_news_items()

for item in news_items:
    slug = item['slug']

    # A single lookup replaces the previous len(query) == 0 check followed
    # by query.get(), which hit the database twice for existing rows.
    try:
        to_db = NewsItem.get(NewsItem.slug == slug)
    except NewsItem.DoesNotExist:
        # item has not been in the db before
        to_db = NewsItem(news_hash=item['news_hash'],
                         slug=slug,
                         news_title=item['title'],
                         news_link=item['link'],
                         date_updated=item['date_updated'],
                         intro_text=item['intro_text'],
                         scrapped_at=datetime.now())
        to_db.save()
        continue

    # check if the hash has changed
    if to_db.news_hash != item['news_hash']:
        to_db.news_hash = item['news_hash']
        # Read the headline from 'title', the same key the insert path
        # uses; this branch previously read 'news_title'.
        # NOTE(review): confirm the key against Scrap.get_news_items().
        to_db.news_title = item['title']
        to_db.news_link = item['link']
        to_db.date_updated = item['date_updated']
        to_db.intro_text = item['intro_text']
        to_db.save()