# Parse the feed at source_url and populate a NewsItem for every entry whose
# link hash is not already stored.
feed = feedparser.parse(source_url)
entries = feed.entries

for entry in entries:

    link = entry.link
    # md5 operates on bytes. Encode text links explicitly so that a link
    # containing non-ASCII characters does not fail on an implicit ASCII
    # conversion; pure-ASCII links produce the same digest as before.
    link_bytes = link if isinstance(link, bytes) else link.encode('utf-8')
    url_hash = hashlib.md5(link_bytes).hexdigest()
    date = entry.published_parsed

    # Convert the publish time to US/Pacific once and derive both values
    # from the same object.
    published_local = arrow.get(date).to('US/Pacific')
    published_date = published_local.date().strftime('%Y-%m-%d')
    published_ts = published_local.to('UTC').timestamp

    # See if we already have this story. Only the lookup sits inside the
    # try so that nothing else can be mistaken for a missing row.
    try:
        NewsItem.get(NewsItem.url_hash == url_hash)
    except peewee.DoesNotExist:
        print('Creating item.')
        item = NewsItem()
    else:
        print('Item exists. Skipping.')
        continue

    headline = entry.title
    summary = entry.summary

    item.url_hash = url_hash
    item.link = link
    item.source = 'Seattle Times'
    item.title = headline
    item.summary = summary
    item.published_date = published_date
    # NOTE(review): published_ts is computed but never assigned to the item,
    # and item.save() is not called in the visible code -- confirm whether
    # the rest of this loop body was cut off.
# Parse the Seattle Times weather-beat feed and populate a NewsItem for each
# entry whose link hash is not already stored.
source_url = 'http://blogs.seattletimes.com/today/category/weather-beat/feed/'
feed = feedparser.parse(source_url)
entries = feed.entries

for entry in entries:

    link = entry.link
    # hashlib.md5 needs bytes: a text link is encoded as UTF-8 up front so
    # non-ASCII URLs hash instead of raising. ASCII-only links keep the same
    # digest they had before this change.
    if isinstance(link, bytes):
        hash_input = link
    else:
        hash_input = link.encode('utf-8')
    url_hash = hashlib.md5(hash_input).hexdigest()
    date = entry.published_parsed

    # One US/Pacific conversion feeds both the date string and the timestamp.
    pacific_time = arrow.get(date).to('US/Pacific')
    published_date = pacific_time.date().strftime('%Y-%m-%d')
    published_ts = pacific_time.to('UTC').timestamp

    # See if we already have this story; the try covers only the lookup.
    try:
        NewsItem.get(NewsItem.url_hash == url_hash)
    except peewee.DoesNotExist:
        print('Creating item.')
        item = NewsItem()
    else:
        print('Item exists. Skipping.')
        continue

    headline = entry.title
    summary = entry.summary

    item.url_hash = url_hash
    item.link = link
    item.source = 'Seattle Times'
    item.title = headline
    item.summary = summary
    item.published_date = published_date
    # NOTE(review): the visible code neither stores published_ts on the item
    # nor calls item.save() -- verify whether the tail of this loop is missing.
Example #3
0
import peewee
import sys

from models import NewsItem

# Mark one story as hidden. The story ID is the first command-line argument;
# the change is saved only after the user answers exactly 'Y'.
if len(sys.argv) < 2:
    sys.exit('Usage: $ python hide_story.py <story id>')

# Named story_id rather than id so the builtin id() is not shadowed.
try:
    story_id = int(sys.argv[1])
except ValueError:
    sys.exit("Invalid Story ID")

# Try to fetch the item
try:
    item = NewsItem.get(NewsItem.id == story_id)
except peewee.DoesNotExist:
    sys.exit("Error! News Item with ID %d not found" % story_id)

print('You are attempting to hide story id %d' % story_id)
print('Headline: %s' % item.title)

# Any answer other than an exact 'Y' leaves the story untouched.
confirm = raw_input("Are you sure? Y/n: ")
if confirm == 'Y':
    item.hidden = True
    item.save()
Example #4
0
import peewee
import sys

from models import NewsItem

# Hide a story by ID. Usage: python hide_story.py <story id>
# Exits with a message when the argument is missing, is not an integer, or
# does not match a stored NewsItem.
if len(sys.argv) < 2:
    sys.exit('Usage: $ python hide_story.py <story id>')

raw_story_id = sys.argv[1]

# The parsed value lives in story_id so the builtin id() stays usable.
try:
    story_id = int(raw_story_id)
except ValueError:
    sys.exit("Invalid Story ID")

# Try to fetch the item
try:
    item = NewsItem.get(NewsItem.id == story_id)
except peewee.DoesNotExist:
    sys.exit("Error! News Item with ID %d not found" % story_id)

print('You are attempting to hide story id %d' % story_id)
print('Headline: %s' % item.title)

# Only a literal 'Y' confirms; everything else is treated as "no".
confirm = raw_input("Are you sure? Y/n: ")
if confirm == 'Y':
    item.hidden = True
    item.save()
Example #5
0
from models import NewsItem
from datetime import datetime

# Scrape the current news items and mirror them into the NewsItem table:
# insert rows whose slug is new, and refresh rows whose content hash changed.
scrapper = Scrap()

news_items = scrapper.get_news_items()

for item in news_items:
    # A single lookup by slug decides between insert and update.
    try:
        to_db = NewsItem.get(NewsItem.slug == item['slug'])
    except NewsItem.DoesNotExist:
        # item has not been in the db before
        to_db = NewsItem(news_hash=item['news_hash'],
                         slug=item['slug'],
                         news_title=item['title'],
                         news_link=item['link'],
                         date_updated=item['date_updated'],
                         intro_text=item['intro_text'],
                         scrapped_at=datetime.now())
        to_db.save()
        continue

    # Row exists: rewrite it only when the hash has changed.
    if to_db.news_hash != item['news_hash']:
        to_db.news_hash = item['news_hash']
        # The scraped dict is read with the 'title' key on insert, so the
        # update path uses the same key (it previously read 'news_title').
        to_db.news_title = item['title']
        to_db.news_link = item['link']
        to_db.date_updated = item['date_updated']
        to_db.intro_text = item['intro_text']
        to_db.save()