Example #1
    def post(self):
        key = self.request.get('key')
        feed = NewsFeed.get_by_key_name(key)
        if feed is None:
            # No feed stored under this key; nothing to fetch
            self.error(404)
            return
        result = urlfetch.fetch(feed.url)
        if result.status_code == 200:
            rssfeed = feedparser.parse(result.content)
            for i in rssfeed.entries:
                # key_name=guid makes each put() an idempotent upsert
                item = NewsItem(key_name=i.guid)
                item.url = i.link
                item.title = i.title
                item.text = i.summary
                item.date = datetime.datetime(*i.date_parsed[:6])
                item.orderdate = datetime.datetime(*i.date_parsed[:6])
                item.source = feed
                item.put()
            feed.last_fetch = datetime.datetime.now()
            feed.put()
            # Re-enqueue this feed; pass the key name, not the bound key() method
            taskqueue.add(queue_name='fetch-news-queue', url='/admin/feeds/fetch/',
                          params={'key': feed.key().name()})
            self.response.out.write('feed pulled')
        else:
            self.error(500)
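Example #1 registers no routing itself. A minimal sketch of how such a handler might be wired up on classic App Engine's webapp framework follows; the class name is an assumption, and the route matches the URL the handler passes to taskqueue.add:

    # Hypothetical wiring for the handler above (not the author's code)
    from google.appengine.ext import webapp
    from google.appengine.ext.webapp.util import run_wsgi_app

    class FetchFeedHandler(webapp.RequestHandler):  # name is an assumption
        def post(self):
            pass  # body as in Example #1

    application = webapp.WSGIApplication([
        ('/admin/feeds/fetch/', FetchFeedHandler),
    ])

    def main():
        run_wsgi_app(application)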
Example #2
    def get(self):
        url = settings.YAHOO_PIPE % 'rss'
        result = urlfetch.fetch(url)
        if result.status_code == 200:
            feed = feedparser.parse(result.content)
            for i in feed.entries:
                item = NewsItem(key_name=i.guid)
                item.url = i.link
                item.title = i.title
                item.text = i.summary
                item.date = datetime.datetime(*i.date_parsed[:6])
                item.orderdate = datetime.datetime(*i.date_parsed[:6])
                item.put()

            items = db.GqlQuery("SELECT * FROM NewsItem ORDER BY orderdate DESC LIMIT 100")

            context = {'news': items}
            #context = add_user_to_context(context)
            self.response.out.write(
                template.render(tmpl('templates/news2.html'), context))
        else:
            # Surface the failure as a server error, not a 200 with 'err'
            self.error(500)
            self.response.out.write('err')
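Examples #1 and #2 lean on two datastore models that neither snippet defines. A hedged reconstruction, with property types inferred from how each field is used:

    # Sketch of the assumed models; types are guesses from usage
    from google.appengine.ext import db

    class NewsFeed(db.Model):
        url = db.LinkProperty()
        last_fetch = db.DateTimeProperty()

    class NewsItem(db.Model):
        url = db.LinkProperty()
        title = db.StringProperty()
        text = db.TextProperty()
        date = db.DateTimeProperty()
        orderdate = db.DateTimeProperty()  # drives the GQL ORDER BY in Example #2
        source = db.ReferenceProperty(NewsFeed)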
Example #3
    link = entry.link
    # MD5 of the link serves as a stable dedup key for this story
    url_hash = hashlib.md5(link).hexdigest()
    date = entry.published_parsed

    published_date = arrow.get(date).to('US/Pacific').date().strftime('%Y-%m-%d')
    published_ts = arrow.get(date).to('US/Pacific').to('UTC').timestamp

    # See if we already have this story
    try:
        NewsItem.get(NewsItem.url_hash == url_hash)
        print 'Item exists. Skipping.'
        continue
    except peewee.DoesNotExist:
        print 'Creating item.'
        item = NewsItem()

    headline = entry.title
    summary = entry.summary

    item.url_hash = url_hash
    item.link = link
    item.source = 'Seattle Times'
    item.title = headline
    item.summary = summary
    item.published_date = published_date
    item.published_ts = published_ts
    item.inserted_ts = arrow.utcnow().timestamp

    item.save()
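Examples #3 through #5 set attributes on a peewee NewsItem model that the snippets never define. A sketch of what it plausibly looks like; the field types and the SQLite backend are guesses from usage:

    # Hypothetical peewee model matching the attributes the snippets set
    import peewee

    sqlite_db = peewee.SqliteDatabase('news.db')  # backend is an assumption

    class NewsItem(peewee.Model):
        url_hash = peewee.CharField(unique=True)
        link = peewee.CharField()
        source = peewee.CharField()
        title = peewee.CharField()
        summary = peewee.TextField()
        authors = peewee.CharField(default='')
        published_date = peewee.CharField()  # 'YYYY-MM-DD' string per Example #3
        published_ts = peewee.IntegerField()
        inserted_ts = peewee.IntegerField()

        class Meta:
            database = sqlite_db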
Example #4
    # Skip stories whose headlines start with any unwanted prefix;
    # the flag must be reset each iteration or it leaks across entries
    prefix_match = False
    for prefix in skippable_headline_prefixes:
        if entry.title.startswith(prefix):
            prefix_match = True
            break

    if prefix_match:
        print 'Skipping story'
        continue

    # See if we already have this story
    try:
        NewsItem.get(NewsItem.url_hash==url_hash)
        print 'Item exists. Skipping.'
        continue
    except peewee.DoesNotExist:
        print 'Creating item.'
        item = NewsItem()

    soup = bs4.BeautifulSoup(entry.description, 'html.parser')
    item.summary = unidecode.unidecode(soup.text.strip())
    item.title = unidecode.unidecode(entry.title)

    item.url_hash = url_hash
    item.link = link
    item.authors = ''
    item.source = 'Capital WX Gang'
    item.published_date = published_date
    item.published_ts = published_ts
    item.inserted_ts = arrow.utcnow().timestamp

    item.save()
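The bare continue statements show that these snippets are bodies of a per-entry feed loop. A minimal enclosing loop under that assumption; the feed URL and prefix list are placeholders:

    # Hypothetical driver loop for the per-entry snippets above
    import hashlib

    import feedparser

    skippable_headline_prefixes = ['PM Update']        # assumed values
    feed = feedparser.parse('http://example.com/rss')  # placeholder URL

    for entry in feed.entries:
        link = entry.link
        url_hash = hashlib.md5(link).hexdigest()
        # ... per-entry skip / dedup / save logic as in the examples ...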
Example #5
    # See if any of the skippable ids are in the story ids
    if pcollid in skippable_collection_ids:
        print 'Skipping %s story' % pcollid
        continue

    # If it's also published on weather underground, skip it
    if 'wunderground' in tags:
        print 'Skipping Weather Underground Story'
        continue

    # See if the story already exists
    try:
        # The fetched row is unused; we only care whether it exists
        NewsItem.get(NewsItem.url_hash == url_hash)
        print 'Item Exists. Skipping.'
        continue
    except peewee.DoesNotExist:
        print 'Creating new item.'
        item = NewsItem()

    item.url_hash = url_hash
    item.title = unidecode.unidecode(entry['title'].strip())
    item.summary = unidecode.unidecode(entry['description'].strip())
    item.source = "Weather Channel"
    item.link = link
    item.published_date = published_date
    item.published_ts = published_ts
    item.inserted_ts = arrow.utcnow().timestamp

    item.save()
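The try/except dedup idiom recurs in every peewee snippet; peewee's built-in Model.get_or_create collapses it into a single call. A sketch against the Example #5 fields (not the author's code):

    # defaults= are applied only when the row is newly created
    item, created = NewsItem.get_or_create(
        url_hash=url_hash,
        defaults={
            'title': unidecode.unidecode(entry['title'].strip()),
            'summary': unidecode.unidecode(entry['description'].strip()),
            'source': 'Weather Channel',
            'link': link,
            'published_date': published_date,
            'published_ts': published_ts,
            'inserted_ts': arrow.utcnow().timestamp,
        })
    if not created:
        print 'Item Exists. Skipping.'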