# Task handler method; assumes `import datetime`, `import feedparser`, and
# `from google.appengine.api import urlfetch, taskqueue` at module level.
def post(self):
    key = self.request.get('key')
    feed = NewsFeed.get_by_key_name(key)
    if feed is None:
        # Unknown feed key; nothing to fetch.
        self.error(404)
        return
    result = urlfetch.fetch(feed.url)
    if result.status_code == 200:
        rssfeed = feedparser.parse(result.content)
        for i in rssfeed.entries:
            item = NewsItem(key_name=i.guid)
            item.url = i.link
            item.title = i.title
            item.text = i.summary
            item.date = datetime.datetime(*i.date_parsed[:6])
            item.orderdate = datetime.datetime(*i.date_parsed[:6])
            item.source = feed
            item.put()
        feed.last_fetch = datetime.datetime.now()
        feed.put()
        # Re-enqueue so the feed keeps refreshing. Pass the key name the
        # handler reads back, not the model's bound key() method.
        taskqueue.add(queue_name='fetch-news-queue',
                      url='/admin/feeds/fetch/',
                      params={'key': key})
        self.response.out.write('feed pulled')
    else:
        self.error(500)
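# The task handler above (and the page handler that follows) rely on
# NewsFeed and NewsItem datastore models that are not shown. A minimal
# sketch of what they imply, with property types inferred from usage (an
# assumption, not the original schema):
from google.appengine.ext import db

class NewsFeed(db.Model):
    url = db.LinkProperty()             # feed URL pulled by the task
    last_fetch = db.DateTimeProperty()  # stamped after each successful pull

class NewsItem(db.Model):
    url = db.LinkProperty()
    title = db.StringProperty()
    text = db.TextProperty()
    date = db.DateTimeProperty()
    orderdate = db.DateTimeProperty()   # separate sort key for the news page
    source = db.ReferenceProperty(NewsFeed)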
def get(self):
    url = settings.YAHOO_PIPE % 'rss'
    result = urlfetch.fetch(url)
    if result.status_code == 200:
        feed = feedparser.parse(result.content)
        for i in feed.entries:
            item = NewsItem(key_name=i.guid)
            item.url = i.link
            item.title = i.title
            item.text = i.summary
            item.date = datetime.datetime(*i.date_parsed[:6])
            item.orderdate = datetime.datetime(*i.date_parsed[:6])
            item.put()
        items = db.GqlQuery(
            "SELECT * FROM NewsItem ORDER BY orderdate DESC LIMIT 100")
        context = {'news': items}
        #context = add_user_to_context(context)
        self.response.out.write(
            template.render(tmpl('templates/news2.html'), context))
    else:
        self.response.out.write('err')
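# A hypothetical sketch of how these two handlers might be wired up under
# the old webapp framework; the class names FetchFeedWorker and NewsPage
# are assumptions for illustration:
from google.appengine.ext import webapp
from google.appengine.ext.webapp.util import run_wsgi_app

application = webapp.WSGIApplication([
    ('/admin/feeds/fetch/', FetchFeedWorker),  # task with the post() above
    ('/news/', NewsPage),                      # page with the get() above
], debug=True)

def main():
    run_wsgi_app(application)

if __name__ == '__main__':
    main()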
link = entry.link
url_hash = hashlib.md5(link).hexdigest()
date = entry.published_parsed
published_date = arrow.get(date).to('US/Pacific').date().strftime('%Y-%m-%d')
published_ts = arrow.get(date).to('US/Pacific').to('UTC').timestamp

# See if we already have this story
try:
    NewsItem.get(NewsItem.url_hash == url_hash)
    print 'Item exists. Skipping.'
    continue
except peewee.DoesNotExist:
    print 'Creating item.'
    item = NewsItem()

headline = entry.title
summary = entry.summary

item.url_hash = url_hash
item.link = link
item.source = 'Seattle Times'
item.title = headline
item.summary = summary
item.published_date = published_date
item.published_ts = published_ts
item.inserted_ts = arrow.utcnow().timestamp
item.save()
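# The snippet above (and the feed-specific variants below) assume a peewee
# NewsItem model with a unique url_hash column as the dedup key. A sketch
# reconstructed from the fields used; the SQLite backing store is an
# assumption, not from the source:
import peewee

db = peewee.SqliteDatabase('news.db')  # assumed backing store

class NewsItem(peewee.Model):
    url_hash = peewee.CharField(unique=True)  # md5 of the story link
    link = peewee.CharField()
    source = peewee.CharField()
    title = peewee.CharField()
    summary = peewee.TextField()
    authors = peewee.CharField(default='')
    published_date = peewee.CharField()   # 'YYYY-MM-DD' in US/Pacific
    published_ts = peewee.IntegerField()  # UTC epoch seconds
    inserted_ts = peewee.IntegerField()

    class Meta:
        database = db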
# Skip stories whose headlines begin with any unwanted prefix. The flag is
# reset here so a match on one entry doesn't skip every later entry.
prefix_match = False
for prefix in skippable_headline_prefixes:
    if entry.title.startswith(prefix):
        prefix_match = True
if prefix_match:
    print 'Skipping story'
    continue

# See if we already have this story
try:
    NewsItem.get(NewsItem.url_hash == url_hash)
    print 'Item exists. Skipping.'
    continue
except peewee.DoesNotExist:
    print 'Creating item.'
    item = NewsItem()

soup = bs4.BeautifulSoup(entry.description, 'html.parser')
item.summary = unidecode.unidecode(soup.text.strip())
item.title = unidecode.unidecode(entry.title)
item.url_hash = url_hash
item.link = link
item.authors = ''
item.source = 'Capital WX Gang'
item.published_date = published_date
item.published_ts = published_ts
item.inserted_ts = arrow.utcnow().timestamp
item.save()
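# The same prefix test can be written without the flag using any(); a
# sketch of the alternative, sitting in the same loop position:
if any(entry.title.startswith(p) for p in skippable_headline_prefixes):
    print 'Skipping story'
    continue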
# See if any of the skippable ids are in the story ids
if pcollid in skippable_collection_ids:
    print 'Skipping %s story' % pcollid
    continue

# If it's also published on Weather Underground, skip it
if 'wunderground' in tags:
    print 'Skipping Weather Underground story'
    continue

# See if the story already exists
try:
    item = NewsItem.get(NewsItem.url_hash == url_hash)
    print 'Item exists. Skipping.'
    continue
except peewee.DoesNotExist:
    print 'Creating new item.'
    item = NewsItem()

item.url_hash = url_hash
item.title = unidecode.unidecode(entry['title'].strip())
item.summary = unidecode.unidecode(entry['description'].strip())
item.source = "Weather Channel"
item.link = link
item.published_date = published_date
item.published_ts = published_ts
item.inserted_ts = arrow.utcnow().timestamp
item.save()
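# This Weather Channel block uses pcollid, tags, link, and url_hash without
# showing where they come from. A speculative sketch of the implied
# preamble (the pcollid field name and tag layout are guesses):
import hashlib

link = entry['link']
url_hash = hashlib.md5(link).hexdigest()
tags = [t['term'] for t in entry.get('tags', [])]
pcollid = entry.get('pcollid', '')  # collection id attached to the story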