Beispiel #1
0
	def buildContent(self, item):
		"""Build a Content entity from an RSS <item> element.

		Reads the item's 'pubDate' (parsed with self.date_format, when
		present) and 'link' child elements, then lets
		self.parse_item_media attach any media found on the item.

		Returns the new, unsaved Content entity.
		"""
		entity = Content()
		raw_date = item.findtext('pubDate')
		if raw_date:
			# pubDate is optional in RSS; only parse it when present.
			entity.publication_date = datetime.strptime(raw_date, self.date_format)
		entity.web_url = db.Link(item.findtext('link'))
		self.parse_item_media(item, entity)
		return entity
Beispiel #2
0
	def get(self, id):
		"""Handle GET: serve a summary of one Content entity as JSON.

		Looks up the Content whose 'id' property matches the given id
		and returns a small JSON summary (headline, thumbnail, trail
		text, detail URL, section) via self.returnJSON.

		NOTE: fetch(1)[0] raises IndexError when no entity matches.
		"""
		record = Content.all().filter('id =', id).fetch(1)[0]
		payload = {
			'id': id,
			'headline': record.headline,
			'thumbnail': self.buildPicture(record.thumbnail),
			'summary': record.trail_text,
			'detail_url': self.detail_url + id,
			'section_name': record.section_name,
		}
		self.returnJSON(payload)
Beispiel #3
0
	def get(self):
		"""Handle GET: find stale content and queue each entity for deletion.

		Queries Content published more than one day ago and enqueues a
		/task/deleteold task per entity; the tasks do the actual deletes.
		Writes each queued key to the response and logs the total count.
		"""
		logging.info("Deleting old content")
		print("Deleting old content" + "\n")
		# BUG FIX: the original filter was
		#   'publication_date >', datetime.now() - timedelta(-1)
		# i.e. "newer than tomorrow", which matches nothing. "Old" content
		# (per the log messages) is anything published over a day ago.
		old_content = Content.all().filter('publication_date <', datetime.now() - timedelta(days=1))
		
		count = 0
		for content in old_content:
			count = count + 1
			# BUG FIX: key() returns a db.Key object; concatenating it with a
			# str raised TypeError. Stringify it before writing.
			self.response.out.write(str(content.key()) + "\n")
			taskqueue.add(url='/task/deleteold', params={'key': content.key()})
			
		logging.info("Marked %d content for deletion" % count)
Beispiel #4
0
	def buildContent(self, content_id):
		"""Serialise the Content entity with the given id into a plain dict.

		Fetches the entity whose 'id' property equals content_id and
		copies its fields into a dict, expanding tags and pictures via
		self.buildTags / self.buildPictures.

		Returns the dict, ready to be rendered as JSON.
		NOTE: fetch(1)[0] raises IndexError when no entity matches.
		"""
		entity = Content.all().filter('id =', content_id).fetch(1)[0]
		payload = {
			'id': content_id,
			'byline': entity.byline,
			'publication': entity.publication,
			'section_name': entity.section_name,
			'headline': entity.headline,
			'web_url': entity.web_url,
			'trail_text': entity.trail_text,
			'link_text': entity.link_text,
			'type': entity.type,
			'body': entity.body,
			'publication_date': entity.publication_date,
			'tags': self.buildTags(entity.tags),
			'pictures': self.buildPictures(entity.pictures),
		}
		return payload
Beispiel #5
0
			# NOTE(review): fragment — the 'if' that matches the 'else' below is
			# outside this view; presumably it tests whether a cached
			# last-modified value exists for this feed. Confirm against the
			# full function.
			try:
				rss_feed = urllib2.urlopen(req)
			except urllib2.HTTPError, e:
				# Presumably a 304 Not Modified reply to a conditional GET:
				# the stored copy is still current, nothing to re-parse.
				logging.info("RSS still valid for: " + path)
				return #rss not updated: win!
		else:
			# No cached headers to send — fetch the feed unconditionally.
			rss_feed = urllib2.urlopen(url)
			
		# Remember the server's Date header (used for the next conditional
		# fetch) and rebuild the feed's content key list from scratch.
		feed_item.last_modified = rss_feed.headers['date']	
		feed_item.content = []
		#process the feed
		for event, elem in ET.iterparse(rss_feed):
			if elem.tag == "item":
				link = elem.findtext("link")
				if re.search(r'guardian.co.uk', link):
					# Reuse the stored entity for this URL if one exists,
					# otherwise build and enqueue a new one.
					content = Content.all().filter('web_url =', link).fetch(1)
					if not content:
						content = self.buildContent(elem)
						key = content.put()
						feed_item.content.append(key)
						taskqueue.add(url='/task/web', params={'key': key})
					else:
						content = content[0]
						feed_item.content.append(content.put())
					elem.clear() # won't need the children any more
				else:
					logging.info("None guardian url, bailing (%s)" % link)
					# NOTE(review): 'content' here is whatever a previous
					# iteration left behind (or unbound on the first
					# non-guardian item, raising NameError) — this looks like
					# it deletes the wrong entity. Verify intent.
					content.delete()
				
		feed_item.put()