def buildContent(self, item):
    """Build a Content entity from an RSS <item> element."""
    content = Content()
    pub_date = item.findtext('pubDate')
    if pub_date:
        content.publication_date = datetime.strptime(pub_date, self.date_format)
    content.web_url = db.Link(item.findtext('link'))
    self.parse_item_media(item, content)
    return content
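
# self.date_format is defined elsewhere in the handler; RSS <pubDate> values
# are RFC 822 timestamps, so a plausible value (an assumption, not confirmed
# by this code) would be:
from datetime import datetime

RSS_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S %Z"
print(datetime.strptime("Mon, 06 Sep 2010 13:45:00 GMT", RSS_DATE_FORMAT))
# -> 2010-09-06 13:45:00
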
def get(self, id):
    """Return a summary of one content item as JSON."""
    content = Content.all().filter('id =', id).fetch(1)[0]
    json_content = {}
    json_content['id'] = id
    json_content['headline'] = content.headline
    json_content['thumbnail'] = self.buildPicture(content.thumbnail)
    json_content['summary'] = content.trail_text
    json_content['detail_url'] = self.detail_url + id
    json_content['section_name'] = content.section_name
    self.returnJSON(json_content)
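
# returnJSON and buildPicture are helpers defined elsewhere in the app; a
# minimal sketch of what returnJSON could look like, assuming the classic
# webapp RequestHandler base class (names and serialisation details here are
# assumptions, not the app's actual code):
import json  # older GAE runtimes would use django.utils.simplejson instead

from google.appengine.ext import webapp


class JSONHandler(webapp.RequestHandler):

    def returnJSON(self, payload):
        # default=str lets non-JSON types such as publication_date pass through.
        self.response.headers['Content-Type'] = 'application/json'
        self.response.out.write(json.dumps(payload, default=str))
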
def get(self):
    """Queue a deletion task for each piece of old content."""
    logging.info("Deleting old content")
    cutoff = datetime.now() - timedelta(days=1)
    old_content = Content.all().filter('publication_date <', cutoff)
    count = 0
    for content in old_content:
        count += 1
        self.response.out.write(str(content.key()) + "\n")
        taskqueue.add(url='/task/deleteold', params={'key': str(content.key())})
    logging.info("Marked %d content for deletion" % count)
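
# The /task/deleteold worker that these tasks hit is not shown; a sketch of
# what it could look like (handler name and URL wiring are assumptions, only
# the 'key' parameter comes from the code above):
from google.appengine.ext import db, webapp


class DeleteOldTask(webapp.RequestHandler):

    def post(self):  # push-queue tasks arrive as POSTs by default
        key = self.request.get('key')
        if key:
            db.delete(db.Key(key))
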
def buildContent(self, content_id):
    """Assemble the full JSON representation of one content item."""
    content = Content.all().filter('id =', content_id).fetch(1)[0]
    json_content = {}
    json_content['id'] = content_id
    json_content['byline'] = content.byline
    json_content['publication'] = content.publication
    json_content['section_name'] = content.section_name
    json_content['headline'] = content.headline
    json_content['web_url'] = content.web_url
    json_content['trail_text'] = content.trail_text
    json_content['link_text'] = content.link_text
    json_content['type'] = content.type
    json_content['body'] = content.body
    json_content['publication_date'] = content.publication_date
    json_content['tags'] = self.buildTags(content.tags)
    json_content['pictures'] = self.buildPictures(content.pictures)
    return json_content
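
# buildTags and buildPictures are helpers defined elsewhere; assuming
# content.pictures holds the same kind of reference that content.thumbnail
# does (the summary handler above calls self.buildPicture on that), a
# hypothetical sketch of buildPictures would be:
def buildPictures(self, pictures):
    # Map each stored picture reference through the same buildPicture helper
    # used for thumbnails.
    return [self.buildPicture(picture) for picture in pictures]
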
try:
    # Conditional request: an unchanged feed (304) surfaces as an HTTPError.
    rss_feed = urllib2.urlopen(req)
except urllib2.HTTPError:
    logging.info("RSS still valid for: " + path)
    return  # rss not updated: win!
else:
    # Feed has changed: fetch it afresh and rebuild the content list.
    rss_feed = urllib2.urlopen(url)
    feed_item.last_modified = rss_feed.headers['date']
    feed_item.content = []
    # process the feed
    for event, elem in ET.iterparse(rss_feed):
        if elem.tag == "item":
            link = elem.findtext("link")
            if link and re.search(r'guardian.co.uk', link):
                content = Content.all().filter('web_url =', link).fetch(1)
                if not content:
                    # New article: store it and queue a task to fetch the full page.
                    content = self.buildContent(elem)
                    key = content.put()
                    feed_item.content.append(key)
                    taskqueue.add(url='/task/web', params={'key': str(key)})
                else:
                    # Already stored: just reference the existing entity.
                    content = content[0]
                    feed_item.content.append(content.put())
                elem.clear()  # won't need the children any more
            else:
                logging.info("Non-Guardian url, bailing (%s)" % link)
    feed_item.put()
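
# The conditional request `req` used above is built earlier in the handler; a
# sketch of the assumed construction, using the Date header stored on the
# previous fetch so an unchanged feed comes back as 304 Not Modified (which
# urllib2 raises as an HTTPError, hence the "RSS still valid" branch):
import urllib2


def build_conditional_request(url, last_modified):
    req = urllib2.Request(url)
    if last_modified:
        req.add_header('If-Modified-Since', last_modified)
    return req
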