def parse_feed(self, entry):
    """Extract the list of articles from one feed.

    ``entry`` is a ``(url, publisher, publisher_location)`` triple. The feed
    at ``url`` is fetched and parsed with ``feedparser``. For each feed
    entry an ``Article`` is built, the entry's link is handed to
    ``self.htmlparser.parse`` to obtain the content, a trailing
    "- Some Source" attribution is split off that content, and the article
    is stored via ``article.store(self.db)``.

    Returns the list of stored articles, or an empty list when the feed URL
    cannot be opened (the failure is reported on stdout).
    """
    articles = []
    (url, publisher, publisher_location) = entry

    try:
        c = urlopen(url)
    except URLError:
        # Nothing to parse without a response; return early rather than
        # falling through and referencing the unbound response object.
        print('Failed to fetch ' + url)
        return articles

    try:
        feed = feedparser.parse(c)
    finally:
        # The response is only needed while feedparser reads it.
        c.close()

    # Trailing attribution of one to seven words, e.g. "- Some News Agency".
    source_re = re.compile(r'-\s*([a-zA-Z]+(,?\s+[a-zA-Z]+){0,6})$')
    # Same tail, plus an optional literal backslash-n and the whitespace in
    # front of it, so the whole attribution can be stripped from the content.
    source_strip_re = re.compile(
        r'(\\n)?\s*-\s*([a-zA-Z]+(,?\s+[a-zA-Z]+){0,6})$')

    # While debugging, iterate feed.entries[:1] to read just the first entry.
    for e in feed.entries:
        image_link = None
        image_type = None
        for link in e.links:
            # If several enclosures are present, the last one wins.
            if link.get('rel') == 'enclosure':
                image_link = link['href']
                # Not every enclosure declares a MIME type.
                image_type = link.get('type')

        article = Article(
            publisher=publisher,
            publisher_location=publisher_location,
            published_date=e.updated_parsed,
            title=e.title,
            link=e.link,
            image_link=image_link,
            image_type=image_type)

        content = self.htmlparser.parse(e.link)
        m = source_re.search(content)
        if m:
            article.source = m.group(1)
        # sub() leaves the content untouched when there is no attribution.
        article.content = source_strip_re.sub('', content)

        article.store(self.db)  # put article and word frequencies into couchdb
        articles.append(article)

    return articles