import sys  # required by main(); assumed to sit at the top of the module


def main():
    wf = Web2Feed(sys.argv[1])
    wf.set_caching(True)
    wf.set_robot_rules_policy(True, False)
    print wf.get_feed()
    sys.exit()
    # Unreachable debug path: sys.exit() above always terminates first, and
    # `uri` is undefined in this scope; kept commented out for reference.
    # Note the ternary must be parenthesized so the whole expression feeds
    # the %-format, not just the 0.
    #content = get_page(uri)
    #print "Content len: %d" % (0 if not content else len(content))
    #sc = get_scraper(content, uri)
    ##print sc.get_feed()
    #print sc.get_plaintext(80)
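# Usage sketch (illustrative): assuming the module is saved as web2feed.py
# (hypothetical filename) with an `if __name__ == '__main__': main()` guard
# at the bottom, the scraper would be run against a URI passed as the first
# command-line argument:
#
#   $ python web2feed.py http://example.com/blog/
#
# main() then prints the formatted feed dict returned by get_feed().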
def get_feed(self):
    """Scrape the page for semantic content."""
    def get_formatted(scraper):
        return {
            'meta': scraper.get_meta(),
            'feed': scraper.get_feed(),
        }
    # Reuse the scraper built by a previous call, if any.
    if self.scraper:
        return get_formatted(self.scraper)
    # No contents yet: fetch lazily when auto-fetch is enabled.
    if not self.contents and self.do_auto_fetch:
        print "Web2Feed.get_feed() attempting to fetch..."
        self.fetch()
    if not self.contents:
        #raise Exception, "No content to scrape!"
        print "Web2Feed.get_feed() unable to parse: no contents!"
        return False
    # TODO: Cache the scraper.
    self.scraper = get_scraper(self.contents, self.uri)
    return get_formatted(self.scraper)
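# Caller sketch (illustrative, not from the source): get_feed() returns
# False on failure, otherwise the dict built by get_formatted() above.
# The URI is hypothetical, and the contents of 'meta' and 'feed' depend
# on the scraper that get_scraper() selects for the page.
#
#   wf = Web2Feed('http://example.com/')
#   result = wf.get_feed()
#   if result:
#       print result['meta']
#       print result['feed']
#   else:
#       print "scrape failed: no contents"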