def test_generate_feed_entries(self):
    """
    Check that every entry produced by feeds.generate_feed_entries for a
    fixed Blogspot feed URL exposes title, etag, modified and link, and
    that both its raw and stripped content are not None.

    NOTE(review): presumably this needs network access to the live feed.
    """
    feed_url = "http://gilesbowkett.blogspot.com/feeds/posts/default"
    # Drain the generator completely before asserting on any entry.
    entries = list(feeds.generate_feed_entries(feed_url))

    for entry in entries:
        raw_content = entry.get('raw_content')
        stripped_content = entry.get('stripped_content')

        for field in ('title', 'etag', 'modified'):
            self.assert_(entry.has(field), "missing: %s" % field)
        print("modified: %s" % entry.modified)
        self.assert_(entry.has('link'), "missing: link")

        # TODO: verify no HTML in stripped content
        # Stripped content may legitimately be an empty string (not all
        # posts have content, it appears), so only None counts as missing.
        self.assert_(stripped_content is not None,
                     "missing: stripped_content")
        self.assert_(raw_content is not None, "missing: raw_content")
def get(self, feed_url):
    """
    Write the entries of the feed at ``http://<feed_url>`` as JSON.

    Each entry from generate_feed_entries becomes a dict with the keys
    title, link, content (the entry's stripped_content) and modified
    (stringified); the resulting list is handed to
    RequestHelper.write_json.
    """
    full_url = "http://%s" % feed_url
    helper = RequestHelper(self)

    entries = []
    for entry in generate_feed_entries(full_url):
        entries.append({
            'title': entry.title,
            'link': entry.link,
            'content': entry.stripped_content,
            'modified': str(entry.modified),
        })

    helper.write_json(entries)
def ingest_feed_entries(feed, user, error_call=None):
    """
    Ingest the non-empty entries of a feed as plain-text artifacts.

    For every entry from feeds.generate_feed_entries(feed.url) whose
    "stripped_content" is truthy, a URL resource is fetched or created for
    the entry link and an artifact holding the stripped content is found or
    created. Entries without stripped content are skipped silently.

    args:
      feed: provides ``url`` and ``artifact_source.name``; also passed to
        UrlResourceAccessor.get_or_create
      user: passed to ArtifactAccessor.find_or_create as ``modified_by``
      error_call: optional callable(entry, exception); when given, a failure
        while processing an entry is reported to it and ingestion continues
        with the next entry

    yields: (artifact guid, entry, created) tuple, where the guid is
      info_key.name() and ``created`` is the fourth value returned by
      ArtifactAccessor.find_or_create

    raises: whatever processing an entry raised, when error_call is not
      given
    """
    # TODO: use etag from previous ingest
    for entry in feeds.generate_feed_entries(feed.url):
        try:
            stripped_content = entry.get("stripped_content")
            if not stripped_content:
                # only non-empty entries are ingested
                continue

            link = entry.get("link")
            raw_modified = entry.get("modified")
            if raw_modified:
                # Use only (year, month, day, hour, minute, second). The
                # former [0:-2] slice kept a seventh element, which for a
                # 9-field time tuple is the weekday and was passed to
                # datetime() as the microsecond.
                # NOTE(review): assumes "modified" is a time.struct_time-like
                # tuple -- confirm against feeds.generate_feed_entries.
                modified = datetime(*raw_modified[:6])
            else:
                modified = None
            logging.debug("%s modified %s (%s)",
                          link, modified, modified.__class__)

            url_resource = UrlResourceAccessor.get_or_create(
                link, source_modified=modified, feed=feed)

            # TODO: check if there is already an artifact for this resource
            info_key, content_key, source_key, created = \
                ArtifactAccessor.find_or_create(
                    source=feed.artifact_source.name,
                    content_type="text/plain",
                    body=stripped_content,
                    url=link,
                    url_resource=url_resource,
                    modified_by=user)
            guid = info_key.name()
        except Exception as e:
            if error_call:
                error_call(entry, e)
            else:
                # bare raise keeps the original traceback
                raise
        else:
            # Yield outside the try so that exceptions injected at the yield
            # point (generator throw/close) are not treated as ingest errors.
            yield guid, entry, created
def ingest_feed_entries(feed, user, error_call=None):
    """
    Ingest the non-empty entries of a feed as plain-text artifacts.

    For every entry from feeds.generate_feed_entries(feed.url) whose
    "stripped_content" is truthy, a URL resource is fetched or created for
    the entry link and an artifact holding the stripped content is found or
    created. Entries without stripped content are skipped silently.

    args:
      feed: provides ``url`` and ``artifact_source.name``; also passed to
        UrlResourceAccessor.get_or_create
      user: passed to ArtifactAccessor.find_or_create as ``modified_by``
      error_call: optional callable(entry, exception); when given, a failure
        while processing an entry is reported to it and ingestion continues
        with the next entry

    yields: (artifact guid, entry, created) tuple, where the guid is
      info_key.name() and ``created`` is the fourth value returned by
      ArtifactAccessor.find_or_create

    raises: whatever processing an entry raised, when error_call is not
      given
    """
    # TODO: use etag from previous ingest
    for entry in feeds.generate_feed_entries(feed.url):
        try:
            stripped_content = entry.get("stripped_content")
            if not stripped_content:
                # only non-empty entries are ingested
                continue

            link = entry.get("link")
            raw_modified = entry.get("modified")
            if raw_modified:
                # Use only (year, month, day, hour, minute, second). The
                # former [0:-2] slice kept a seventh element, which for a
                # 9-field time tuple is the weekday and was passed to
                # datetime() as the microsecond.
                # NOTE(review): assumes "modified" is a time.struct_time-like
                # tuple -- confirm against feeds.generate_feed_entries.
                modified = datetime(*raw_modified[:6])
            else:
                modified = None
            logging.debug("%s modified %s (%s)",
                          link, modified, modified.__class__)

            url_resource = UrlResourceAccessor.get_or_create(
                link, source_modified=modified, feed=feed)

            # TODO: check if there is already an artifact for this resource
            info_key, content_key, source_key, created = \
                ArtifactAccessor.find_or_create(
                    source=feed.artifact_source.name,
                    content_type="text/plain",
                    body=stripped_content,
                    url=link,
                    url_resource=url_resource,
                    modified_by=user)
            guid = info_key.name()
        except Exception as e:
            if error_call:
                error_call(entry, e)
            else:
                # bare raise keeps the original traceback
                raise
        else:
            # Yield outside the try so that exceptions injected at the yield
            # point (generator throw/close) are not treated as ingest errors.
            yield guid, entry, created
def test_generate_feed_entries(self):
    """
    Check that every entry produced by feeds.generate_feed_entries for a
    fixed Blogspot feed URL exposes title, etag, modified and link, and
    that both its raw and stripped content are not None.

    NOTE(review): presumably this needs network access to the live feed.
    """
    feed_url = "http://gilesbowkett.blogspot.com/feeds/posts/default"
    # Drain the generator completely before asserting on any entry.
    entries = list(feeds.generate_feed_entries(feed_url))

    for entry in entries:
        raw_content = entry.get('raw_content')
        stripped_content = entry.get('stripped_content')

        for field in ('title', 'etag', 'modified'):
            self.assert_(entry.has(field), "missing: %s" % field)
        print("modified: %s" % entry.modified)
        self.assert_(entry.has('link'), "missing: link")

        # TODO: verify no HTML in stripped content
        # Stripped content may legitimately be an empty string (not all
        # posts have content, it appears), so only None counts as missing.
        self.assert_(stripped_content is not None,
                     "missing: stripped_content")
        self.assert_(raw_content is not None, "missing: raw_content")
def get(self, feed_url):
    """
    Write the entries of the feed at ``http://<feed_url>`` as JSON.

    Each entry from generate_feed_entries becomes a dict with the keys
    title, link, content (the entry's stripped_content) and modified
    (stringified); the resulting list is handed to
    RequestHelper.write_json.
    """
    full_url = "http://%s" % feed_url
    helper = RequestHelper(self)

    entries = []
    for entry in generate_feed_entries(full_url):
        entries.append({
            'title': entry.title,
            'link': entry.link,
            'content': entry.stripped_content,
            'modified': str(entry.modified),
        })

    helper.write_json(entries)