def _validate_result(self, result: RawFeedResult) -> RawFeedResult:
    """Validate the feed and every story of a raw parse result.

    The feed is passed through ``validate_raw_feed`` and each story through
    ``validate_raw_story`` inside a ``mark_index`` context keyed by the
    story's position in ``result.storys``.

    Returns:
        A new ``RawFeedResult`` built from the validated feed and stories,
        carrying over ``result.warnings``.

    Raises:
        FeedParserError: when any validation step raises ``Invalid``; the
            message is ``str()`` of the original error, which is chained
            as the cause.
    """
    validated_storys = []
    try:
        validated_feed = validate_raw_feed(result.feed)
        for position, raw_story in enumerate(result.storys):
            with mark_index(position):
                validated_storys.append(validate_raw_story(raw_story))
    except Invalid as error:
        raise FeedParserError(str(error)) from error
    return RawFeedResult(
        validated_feed,
        validated_storys,
        warnings=result.warnings,
    )
def _validate_result(self, result: FeedResult) -> FeedResult:
    """Validate the feed and every story of a parse result.

    The feed is passed through ``validate_feed`` and each story through
    ``validate_story`` inside a ``mark_index`` context keyed by the story's
    position in ``result.storys``.

    Returns:
        A new ``FeedResult`` built from the validated feed and stories,
        carrying over ``result.checksum``.

    Raises:
        FeedParserError: when any validation step raises ``Invalid``; the
            message is ``str()`` of the original error, which is chained
            as the cause.
    """
    validated_storys = []
    try:
        validated_feed = validate_feed(result.feed)
        for position, story in enumerate(result.storys):
            with mark_index(position):
                validated_storys.append(validate_story(story))
    except Invalid as error:
        raise FeedParserError(str(error)) from error
    return FeedResult(
        validated_feed,
        validated_storys,
        checksum=result.checksum,
    )
def _parse(content, headers, validate=True):
    """Parse feed content and return a ``FeedParserResult``.

    The result can be pickle-serialized, which makes it convenient to use
    across multiple processes.

    Args:
        content: raw feed bytes; wrapped in a ``BytesIO`` for the parser.
        headers: passed to ``feedparser.parse`` as ``response_headers``.
        validate: when true, the feed info and each entry are run through
            ``validate_feed`` / ``validate_story``; otherwise the parser's
            own objects are returned as-is.

    Returns:
        A ``FeedParserResult`` with ``feed``, ``entries``, ``version``,
        ``bozo`` and ``bozo_exception`` (a string, empty when no error).
    """
    parsed = feedparser.parse(
        BytesIO(content),
        response_headers=headers,
        sanitize_html=False,
        resolve_relative_uris=False,
    )

    if parsed.bozo:
        # The parser flagged a problem: keep its flag and describe the
        # exception (if any) as "<module>.<class>: <message>".
        bozo = parsed.bozo
        error = parsed.get("bozo_exception")
        if error:
            error_type = type(error)
            qualified_name = error_type.__module__ + "." + error_type.__name__
            bozo_exception = f"{qualified_name}: {error}"
        else:
            bozo_exception = ""
    elif parsed.feed.get("title"):
        bozo = 0
        bozo_exception = ""
    else:
        # A feed without a title is treated as an error.
        bozo = 1
        bozo_exception = "the feed no title, considered not a feed."

    if validate:
        feed_info = validate_feed(parsed.feed)
        entries = []
        for index, entry in enumerate(parsed.entries):
            # mark_index wraps each entry's validation with its position.
            with mark_index(index):
                entries.append(validate_story(entry))
    else:
        feed_info = parsed.feed
        entries = parsed.entries

    return FeedParserResult(
        feed=feed_info,
        entries=entries,
        version=parsed.get("version") or "",
        bozo=bozo,
        bozo_exception=bozo_exception,
    )
def test_mark_key_index():
    """Raise ``Invalid`` under a key mark, an index mark and a bare index mark.

    The contexts are entered outermost-first: ``mark_key('key')``, then
    ``mark_index(0)``, then ``mark_index()`` with no argument.
    """
    # NOTE(review): nothing visible here catches or asserts on the raised
    # Invalid -- confirm whether the full test wraps this in an expectation.
    with mark_key("key"), mark_index(0), mark_index():
        raise Invalid("invalid")
def test_mark_index_uncertainty():
    """Raise ``Invalid`` inside ``mark_index()`` called without an index."""
    # NOTE(review): nothing visible here catches or asserts on the raised
    # Invalid -- confirm whether the full test wraps this in an expectation.
    with mark_index():
        raise Invalid("invalid")
def test_mark_index():
    """Raise ``Invalid`` inside ``mark_index(0)``, i.e. with an explicit index."""
    # NOTE(review): nothing visible here catches or asserts on the raised
    # Invalid -- confirm whether the full test wraps this in an expectation.
    with mark_index(0):
        raise Invalid("invalid")