def __init__(self, feedRes):
    """Set up the reader for ``feedRes`` and parse the feed at its URL.

    Stores the resource, creates a fresh ``FeedParser`` and marks the
    reader as skipped before handing ``feedRes.url`` to
    ``initFeedParser``.
    """
    feedUrl = feedRes.url
    self.feedRes = feedRes
    self.feedParser = FeedParser()
    # Start out skipped; the flag is set before any parsing is attempted.
    self.isSkip = True
    self.initFeedParser(feedUrl)
class FeedReader:
    """Read one feed resource and turn its ``<item>`` nodes into FeedItems.

    On construction the feed behind ``feedRes.url`` is parsed. The reader
    starts out skipped (``isSkip`` is True) and the flag is only cleared
    by ``_fillFeedResPubDate`` when the resource reports itself updated;
    a skipped reader yields no items from ``getFeedItems``.
    """

    noDuplicate = False

    def __init__(self, feedRes):
        """Store ``feedRes`` and parse the feed found at ``feedRes.url``."""
        # Skipped by default; cleared later only for an updated feed.
        self.isSkip = True
        self.feedParser = FeedParser()
        self.feedRes = feedRes
        self.initFeedParser(feedRes.url)

    def initFeedParser(self, url):
        """Parse the feed at ``url`` and refresh the resource's pubDate.

        A ``url`` starting with ``'http'`` is passed to the parser as
        ``url=``; anything else is passed as ``file=``. A connection
        failure (``urllib.error.URLError``) is reported on stdout and
        leaves the reader skipped instead of propagating.
        """
        try:
            if url.startswith('http'):
                self.feedParser.parse(url=url)
            else:
                self.feedParser.parse(file=url)
            if self.feedParser.isAvailable():
                self._fillFeedResPubDate()
        except urllib.error.URLError:
            self.isSkip = True
            print("Can't connect to {}".format(url))

    def getFeedItems(self, allowDuplicate=True):
        """Return the feed's items as a list of FeedItem objects.

        Returns an empty list when the reader is skipped. When
        ``allowDuplicate`` is false, only items for which ``_isFeedNew``
        is true are kept.
        """
        if self.isSkip:
            return []
        # Lazily build each item, then filter; per item the order is the
        # same as before: construct first, then (optionally) test newness.
        candidates = (
            self._generateFeedItemFromNode(node)
            for node in self.feedParser.findall('.//item')
        )
        return [
            item for item in candidates
            if allowDuplicate or self._isFeedNew(item)
        ]

    def _generateFeedItemFromNode(self, node):
        """Build a FeedItem from the title/link/pubDate/description of ``node``."""
        title = self._getTextIfNotNone(node.find('title'))
        link = self._getTextIfNotNone(node.find('link'))
        pubDate = self._getTextIfNotNone(node.find('pubDate'))
        desc = self._getTextIfNotNone(node.find('description'))
        return FeedItem(title, pubDate, link, desc, self.feedRes.id)

    def _isFeedNew(self, feedItem):
        """Return True when ``feedItem`` is updated and not already cached.

        "Updated" is ``feedItem.isUpdated`` against the stored update time
        of this resource; an empty cache counts as "not cached".
        """
        feedResUpdateTime = FeedResUpdateTime.get(self.feedRes.id)
        isFeedUpdated = feedItem.isUpdated(feedResUpdateTime)
        # Cache.isExist is only consulted when the cache has entries.
        notInCache = Cache.isEmpty() or not Cache.isExist(feedItem)
        return isFeedUpdated and notInCache

    def _fillFeedResPubDate(self):
        """Record the feed's publication date on the resource if it changed.

        When the resource reports itself updated for the new date, the
        date is stored, persisted through ``ResourceOperator`` and the
        reader is un-skipped. Any failure is reported on stdout and the
        reader keeps its current ``isSkip`` value (best effort).
        """
        try:
            newPubDate = str2Time(self._getResPubDate())
            if self.feedRes.isUpdated(newPubDate):
                self.feedRes.pubDate = newPubDate
                ResourceOperator().addFeedResUpdateTime(self.feedRes)
                # NOTE(review): assumed to belong to the "updated" branch so
                # that unchanged feeds stay skipped -- confirm against the
                # original indentation.
                self.isSkip = False
        except Exception as e:
            # The previous `except e:` referenced an unbound name, so every
            # error surfaced as a NameError instead of being reported here.
            print("Error occured", e)

    def _getResPubDate(self):
        """Return the text of ``lastBuildDate``, falling back to ``pubDate``.

        Raises:
            AttributeError: if the parsed feed contains neither node.
        """
        pubDateNode = self.feedParser.find('.//lastBuildDate')
        if pubDateNode is None:
            pubDateNode = self.feedParser.find('.//pubDate')
        return pubDateNode.text

    def _isUpdated(self, feedResId):
        """Placeholder check that always returns False."""
        return False

    def _getTextIfNotNone(self, element):
        """Return ``element.text``, or a placeholder object for a missing node."""
        if element is not None:
            return element.text
        # NOTE(review): a fresh object() is an unusual stand-in for missing
        # text; kept as-is because callers may rely on it -- confirm whether
        # None or '' was intended.
        return object()