Exemple #1
0
 def __init__(self, feedRes):
   """Bind this reader to *feedRes* and immediately parse its feed.

   feedRes: feed resource object; only its ``url`` attribute is read here.
   """
   # Store the resource and a fresh parser first so both exist on the
   # instance before parsing is kicked off.
   self.feedRes = feedRes
   self.feedParser = FeedParser()
   # The reader starts out flagged as "skip"; this must be set before
   # initFeedParser runs (presumably that call may clear it -- confirm
   # against the method's definition elsewhere in the class).
   self.isSkip = True
   self.initFeedParser(feedRes.url)
Exemple #2
0
class FeedReader:
  """Parse one feed resource and expose its ``<item>`` nodes as FeedItem objects.

  On construction the feed behind ``feedRes.url`` is parsed.  The reader stays
  in the "skip" state (``isSkip`` is True, ``getFeedItems`` returns ``[]``)
  unless the parser reports the feed as available and the feed's publication
  date is newer than the one stored on ``feedRes``.
  """

  noDuplicate = False

  def __init__(self, feedRes):
    """Bind the reader to *feedRes* and parse its feed right away.

    feedRes: feed resource object; this class reads its ``url`` and ``id``
      attributes, calls its ``isUpdated`` method and writes its ``pubDate``.
    """
    # Start as "skip"; only _fillFeedResPubDate clears the flag.
    self.isSkip = True
    self.feedParser = FeedParser()
    self.feedRes = feedRes
    self.initFeedParser(feedRes.url)

  def initFeedParser(self, url):
    """Parse the feed at *url* and refresh the resource's publication date.

    A *url* starting with ``'http'`` is passed to the parser as ``url=``;
    anything else is passed as ``file=``.  A ``urllib.error.URLError`` is
    reported on stdout and leaves the reader in the "skip" state.
    """
    try:
      if url.startswith('http'):
        self.feedParser.parse(url=url)
      else:
        self.feedParser.parse(file=url)
      if self.feedParser.isAvailable():
        self._fillFeedResPubDate()
    except urllib.error.URLError:
      self.isSkip = True
      print("Can't connect to {}".format(url))

  def getFeedItems(self, allowDuplicate=True):
    """Return the feed's items as a list of FeedItem objects.

    allowDuplicate: when True every item is returned; when False only items
      accepted by ``_isFeedNew`` are kept.

    Returns an empty list when the reader is in the "skip" state.
    """
    if self.isSkip:
      return []
    items = []
    for node in self.feedParser.findall('.//item'):
      feedItem = self._generateFeedItemFromNode(node)
      # Short-circuit: the freshness/cache check only runs when duplicates
      # are not allowed.
      if allowDuplicate or self._isFeedNew(feedItem):
        items.append(feedItem)
    return items

  def _generateFeedItemFromNode(self, node):
    """Build a FeedItem from the title/link/pubDate/description children of *node*."""
    title = self._getTextIfNotNone(node.find('title'))
    link = self._getTextIfNotNone(node.find('link'))
    pubDate = self._getTextIfNotNone(node.find('pubDate'))
    desc = self._getTextIfNotNone(node.find('description'))
    return FeedItem(title, pubDate, link, desc, self.feedRes.id)

  def _isFeedNew(self, feedItem):
    """Return True when *feedItem* is updated and not already in the cache.

    "Updated" is decided by ``feedItem.isUpdated`` against the update time
    stored in ``FeedResUpdateTime`` for this resource.  An empty cache counts
    as "not in cache".
    """
    feedResUpdateTime = FeedResUpdateTime.get(self.feedRes.id)
    isFeedUpdated = feedItem.isUpdated(feedResUpdateTime)
    # The cache is consulted even when the item is not updated, exactly as
    # before, so any side effects of the Cache calls are preserved.
    notInCache = Cache.isEmpty() or not Cache.isExist(feedItem)
    return isFeedUpdated and notInCache

  def _fillFeedResPubDate(self):
    """Store the feed's publication date on the resource if it is newer.

    When ``feedRes.isUpdated`` accepts the new date, the date is written to
    ``feedRes.pubDate``, recorded through ``ResourceOperator`` and the "skip"
    flag is cleared.  Any error is printed and otherwise ignored, leaving the
    reader in its current state.
    """
    try:
      newPubDate = str2Time(self._getResPubDate())
      if self.feedRes.isUpdated(newPubDate):
        self.feedRes.pubDate = newPubDate
        ResourceOperator().addFeedResUpdateTime(self.feedRes)
        self.isSkip = False
    except Exception as e:
      # Was ``except e:``, which itself raised NameError while handling the
      # original error.  Best-effort: report and keep going.
      print("Error occured", e)

  def _getResPubDate(self):
    """Return the text of the feed's ``lastBuildDate`` node, else ``pubDate``.

    Raises ValueError when the feed contains neither element.
    """
    pubDateNode = self.feedParser.find('.//lastBuildDate')
    if pubDateNode is None:
      pubDateNode = self.feedParser.find('.//pubDate')
    if pubDateNode is None:
      raise ValueError("feed has no lastBuildDate or pubDate element")
    return pubDateNode.text

  def _isUpdated(self, feedResId):
    """Placeholder check; always returns False regardless of *feedResId*."""
    return False

  def _getTextIfNotNone(self, element):
    """Return ``element.text``, or a fresh ``object()`` when *element* is None."""
    if element is not None:
      return element.text
    # NOTE(review): a new opaque object is returned for a missing element, so
    # it never compares equal to any other value.  Kept as-is because FeedItem's
    # expectations are not visible here -- confirm whether None or '' was meant.
    return object()