Exemple #1
0
  def checkUpdate(self):
    """Check for an update by scraping the paginated specials pages.

    Fetches ``self.SPECIALS_PAGE_URL % page`` for successive page numbers,
    collects every ``self.regex`` match from each page, and stops at the first
    page that yields no matches.  Each collected snippet is then parsed into a
    ``(title, price, sale_price, link)`` tuple:

    * ``self.dataregex`` match     -> all four captured values are used;
    * ``self.datafreeregex`` match -> price and sale price are recorded as 0;
    * neither matches              -> a warning is logged, unless
      ``self.dataemptyregex`` matches the snippet, in which case it is
      skipped silently.

    Raises:
      ValueError: if a regex matches but does not capture the number of
        groups being unpacked.
    """
    items = []
    maxPages = 7

    # Check for multiple pages of sales.  range() excludes its upper bound, so
    # pages 1 .. maxPages - 1 are requested.
    for page in range(1, maxPages):
      data = urlfetch.fetch(self.SPECIALS_PAGE_URL % page).read()
      new_items = re.findall(self.regex, data)
      self.log.debug("Found %d sales for page %d" % (len(new_items), page))
      items.extend(new_items)

      # Stop checking pages after an empty one
      if not new_items:
        break

    sales = []

    for item in items:
      # Test each match for None explicitly rather than catching the
      # AttributeError raised by None.groups(); a broad except would also hide
      # genuine attribute errors (e.g. a misspelled regex attribute).
      match = self.dataregex.search(item)
      if match is not None:
        link, price, sale_price, title = match.groups()
        sales.append((title, price, sale_price, link))
        continue

      match = self.datafreeregex.search(item)
      if match is not None:
        link, title = match.groups()
        sales.append((title, 0, 0, link))
        continue

      # Snippets recognised by dataemptyregex are skipped without a warning.
      if self.dataemptyregex.search(item) is None:
        self.log.warning("Regex didn't match item: %s" % item)
    # NOTE(review): nothing visible here consumes `sales` or writes to
    # self.items -- confirm whether the method continues beyond this point.
  def checkUpdate(self):
    """Record a SimpleWebsiteItem when the fetched page differs from last time.

    The first fetch (``self.state`` is None) only stores the page content as
    the baseline; later fetches replace the baseline and append an item to
    ``self.items`` whenever the content has changed.
    """
    fetched = urlfetch.fetch(self.source).read()
    previous = self.state

    # A change can only be reported once a baseline exists
    changed = previous is not None and previous != fetched

    if previous is None or changed:
      self.state = fetched
    if changed:
      self.items.append(SimpleWebsiteItem(self.source))
 def checkUpdate(self):
   """Record a RegexWebsiteItem when the regex match on the page changes.

   The page at ``self.source`` is fetched and searched with ``self.regex``.
   The first successful match only seeds ``self.lastmatch``; afterwards an
   item is appended to ``self.items`` each time the matched text differs from
   the stored one.

   Raises:
     Exception: if the regex does not match the fetched page at all.
   """
   page = urlfetch.fetch(self.source).read()

   found = self.regex.search(page)
   if found is None:
     raise Exception("No match for regex on %s" % self.source)

   current = found.group(0)
   previous = self.lastmatch

   if previous is None or previous != current:
     self.lastmatch = current
     # Only a change relative to an existing baseline counts as an update
     if previous is not None:
       self.items.append(RegexWebsiteItem(self.source))
Exemple #4
0
  def checkUpdate(self):
    """Check for an update, and put it in self.items

    Downloads and parses the Atom feed at ``self.source``.  Every ``entry``
    that appears before the entry whose title equals ``self.latest`` is
    appended to ``self.items`` as an AtomItem, last-in-document first, and
    ``self.latest`` is then set to the title of the first entry.

    If ``self.latest`` is None, or no entry carries that title any more, all
    entries are appended.  A feed without entries leaves ``self.items`` and
    ``self.latest`` untouched.

    NOTE(review): assumes the feed lists entries newest first, as implied by
    taking the first entry as the latest and iterating in reverse -- confirm.
    """
    # Pull feed data
    feed = urlfetch.fetch(self.source).read()
    data = minidom.parseString(feed)

    entries = data.getElementsByTagName("entry")
    if not entries:
      # Nothing to report; indexing entries[0] below would raise IndexError
      return

    def entryTitle(entry):
      # Text of the entry's first <title> element
      return entry.getElementsByTagName("title")[0].firstChild.data

    # Read the newest title before touching self.items so that a malformed
    # first entry fails before any item has been appended.
    latest = entryTitle(entries[0])

    # Number of leading entries not reported yet.  Stopping at the last-seen
    # title keeps the older entries behind it from being appended again on
    # every poll.
    newCount = len(entries)
    if self.latest is not None:
      for index, entry in enumerate(entries):
        if entryTitle(entry) == self.latest:
          newCount = index
          break

    # Construct the items, walking the unreported entries in reverse order
    for entry in reversed(entries[:newCount]):
      self.items.append(AtomItem(entry, {'source': self.source}))
    self.latest = latest
Exemple #5
0
  def checkUpdate(self):
    """Check for an update, and put it in self.items

    Downloads and parses the RSS feed at ``self.source``.  The feed lists
    items newer->older, so the items positioned before the one whose hash
    equals ``self.latest`` are appended to ``self.items`` as RSSItems in
    older->newer order.  ``self.latest`` is then set to the hash of the first
    item.

    If ``self.latest`` is None every item is appended.  A feed without items
    leaves ``self.items`` and ``self.latest`` untouched.
    """
    # Pull feed data
    feed = urlfetch.fetch(self.source).read()
    data = minidom.parseString(feed)

    items = data.getElementsByTagName("item")
    if not items:
      # Nothing to report; indexing items[0] below would raise IndexError
      return

    if self.latest is None:
      # No marker yet: every item counts as new
      newCount = len(items)
    else:
      # Scan from the oldest item towards the newest for the last-seen hash;
      # everything at a lower index than the hit is new.
      # NOTE(review): when the hash is not found (e.g. the item has dropped
      # off the feed) nothing is reported -- confirm this is intended.
      newCount = 0
      for index in range(len(items) - 1, -1, -1):
        if getItemHash(items[index]) == self.latest:
          newCount = index
          break

    # Add the new items oldest->newest
    for index in range(newCount - 1, -1, -1):
      self.items.append(RSSItem(items[index], {'source': self.source}))

    self.latest = getItemHash(items[0])