def checkUpdate(self):
    """Scan the specials pages and parse every sale entry found on them.

    Pages are fetched one at a time from ``self.SPECIALS_PAGE_URL`` (formatted
    with the page number). Every ``self.regex`` match on a page is collected,
    and scanning stops at the first page that yields no matches. Each
    collected entry is then parsed into a
    ``(title, price, sale_price, link)`` tuple.

    NOTE(review): the parsed ``sales`` list is neither stored nor returned by
    the code visible here -- confirm where it is meant to be consumed.
    """
    items = []
    maxPages = 7

    # Check for multiple pages of sales. range() excludes its upper bound,
    # so pages 1 .. maxPages - 1 are the ones actually fetched.
    for page in range(1, maxPages):
        data = urlfetch.fetch(self.SPECIALS_PAGE_URL % page).read()
        new_items = re.findall(self.regex, data)
        self.log.debug("Found %d sales for page %d" % (len(new_items), page))
        items.extend(new_items)

        # Stop checking pages after an empty one
        if not new_items:
            break

    sales = []
    for item in items:
        # Test each pattern explicitly instead of catching AttributeError
        # from ``None.groups()``: a genuine attribute error (for example a
        # missing regex attribute) must surface, not be read as "no match".
        match = self.dataregex.search(item)
        if match is not None:
            link, price, sale_price, title = match.groups()
            sales.append((title, price, sale_price, link))
            continue

        # Fallback pattern that only captures link and title; both prices
        # are recorded as 0 (presumably a free item -- confirm).
        match = self.datafreeregex.search(item)
        if match is not None:
            link, title = match.groups()
            sales.append((title, 0, 0, link))
            continue

        # Entries matching the "empty" pattern are skipped silently;
        # anything else is unexpected markup worth a warning.
        if self.dataemptyregex.search(item) is None:
            self.log.warning("Regex didn't match item: %s" % item)
def checkUpdate(self):
    """Check for an update, and put it in self.items

    The full content fetched from ``self.source`` is compared with the copy
    kept in ``self.state``. The first fetch only records the content; a later
    fetch that differs updates ``self.state`` and appends a
    ``SimpleWebsiteItem`` for the source to ``self.items``.
    """
    content = urlfetch.fetch(self.source).read()

    # With no stored state there is nothing to compare against yet.
    first_run = self.state is None
    if first_run or self.state != content:
        self.state = content
        if not first_run:
            self.items.append(SimpleWebsiteItem(self.source))
def checkUpdate(self):
    """Check for an update, and put it in self.items

    The text matched by ``self.regex`` in the content fetched from
    ``self.source`` is compared with ``self.lastmatch``. The first successful
    match is only recorded; a later match that differs updates
    ``self.lastmatch`` and appends a ``RegexWebsiteItem`` for the source to
    ``self.items``.

    Raises:
        Exception: if ``self.regex`` does not match the fetched content.
    """
    page = urlfetch.fetch(self.source).read()

    found = self.regex.search(page)
    if found is None:
        raise Exception("No match for regex on %s" % self.source)

    current = found.group(0)
    # With no previous match recorded there is nothing to compare against.
    first_run = self.lastmatch is None
    if first_run or self.lastmatch != current:
        self.lastmatch = current
        if not first_run:
            self.items.append(RegexWebsiteItem(self.source))
def checkUpdate(self):
    """Check for an update, and put it in self.items

    Parses the Atom document fetched from ``self.source`` and appends an
    ``AtomItem`` to ``self.items`` for every ``<entry>`` that is newer than
    the one whose title is stored in ``self.latest``. Entries are appended
    oldest first. Afterwards ``self.latest`` is set to the title of the first
    entry in the feed.

    The feed is assumed to list entries newest first, as the original code
    did by treating ``items[0]`` as the latest entry. If ``self.latest`` is
    ``None`` or is no longer present in the feed, every entry is treated as
    new. A feed without entries leaves ``self.items`` and ``self.latest``
    untouched.
    """
    def entry_title(entry):
        # Text of the entry's first <title> element.
        return entry.getElementsByTagName("title")[0].firstChild.data

    # Pull feed data
    feed = urlfetch.fetch(self.source).read()
    data = minidom.parseString(feed)

    entries = data.getElementsByTagName("entry")
    if not entries:
        # Nothing published: avoid the IndexError on entries[0] below.
        return
    latest = entry_title(entries[0])

    # Entries before the last-seen one are new. Only skipping the single
    # entry equal to self.latest would re-emit every older entry on each
    # poll, so everything from the last-seen entry onwards is dropped.
    new_count = len(entries)
    if self.latest is not None:
        for index, entry in enumerate(entries):
            if entry_title(entry) == self.latest:
                new_count = index
                break

    # Construct the items oldest -> newest
    for entry in reversed(entries[:new_count]):
        self.items.append(AtomItem(entry, {'source': self.source}))

    self.latest = latest
def checkUpdate(self):
    """Check for an update, and put it in self.items

    Parses the RSS document fetched from ``self.source`` and appends an
    ``RSSItem`` to ``self.items`` for every ``<item>`` that is newer than the
    one whose hash (``getItemHash``) is stored in ``self.latest``. Items are
    appended oldest first. Afterwards ``self.latest`` is set to the hash of
    the first item in the feed.

    The feed lists items newer -> older. When ``self.latest`` is ``None``
    every item is treated as new. A feed without items leaves ``self.items``
    and ``self.latest`` untouched.

    NOTE(review): when ``self.latest`` is set but no item in the feed matches
    it, nothing is appended (the original behaviour, preserved here), so
    items are lost if the last-seen one has dropped out of the feed --
    confirm that this is intended.
    """
    # Pull feed data
    feed = urlfetch.fetch(self.source).read()
    data = minidom.parseString(feed)

    items = data.getElementsByTagName("item")
    if not items:
        # Nothing published: avoid the IndexError on items[0] below.
        return

    if self.latest is None:
        # Nothing seen before, so the whole feed is new.
        new_count = len(items)
    else:
        # Scan from the oldest item towards the newest; everything in front
        # of the first match is newer than what was seen last time.
        new_count = 0
        for index in range(len(items) - 1, -1, -1):
            if getItemHash(items[index]) == self.latest:
                new_count = index
                break

    # Remaining items are new, so add them oldest -> newest
    for index in range(new_count - 1, -1, -1):
        self.items.append(RSSItem(items[index], {'source': self.source}))

    self.latest = getItemHash(items[0])