def get_last_pasties(self):
    """Fetch the archive page and collect the pasties not seen before.

    Downloads ``self.archive_url`` through ``self.user_agent``, extracts
    pastie ids from the page text with ``self.archive_regex`` and builds one
    pastie object per id that ``self.seen_pastie_and_remember`` does not
    report as already seen.

    Returns:
        A list of pastie objects (possibly empty when every id on the page
        was already seen), or ``False`` when the download yields a falsy
        response, the page text is empty, or the regex matches nothing.
    """
    archive_response = self.user_agent.download_url(self.archive_url)
    if not archive_response:
        logger.warning(
            "Failed to download page {url}".format(url=self.archive_url))
        return False

    page_html = archive_response.text
    if not page_html:
        logger.warning(
            "No HTML content for page {url}".format(url=self.archive_url))
        return False

    found_ids = self.re.findall(self.archive_regex, page_html)
    if not found_ids:
        # Nothing matched at all: most likely the regex no longer fits the
        # page layout, so dump the page to help diagnose it.
        logger.error(
            "No last pasties matches for regular expression site:{site} regex:{regex}. Error in your regex? Dumping htmlPage \n {html}"
            .format(site=self.name, regex=self.archive_regex, html=page_html))
        return False

    fresh_pasties = []
    for found_id in found_ids:
        # The call both checks and records the id, so it must run exactly
        # once per id, in page order.
        already_seen = self.seen_pastie_and_remember(found_id)
        if not already_seen:
            # Use the site-specific pastie class when one is configured,
            # otherwise fall back to the generic Pastie.
            make_pastie = self.pastie_class if self.pastie_class else Pastie
            fresh_pasties.append(make_pastie(self, found_id))
    return fresh_pasties
def __init__(self, site, pastie_id):
    """Initialise the instance by delegating to ``Pastie.__init__``.

    Args:
        site: Passed through unchanged to ``Pastie.__init__``.
        pastie_id: Passed through unchanged to ``Pastie.__init__``.
    """
    # Explicit base-class call: no extra state is set up here.
    Pastie.__init__(
        self,
        site,
        pastie_id,
    )