def get(self):
    """Scrape the listing page and store every post that is not saved yet.

    Downloads the listing at http://sfbay.craigslist.org/sfc/mis/, collects
    the links that sit directly inside a ``<p>`` element, and saves the raw
    HTML of each linked post as a ``MisHTML`` entity keyed by the id that
    ``helper.parse_id_from_url`` derives from the post URL.

    Iteration stops at the first post that is already stored, unless the
    request carries a non-empty ``force_fetch`` parameter, in which case
    every linked post is downloaded and written again.

    Writes ``'Found <n>'`` to the response, where ``n`` is the number of
    posts stored during this request.
    """

    def _read_url(url):
        # Read the whole body and always release the connection, even when
        # read() raises.
        conn = urllib2.urlopen(url)
        try:
            return conn.read()
        finally:
            conn.close()

    force_fetch = bool(self.request.get('force_fetch'))

    listing_soup = BeautifulSoup(
        _read_url('http://sfbay.craigslist.org/sfc/mis/'))
    # href=True skips anchors without an href attribute, which would
    # otherwise raise KeyError on anchor['href'].
    post_urls = [
        anchor['href']
        for anchor in listing_soup.findAll('a', href=True)
        if anchor.parent.name == 'p'
    ]

    count = 0
    for url in post_urls:
        post_id = helper.parse_id_from_url(url)

        # Check the datastore before downloading: the id comes from the URL
        # alone, so an already-stored post costs no HTTP request.
        # NOTE(review): stopping at the first known post presumably relies
        # on the listing being ordered newest-first -- confirm.
        if not force_fetch and MisHTML.get_by_key_name(post_id):
            break

        post_html = _read_url(url)
        posted = helper.parse_date(BeautifulSoup(post_html))

        # errors='ignore' drops any bytes the default codec cannot decode
        # instead of raising UnicodeDecodeError.
        entity = MisHTML(
            html=unicode(post_html, errors='ignore'),
            posted=posted,
            key_name=post_id,
        )
        entity.put()
        count += 1

    self.response.out.write('Found ' + str(count))