Example #1
0
File: main.py Project: awans/mis
 def get(self):
   """Scrape the sfbay craigslist 'mis' listing and store unseen posts.

   Fetches the listing page, collects the href of every <a> whose parent
   element is a <p>, and walks those URLs in page order.  For each URL a
   MisHTML entity keyed by the post id is written unless one already
   exists.  The walk stops at the first post that is already stored
   (presumably the listing is newest-first, so everything after it has
   been seen -- TODO confirm).

   Request parameters:
     force_fetch: when present with any non-empty value, every listed
       post is re-fetched and re-written and the walk never stops early.

   Writes 'Found <n>' to the response, where n is the number of entities
   written during this request.
   """
   def read_url(url):
     # urllib2 responses hold an open socket until closed; always release
     # it, even when read() raises.
     handle = urllib2.urlopen(url)
     try:
       return handle.read()
     finally:
       handle.close()

   # bool() of the raw parameter: any non-empty string enables the flag.
   force_fetch = bool(self.request.get('force_fetch'))

   soup = BeautifulSoup(read_url('http://sfbay.craigslist.org/sfc/mis/'))
   all_the_post_urls = [anchor['href'] for anchor
     in soup.findAll('a') if anchor.parent.name == 'p']

   count = 0

   for url in all_the_post_urls:
     # The id comes from the URL alone, so check the datastore before
     # paying for the download and parse of the post page.
     post_id = helper.parse_id_from_url(url)
     if not force_fetch and MisHTML.get_by_key_name(post_id):
       break

     post_html = read_url(url)
     dt = helper.parse_date(BeautifulSoup(post_html))

     # errors='ignore' drops bytes that cannot be decoded instead of
     # raising on malformed pages.
     m_h = MisHTML(html=unicode(post_html, errors='ignore'),
                   posted=dt, key_name=post_id)
     m_h.put()
     count += 1

   self.response.out.write('Found ' + str(count))