Example #1
0
File: main.py Project: awans/mis
 def get(self):
   """Scrape the listing page and store the raw HTML of posts not yet saved.

   Collects every link that sits directly inside a <p> on the listing page
   and walks them in page order. Each post whose id has no MisHTML entity
   yet is downloaded and stored. The walk stops at the first post that is
   already stored, unless the 'force_fetch' request parameter has a truthy
   value, in which case every linked post is fetched and written again.

   Writes 'Found <n>' to the response, where n is the number of posts
   stored during this request.
   """
   force_fetch = bool(self.request.get('force_fetch'))
   response = urllib2.urlopen('http://sfbay.craigslist.org/sfc/mis/')
   try:
     html = response.read()
   finally:
     # Release the connection even when the read fails.
     response.close()
   soup = BeautifulSoup(html)
   # Only anchors whose direct parent is a <p> are treated as post links.
   all_the_post_urls = [anchor['href'] for anchor
     in soup.findAll('a') if anchor.parent.name == 'p']

   count = 0

   for url in all_the_post_urls:
     # The id is derived from the URL alone, so the "already stored?" check
     # can run before any network traffic for this post.
     post_id = helper.parse_id_from_url(url)
     if not force_fetch and MisHTML.get_by_key_name(post_id):
       # NOTE(review): presumably the listing is newest-first, so everything
       # after the first known post is already stored -- confirm.
       break

     res = urllib2.urlopen(url)
     try:
       post_html = res.read()
     finally:
       res.close()
     post_soup = BeautifulSoup(post_html)
     dt = helper.parse_date(post_soup)

     # unicode() without an explicit encoding decodes with the interpreter's
     # default codec; errors='ignore' silently drops bytes that do not decode.
     m_h = MisHTML(html=unicode(post_html, errors='ignore'), posted=dt, key_name=post_id)
     m_h.put()
     count += 1

   self.response.out.write('Found ' + str(count))
Example #2
0
File: main.py Project: awans/mis
 def get(self):
   """Re-parse stored raw posts whose parser version is behind PARSER_VERSION.

   For every MisHTML entity with parsed_version below PARSER_VERSION, runs
   helper.parse_post on its HTML and key name, saves the result as a Mis
   entity keyed by the parsed id, then stamps the MisHTML entity with the
   current PARSER_VERSION and saves it.

   Writes 'Parsed <n>' to the response, where n is the number of entities
   processed.
   """
   # Fields copied verbatim from the parsed post onto the Mis entity.
   copied_fields = ('url', 'title', 'body', 'location', 'age',
                    'me_gender', 'you_gender')
   stale_posts = MisHTML.all().filter('parsed_version <', PARSER_VERSION)
   parsed_total = 0
   for raw in stale_posts:
     parsed = helper.parse_post(raw.html, raw.key().name())
     # The parsed id serves as both the datastore key name and a property.
     parsed_id = parsed['id']
     attrs = dict((field, parsed[field]) for field in copied_fields)
     attrs['key_name'] = parsed_id
     attrs['post_id'] = parsed_id
     attrs['html'] = raw.html
     attrs['posted'] = raw.posted
     Mis(**attrs).put()
     # Mark the raw entity only after the parsed entity has been saved.
     raw.parsed_version = PARSER_VERSION
     raw.put()
     parsed_total += 1

   self.response.out.write('Parsed ' + str(parsed_total))