def save_to_db(self, dic):
    """Validate one scraped deal and store it in the database.

    NOTE: this is Python 2 code (byte strings are ``decode``d below).

    Args:
        dic: mapping that must contain the keys 'title', 'original_price',
            'price', 'detail' and 'url'.  'url' is used as a key into
            ``self.index_urls``; the other values are accessed through
            ``.text`` / ``.renderContents`` (presumably BeautifulSoup
            nodes -- confirm against the caller).

    Returns:
        None.  Deals that fail validation, duplicate an existing title in
        the same city, or hit an error while saving are logged and skipped.

    Raises:
        AssertionError: if any required key is missing from ``dic``.
    """
    required_keys = ('title', 'original_price', 'price', 'detail', 'url')
    # Raised explicitly (rather than via ``assert``) so the check is not
    # stripped under ``python -O``; ``in`` replaces the deprecated
    # ``dict.has_key``.
    if not all(key in dic for key in required_keys):
        raise AssertionError("Information incomplete.")

    url = dic['url']
    original_price = dic['original_price'].text.encode('utf8')
    price = dic['price'].text.encode('utf8')
    title = dic['title'].text  # title is unicode
    detail = dic['detail'].renderContents(encoding='utf8')
    detail = utils.clean_detail(detail, self.home_url)

    # Data formatting & validation.
    raw_prices = [original_price, price]
    try:
        # Keep only the first run of digits found in each price string.
        original_price, price = [
            int(re.search(r'(\d+)', raw).group()) for raw in raw_prices
        ]
    except TypeError:
        logging.error("Price conversion failed. Detailed info: %s", raw_prices)
        return
    except AttributeError:
        # re.search() returned None: the price text contains no digits.
        logging.error("Regex failed on %s", raw_prices)
        return

    if not 10 <= len(title) <= 500:
        logging.error("Title length too short or too long : %s", title)
        return
    if len(detail) < 20:
        logging.error("Detail too short. %s", detail)
        return

    # Save to db.
    try:
        site = Site.select(Site.q.url == self.home_url)
        if site.count() != 1:
            raise AssertionError("%s not found or dups." % self.home_url)

        # Drop the leading marketing prefixes, then tag the title with the
        # name of the site it came from.
        prefixes = [s.decode('utf8') for s in ('今日团购', '今日精选', ':')]
        title = utils.lstrip(title, prefixes)
        title = title.strip()
        title = '[%s] %s' % (site[0].name, title)

        city_name = self.index_urls[url]
        city = City.select(City.q.name == city_name.decode('utf8'))
        if city.count() != 1:
            raise AssertionError("%s not found or dups." % city_name)
        cityID = city[0].id

        # Skip deals whose title already exists for the same city.
        duplicates = Deal.select(
            AND(Deal.q.title == title, Deal.q.cityID == cityID))
        if duplicates.count() > 0:
            logging.info("Title dups %s", title)
            return

        # The instance itself is not needed afterwards; constructing it is
        # what stores the deal (presumably SQLObject's insert-on-create --
        # confirm against the model definition).
        Deal(url=url, title=title, price=price,
             originalPrice=original_price, detail=detail.decode('utf8'),
             cityID=cityID, siteID=site[0].id)
        logging.info('%s OK', url)
    except Exception:
        # Simple handling for the moment.  ``Exception`` (not a bare
        # ``except``) so KeyboardInterrupt / SystemExit still propagate.
        logging.error("Error occured while saving data : %s", sys.exc_info())
import webbrowser

import sqlobject

import settings
from models import Deal

# Script: dump every Deal row into ``dump.html`` as a nested HTML list
# (one <li> per deal holding title, price/original price and detail).
# NOTE: Python 2 code -- byte strings are written to a file opened in 'wb'.

conn = sqlobject.connectionForURI(settings.CONNECTION_STRING)
sqlobject.sqlhub.processConnection = conn

deals = Deal.select()

# ``with`` guarantees the file is flushed and closed even if a row fails
# part-way through the dump.
with open('dump.html', 'wb') as f:
    f.write("""
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
</head>
<body>
""" + '\n')
    # Open the outer list that the per-deal <li> items and the closing
    # </ul> below belong to.
    f.write('<ul>\n')

    for deal in deals:
        f.write('<li>\n')
        f.write('<ul>\n')
        f.write('<li>%s</li>\n' % deal.title.encode('utf8'))
        f.write('<li>%s/%s</li>\n' % (deal.price, deal.originalPrice))
        # The detail is written unescaped, i.e. treated as HTML markup.
        f.write('<li>%s</li>\n' % deal.detail.encode('utf8'))
        f.write('</ul>\n')
        f.write('</li>\n')

    f.write('</ul>\n')
    f.write('</body>\n')
    f.write('</html>\n')