Example #1
0
    def save_to_db(self, dic):
        """Validate one scraped deal and store it in the database.

        ``dic`` must contain the keys ``title``, ``original_price``, ``price``,
        ``detail`` and ``url``.  ``title``, ``original_price`` and ``price``
        are read through a ``.text`` attribute and ``detail`` through
        ``renderContents()`` (presumably parsed markup nodes -- confirm against
        the caller).  ``url`` is also used as the key into ``self.index_urls``
        to find the city name.

        The deal is skipped (with a log entry, returning ``None``) when a price
        contains no digits, the title is shorter than 10 or longer than 500
        characters, the cleaned detail is shorter than 20 bytes, the site or
        city cannot be resolved to exactly one row, or a deal with the same
        title already exists for the city.

        Raises:
            AssertionError: if any of the required keys is missing from ``dic``.
        """
        required_keys = ('title', 'original_price', 'price', 'detail', 'url')
        # Raised explicitly instead of through ``assert`` so the check still
        # runs under ``python -O``; exception type and message are unchanged.
        # ``in`` replaces the deprecated ``dict.has_key``.
        if not all(key in dic for key in required_keys):
            raise AssertionError("Information incomplete.")

        url = dic['url']
        original_price = dic['original_price'].text.encode('utf8')
        price = dic['price'].text.encode('utf8')
        title = dic['title'].text  # title is unicode
        detail = dic['detail'].renderContents(encoding='utf8')
        detail = utils.clean_detail(detail, self.home_url)

        # Data formatting & validation.
        # Each price becomes the first run of digits found in its text.  On
        # failure the names still hold the raw strings, so the log shows them.
        try:
            original_price, price = [int(re.search(r'(\d+)', raw).group())
                                     for raw in (original_price, price)]
        except TypeError:
            logging.error("Price conversion failed. Detailed info: %s", [original_price, price])
            return
        except AttributeError:
            # re.search() returned None: no digits in one of the prices.
            logging.error("Regex failed on %s", [original_price, price])
            return

        if len(title) > 500 or len(title) < 10:
            logging.error("Title length too short or too long : %s", title)
            return

        if len(detail) < 20:
            logging.error("Detail too short. %s", detail)
            return

        # Save to db.
        try:
            site = Site.select(Site.q.url == self.home_url)
            # Explicit checks instead of ``assert`` so that a missing or
            # duplicated site/city is still rejected under ``python -O``.
            if site.count() != 1:
                logging.error("%s not found or dups.", self.home_url)
                return

            # Drop promotional prefixes, then tag the title with the site name.
            title = utils.lstrip(title, [s.decode('utf8') for s in ('今日团购', '今日精选', ':')])
            title = title.strip()
            title = '[%s] %s' % (site[0].name, title)

            city_name = self.index_urls[url]
            city = City.select(City.q.name == city_name.decode('utf8'))
            if city.count() != 1:
                logging.error("%s not found or dups.", city_name)
                return
            cityID = city[0].id

            if Deal.select(AND(Deal.q.title == title, Deal.q.cityID == cityID)).count() > 0:
                logging.info("Title dups %s", title)
                return
            # Instantiating the model is what stores the row (SQLObject-style
            # ORM, presumably), so the instance itself is not kept.
            Deal(url=url, title=title, price=price, originalPrice=original_price,
                 detail=detail.decode('utf8'), cityID=cityID, siteID=site[0].id)
            logging.info('%s OK', url)
        except Exception:
            # Simple handling for the moment.  ``Exception`` rather than a bare
            # ``except`` so KeyboardInterrupt/SystemExit still propagate.
            logging.error("Error occured while saving data : %s", sys.exc_info())
Example #2
0
import webbrowser

import sqlobject

import settings
from models import Deal

# Bind SQLObject's process-wide connection so model classes such as Deal can
# query without an explicit connection argument.
conn = sqlobject.connectionForURI(settings.CONNECTION_STRING)
sqlobject.sqlhub.processConnection = conn

deals = Deal.select()

# Write every deal to dump.html as a nested HTML list.  The ``with`` block
# guarantees the file is flushed and closed even if a query or an encode
# step raises part-way through.
with open('dump.html', 'wb') as f:
    # The outer <ul> is opened here; it is closed after the loop below.
    print >> f, """
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
</head>
<body>
<ul>
"""
    for deal in deals:
        # One outer list item per deal, holding an inner list of its fields.
        print >> f, '<li>'
        print >> f, '<ul>'
        print >> f, '<li>%s</li>' % deal.title.encode('utf8')
        print >> f, '<li>%s/%s</li>' % (deal.price, deal.originalPrice)
        # detail is written unescaped.
        print >> f, '<li>%s</li>' % deal.detail.encode('utf8')
        print >> f, '</ul>'
        print >> f, '</li>'
    print >> f, '</ul>'
    print >> f, '</body>'
    print >> f, '</html>'