Example #1
    def __init__(self):
        # db
        self.mysqlAccess = MysqlAccess()  # mysql access

        self.brand_names = [
            "艾沃", "保宁", "红太阳", "常润", "厨邦", "大华", "大同", "大王", "德馨斋", "德阳", "东古",
            "凤球唛", "福临门", "高真", "古龙", "冠生园", "广味源", "广祥泰", "龟甲万", "国味威", "海鸥",
            "海天", "好伴", "禾然", "和田宽", "恒顺", "湖西岛", "湖羊", "黄花园", "南食召", "吉成",
            "济美", "加加", "金冠园", "金兰", "金狮", "有味家", "金苏", "荆楚", "景山", "居易",
            "聚百鲜", "科沁万佳", "孔膳坊", "快鹿", "阆中", "老才臣", "食味的初相", "老蔡", "老恒和",
            "老潘头", "李锦记", "利民", "六月鲜", "春峰", "龙菲", "秋生饭堂", "龙牌", "隆昌", "楼茂记",
            "鲁花自然鲜", "云上", "禄荣", "麻旺", "美富达", "美极", "美味源", "蒙古真", "渔山隐",
            "米吉真味", "酿一村", "盘溪", "彭万春", "浦源", "奇峰", "千禾", "千鹤屋", "粮赞", "钱万隆",
            "清净园", "清香园", "仁昌记", "三不加", "悦意", "三和四美", "博爱酵园", "山古坊", "膳府",
            "膳之堂", "盛田", "四海", "寺冈", "苏美", "太太乐", "泰康", "唐人基", "唐世家", "淘大",
            "腾安", "同珍", "妥甸", "拓东", "丸天", "万通", "万字", "味事达", "五天", "犀浦", "仙家",
            "先市", "鲜渡", "咸亨", "香满园", "小东字", "笑厨", "新宇", "星湖", "徐同泰", "薛泰丰",
            "扬名", "尧记", "肴易食", "一统原创", "一休屋", "伊例家", "宜赤必", "优和", "鱼味鲜", "禹谟",
            "玉堂", "御酿坊", "缘木记", "粤香园", "灶基", "詹王", "张家三嫂", "长寿结", "珍极", "正信",
            "正阳河", "至味", "致美斋", "中邦", "中冷泉", "中调", "珠江桥", "梓山", "自然集", "佐参王",
            "佐香园", "中坝", "天府", "南吉", "清湖", "味华", "佐餐王", "一品江南", "金顶", "玉河",
            "巧媳妇", "齐鲁", "梁山好汉", "王家园子", "食圣", "山口", "川鹰", "德通", "新汶", "四海",
            "德馨斋", "玉兔", "灯塔", "仙鹤", "宏林", "贵族王中王", "万和", "口珍", "同福永", "威极",
            "嘉美乐", "天浩圆", "铁鸟", "恒裕", "周太", "海鸥", "太阳岛", "百花", "小神厨", "龙菲",
            "太和", "天一", "美乐", "三汇", "通海", "黑珍珠", "百乐", "吉鹤村", "岭桥", "瓦缸",
            "味莼园", "百花串", "锦酿", "福香居", "铁石", "石桥", "清华", "味邦", "光华", "罕王",
            "营宝", "非常", "大有丰", "沙陀", "味味晓", "云晓", "巧妈妈", "振龙", "乾缘", "稻香园",
            "一品斋", "孔雀", "武大郎", "绿芳", "天赐", "益彰", "建洛", "天口", "一品江南", "机轮",
            "溢美堂", "山乡", "榕江", "嘉乐美", "万路通", "肖大妈", "争荣", "仙源", "敬义泰", "昆湖",
            "鼎兴", "临江寺", "迈进", "玉和", "通德", "民天", "胡玉美", "楼茂记", "鼎丰", "古灯",
            "槐茂", "榕城", "BB", "汉记", "松城", "森江", "美狮", "龙华", "启航", "隆邦", "新汶",
            "四海", "龙之味", "北康", "金玉兰", "小二黑", "吉成"
        ]
Example #2
    def __init__(self):
        # crawl settings
        #self.crawler     = MyCrawler()
        self.crawler = RetryCrawler()

        # db
        self.mysqlAccess = MysqlAccess()  # mysql access

        # brand official site link
        self.home_url = 'http://www.taobao.com'
        self.refers = None

        # crawled item list
        self.link_list = []
        self.items = []

        self.begin_time = Common.now()
Example #3
    def __init__(self, home_url, brand_type, thread_num=10):
        # parent constructor
        MyThread.__init__(self, thread_num)

        # db
        self.mysqlAccess  = MysqlAccess() # mysql access

        # thread lock
        self.mutex        = threading.Lock()

        self.home_url     = home_url

        self.brand_type   = brand_type

        # activity items
        self.items        = []

        # items given up after too many retries
        self.giveup_items = []
Example #4
class updatetaobaoBrand():
    '''A class that updates Taobao item brand tags'''
    def __init__(self):
        # db
        self.mysqlAccess  = MysqlAccess() # mysql access

        self.brand_names = [
            "艾沃", "保宁", "红太阳", "常润", "厨邦", "大华", "大同", "大王", "德馨斋", "德阳", "东古",
            "凤球唛", "福临门", "高真", "古龙", "冠生园", "广味源", "广祥泰", "龟甲万", "国味威", "海鸥",
            "海天", "好伴", "禾然", "和田宽", "恒顺", "湖西岛", "湖羊", "黄花园", "南食召", "吉成",
            "济美", "加加", "金冠园", "金兰", "金狮", "有味家", "金苏", "荆楚", "景山", "居易",
            "聚百鲜", "科沁万佳", "孔膳坊", "快鹿", "阆中", "老才臣", "食味的初相", "老蔡", "老恒和",
            "老潘头", "李锦记", "利民", "六月鲜", "春峰", "龙菲", "秋生饭堂", "龙牌", "隆昌", "楼茂记",
            "鲁花自然鲜", "云上", "禄荣", "麻旺", "美富达", "美极", "美味源", "蒙古真", "渔山隐",
            "米吉真味", "酿一村", "盘溪", "彭万春", "浦源", "奇峰", "千禾", "千鹤屋", "粮赞", "钱万隆",
            "清净园", "清香园", "仁昌记", "三不加", "悦意", "三和四美", "博爱酵园", "山古坊", "膳府",
            "膳之堂", "盛田", "四海", "寺冈", "苏美", "太太乐", "泰康", "唐人基", "唐世家", "淘大",
            "腾安", "同珍", "妥甸", "拓东", "丸天", "万通", "万字", "味事达", "五天", "犀浦", "仙家",
            "先市", "鲜渡", "咸亨", "香满园", "小东字", "笑厨", "新宇", "星湖", "徐同泰", "薛泰丰",
            "扬名", "尧记", "肴易食", "一统原创", "一休屋", "伊例家", "宜赤必", "优和", "鱼味鲜", "禹谟",
            "玉堂", "御酿坊", "缘木记", "粤香园", "灶基", "詹王", "张家三嫂", "长寿结", "珍极", "正信",
            "正阳河", "至味", "致美斋", "中邦", "中冷泉", "中调", "珠江桥", "梓山", "自然集", "佐参王",
            "佐香园", "中坝", "天府", "南吉", "清湖", "味华", "佐餐王", "一品江南", "金顶", "玉河",
            "巧媳妇", "齐鲁", "梁山好汉", "王家园子", "食圣", "山口", "川鹰", "德通", "新汶", "四海",
            "德馨斋", "玉兔", "灯塔", "仙鹤", "宏林", "贵族王中王", "万和", "口珍", "同福永", "威极",
            "嘉美乐", "天浩圆", "铁鸟", "恒裕", "周太", "海鸥", "太阳岛", "百花", "小神厨", "龙菲",
            "太和", "天一", "美乐", "三汇", "通海", "黑珍珠", "百乐", "吉鹤村", "岭桥", "瓦缸",
            "味莼园", "百花串", "锦酿", "福香居", "铁石", "石桥", "清华", "味邦", "光华", "罕王",
            "营宝", "非常", "大有丰", "沙陀", "味味晓", "云晓", "巧妈妈", "振龙", "乾缘", "稻香园",
            "一品斋", "孔雀", "武大郎", "绿芳", "天赐", "益彰", "建洛", "天口", "一品江南", "机轮",
            "溢美堂", "山乡", "榕江", "嘉乐美", "万路通", "肖大妈", "争荣", "仙源", "敬义泰", "昆湖",
            "鼎兴", "临江寺", "迈进", "玉和", "通德", "民天", "胡玉美", "楼茂记", "鼎丰", "古灯",
            "槐茂", "榕城", "BB", "汉记", "松城", "森江", "美狮", "龙华", "启航", "隆邦", "新汶",
            "四海", "龙之味", "北康", "金玉兰", "小二黑", "吉成"
        ]

    def update_item_brand(self):
        items = self.mysqlAccess.get_allitems()
        for item in items:
            item_id, item_name = item
            # collect every brand name that appears in the item title
            matched = [b for b in self.brand_names if item_name.find(b) != -1]
            if matched:
                brandnames = '|'.join(matched)
                print item_id, item_name, brandnames
                self.mysqlAccess.update_item_brand((brandnames, str(item_id)))
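
# A minimal, hypothetical driver for the class above (Python 2). It assumes
# updatetaobaoBrand is defined or importable as above and that
# MysqlAccess.get_allitems() returns (item_id, item_name) rows, as the loop
# in update_item_brand implies.
if __name__ == '__main__':
    updater = updatetaobaoBrand()
    # scan every stored item title and tag it with the matching brand names
    updater.update_item_brand()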
Example #5
    def __init__(self):
        # crawl settings
        #self.crawler     = MyCrawler()
        self.crawler     = RetryCrawler()

        # db
        self.mysqlAccess  = MysqlAccess() # mysql access

        # brand official site link
        self.home_url   = 'http://www.taobao.com'
        self.refers     = None

        # crawled item list
        self.link_list  = []
        self.items      = []
        self.brand_names = [
            "艾沃", "保宁", "红太阳", "常润", "厨邦", "大华", "大同", "大王", "德馨斋", "德阳", "东古",
            "凤球唛", "福临门", "高真", "古龙", "冠生园", "广味源", "广祥泰", "龟甲万", "国味威", "海鸥",
            "海天", "好伴", "禾然", "和田宽", "恒顺", "湖西岛", "湖羊", "黄花园", "南食召", "吉成",
            "济美", "加加", "金冠园", "金兰", "金狮", "有味家", "金苏", "荆楚", "景山", "居易",
            "聚百鲜", "科沁万佳", "孔膳坊", "快鹿", "阆中", "老才臣", "食味的初相", "老蔡", "老恒和",
            "老潘头", "李锦记", "利民", "六月鲜", "春峰", "龙菲", "秋生饭堂", "龙牌", "隆昌", "楼茂记",
            "鲁花自然鲜", "云上", "禄荣", "麻旺", "美富达", "美极", "美味源", "蒙古真", "渔山隐",
            "米吉真味", "酿一村", "盘溪", "彭万春", "浦源", "奇峰", "千禾", "千鹤屋", "粮赞", "钱万隆",
            "清净园", "清香园", "仁昌记", "三不加", "悦意", "三和四美", "博爱酵园", "山古坊", "膳府",
            "膳之堂", "盛田", "四海", "寺冈", "苏美", "太太乐", "泰康", "唐人基", "唐世家", "淘大",
            "腾安", "同珍", "妥甸", "拓东", "丸天", "万通", "万字", "味事达", "五天", "犀浦", "仙家",
            "先市", "鲜渡", "咸亨", "香满园", "小东字", "笑厨", "新宇", "星湖", "徐同泰", "薛泰丰",
            "扬名", "尧记", "肴易食", "一统原创", "一休屋", "伊例家", "宜赤必", "优和", "鱼味鲜", "禹谟",
            "玉堂", "御酿坊", "缘木记", "粤香园", "灶基", "詹王", "张家三嫂", "长寿结", "珍极", "正信",
            "正阳河", "至味", "致美斋", "中邦", "中冷泉", "中调", "珠江桥", "梓山", "自然集", "佐参王",
            "佐香园", "中坝", "天府", "南吉", "清湖", "味华", "佐餐王", "一品江南", "金顶", "玉河",
            "巧媳妇", "齐鲁", "梁山好汉", "王家园子", "食圣", "山口", "川鹰", "德通", "新汶", "四海",
            "德馨斋", "玉兔", "灯塔", "仙鹤", "宏林", "贵族王中王", "万和", "口珍", "同福永", "威极",
            "嘉美乐", "天浩圆", "铁鸟", "恒裕", "周太", "海鸥", "太阳岛", "百花", "小神厨", "龙菲",
            "太和", "天一", "美乐", "三汇", "通海", "黑珍珠", "百乐", "吉鹤村", "岭桥", "瓦缸",
            "味莼园", "百花串", "锦酿", "福香居", "铁石", "石桥", "清华", "味邦", "光华", "罕王",
            "营宝", "非常", "大有丰", "沙陀", "味味晓", "云晓", "巧妈妈", "振龙", "乾缘", "稻香园",
            "一品斋", "孔雀", "武大郎", "绿芳", "天赐", "益彰", "建洛", "天口", "一品江南", "机轮",
            "溢美堂", "山乡", "榕江", "嘉乐美", "万路通", "肖大妈", "争荣", "仙源", "敬义泰", "昆湖",
            "鼎兴", "临江寺", "迈进", "玉和", "通德", "民天", "胡玉美", "楼茂记", "鼎丰", "古灯",
            "槐茂", "榕城", "BB", "汉记", "松城", "森江", "美狮", "龙华", "启航", "隆邦", "新汶",
            "四海", "龙之味", "北康", "金玉兰", "小二黑", "吉成"
        ]
Example #6
class TMCrawler():
    '''A crawler for a TMall shop'''
    def __init__(self):
        # crawl settings
        #self.crawler     = MyCrawler()
        self.crawler     = RetryCrawler()

        # db
        self.mysqlAccess  = MysqlAccess() # mysql access

        # brand official site link
        self.home_url   = 'http://www.taobao.com'
        self.refers     = None

        # crawled item list
        self.link_list  = []
        self.items      = []

        self.begin_time = Common.now()
        
    def getPage(self, url, shop_home_url):
        i = 1
        max_page = 0

        i_url = url
        refers = shop_home_url
        result_s = self.get_asyn_data(i_url, refers, shop_home_url)
        m = re.search(r'<b class="ui-page-s-len">\d+/(\d+)</b>', result_s, flags=re.S) 
        if m:
            max_page = int(m.group(1))
        print '# page num:', max_page
        while i <= max_page:
            m = re.search(r'<div class="J_TItems">(.+?)<div class="pagination">', result_s, flags=re.S)
            if m:
                items_s = m.group(1)
                p = re.compile(r'<dl class=".+?".+?data-id="(.+?)">.+?<dd class="detail">\s*<a class="item-name".+?href="(.+?)".+?>(.+?)</a>\s*<div class="attribute">\s*<div class="cprice-area">\s*<span class="symbol">(.+?)</span>\s*<span\s*class="c-price">(.+?)</span>\s*</div>.+?</dl>')
                for item in p.finditer(items_s):
                    item_id = item.group(1)
                    url_s = item.group(2)
                    item_name = Common.htmlDecode(item.group(3).strip())
                    price_symbol = item.group(4).strip()
                    price = item.group(5).strip()
                    if url_s.find('http') == -1:
                        item_url = 'http:' + url_s
                    else:
                        item_url = url_s
                    print '### item ###'
                    print '# item val:', item_id, item_name, price, item_url
                    item = Item()
                    item.parserTM((item_id, item_name, price, item_url, i_url, self.begin_time))
                    print '# item info:',item.outItemSql()
                    self.mysqlAccess.insert_parser_item_info(item.outItemSql())
                    time.sleep(2)
            
            # advance the page counter before rebuilding the URL; the original
            # refetched page 1 and never parsed the last page
            i += 1
            if i > max_page:
                break

            refers = i_url
            if i_url.find('pageNo=') == -1:
                i_url = re.sub(r'&tsearch=y', '&pageNo=%d&tsearch=y#anchor' % i, refers)
            else:
                i_url = re.sub(r'&pageNo=\d+&', '&pageNo=%d&' % i, refers)

            time.sleep(2)
            result_s = self.get_asyn_data(i_url, refers, shop_home_url)

    def get_asyn_data(self, i_url, refers, shop_home_url):
        result = ''
        result_s = ''
        page = self.crawler.getData(i_url, refers)
        m = re.search(r'<input id="J_ShopAsynSearchURL".+?value="(.+?)"\s*/>', page, flags=re.S)
        if m:
            ts = '?_ksTS=%s&callback=jsonp135&' % (str(int(time.time()*1000)) + '_' + str(random.randint(100,999)))
            a_url = shop_home_url + Common.htmlDecode(m.group(1))
            asyn_url = re.sub(r'\?', ts, a_url)
            result = self.crawler.getData(asyn_url, i_url)
            m = re.search(r'jsonp135\("(.+?)"\)', result, flags=re.S)
            if m:
                result_s = re.sub(r'\\"', '"', m.group(1))
        return result_s


    def getItems(self):
        #for link in self.link_list: self.itemPage(link)
        max_th = 10
        #if len(self.link_list) > max_th:
        #    m_itemsObj = BagItemM(self.home_url,self.brand_type, max_th)
        #else:
        #    m_itemsObj = BagItemM(self.home_url,self.brand_type, len(self.link_list))
        #m_itemsObj.createthread()
        #m_itemsObj.putItems(self.link_list)
        #m_itemsObj.run()
        #self.items.extend(m_itemsObj.items)

    def itemPage(self, val):
        # unpack the item tuple before printing; the original referenced
        # undefined names and would raise NameError
        serie_title, i_title, i_name, i_price, i_unit, i_size, i_url, i_img = val
        print '# itemPage :', serie_title, i_title, i_name, i_price, i_unit, i_size, i_url, i_img
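
# A hedged usage sketch for TMCrawler (Python 2). Both URLs are placeholders:
# real values depend on the target TMall shop, and the list URL must carry the
# '&tsearch=y' marker that getPage's paging logic rewrites.
if __name__ == '__main__':
    crawler = TMCrawler()
    shop_home_url = 'http://example.tmall.com'               # placeholder
    list_url = shop_home_url + '/search.htm?q=&tsearch=y'    # placeholder
    crawler.getPage(list_url, shop_home_url)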
Example #7
class BagItemM(MyThread):
    '''A thread manager for crawling jhs items'''
    def __init__(self, home_url, brand_type, thread_num=10):
        # parent constructor
        MyThread.__init__(self, thread_num)

        # db
        self.mysqlAccess  = MysqlAccess() # mysql access

        # thread lock
        self.mutex        = threading.Lock()

        self.home_url     = home_url

        self.brand_type   = brand_type

        # activity items
        self.items        = []

        # items given up after too many retries
        self.giveup_items = []

    def push_back(self, L, v):
        if self.mutex.acquire(1):
            L.append(v)
            self.mutex.release()

    def putItem(self, _item):
        self.put_q((0, _item))

    def putItems(self, _items):
        for _item in _items: self.put_q((0, _item))

    # requeue a failed crawl for retry
    def crawlRetry(self, _data):
        if not _data: return
        _retry, _val = _data
        _retry += 1
        if _retry < Config.crawl_retry:
            _data = (_retry, _val)
            self.put_q(_data)
        else:
            self.push_back(self.giveup_items, _val)
            print "# retry too many times, no get item:", _val

    # worker loop: crawl items from the queue
    def crawl(self):
        while True:
            _data = None
            try:
                try:
                    # pop a task from the queue
                    _data = self.get_q()
                except Empty as e:
                    # queue is empty, exit the worker loop
                    #print '# queue is empty', e
                    break

                _val = _data[1]
                item = BagItem(self.home_url, self.brand_type)
                item.antPage(_val)
                self.push_back(self.items, item.outItem())

                sql = item.outTuple()
                self.mysqlAccess.insert_item(sql)

                # brief delay between items
                time.sleep(0.1)
                # notify the queue that this task is done
                self.queue.task_done()

            except Exception as e:
                print 'Unknown exception crawl item :', e
                Common.traceback_log()
                self.crawlRetry(_data)
                # notify the queue that this task is done
                self.queue.task_done()
                time.sleep(5)
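
# A hedged sketch of driving BagItemM, following the calling sequence in the
# commented-out body of TMCrawler.getItems above. createthread() and run() are
# assumed to come from the MyThread base class, which is not shown here.
if __name__ == '__main__':
    home_url = 'http://www.taobao.com'
    brand_type = 'sauce'    # hypothetical brand-type label
    link_list = []          # item values gathered elsewhere
    m_itemsObj = BagItemM(home_url, brand_type, min(len(link_list), 10) or 1)
    m_itemsObj.createthread()        # assumed MyThread API
    m_itemsObj.putItems(link_list)
    m_itemsObj.run()                 # assumed MyThread API
    print '# crawled:', len(m_itemsObj.items), '| gave up:', len(m_itemsObj.giveup_items)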
Example #8
class taobaoSearch():
    '''A crawler for Taobao search result pages'''
    def __init__(self):
        # crawl settings
        #self.crawler     = MyCrawler()
        self.crawler     = RetryCrawler()

        # db
        self.mysqlAccess  = MysqlAccess() # mysql access

        # brand official site link
        self.home_url   = 'http://www.taobao.com'
        self.refers     = None

        # crawled item list
        self.link_list  = []
        self.items      = []
        self.brand_names = [
            "艾沃", "保宁", "红太阳", "常润", "厨邦", "大华", "大同", "大王", "德馨斋", "德阳", "东古",
            "凤球唛", "福临门", "高真", "古龙", "冠生园", "广味源", "广祥泰", "龟甲万", "国味威", "海鸥",
            "海天", "好伴", "禾然", "和田宽", "恒顺", "湖西岛", "湖羊", "黄花园", "南食召", "吉成",
            "济美", "加加", "金冠园", "金兰", "金狮", "有味家", "金苏", "荆楚", "景山", "居易",
            "聚百鲜", "科沁万佳", "孔膳坊", "快鹿", "阆中", "老才臣", "食味的初相", "老蔡", "老恒和",
            "老潘头", "李锦记", "利民", "六月鲜", "春峰", "龙菲", "秋生饭堂", "龙牌", "隆昌", "楼茂记",
            "鲁花自然鲜", "云上", "禄荣", "麻旺", "美富达", "美极", "美味源", "蒙古真", "渔山隐",
            "米吉真味", "酿一村", "盘溪", "彭万春", "浦源", "奇峰", "千禾", "千鹤屋", "粮赞", "钱万隆",
            "清净园", "清香园", "仁昌记", "三不加", "悦意", "三和四美", "博爱酵园", "山古坊", "膳府",
            "膳之堂", "盛田", "四海", "寺冈", "苏美", "太太乐", "泰康", "唐人基", "唐世家", "淘大",
            "腾安", "同珍", "妥甸", "拓东", "丸天", "万通", "万字", "味事达", "五天", "犀浦", "仙家",
            "先市", "鲜渡", "咸亨", "香满园", "小东字", "笑厨", "新宇", "星湖", "徐同泰", "薛泰丰",
            "扬名", "尧记", "肴易食", "一统原创", "一休屋", "伊例家", "宜赤必", "优和", "鱼味鲜", "禹谟",
            "玉堂", "御酿坊", "缘木记", "粤香园", "灶基", "詹王", "张家三嫂", "长寿结", "珍极", "正信",
            "正阳河", "至味", "致美斋", "中邦", "中冷泉", "中调", "珠江桥", "梓山", "自然集", "佐参王",
            "佐香园", "中坝", "天府", "南吉", "清湖", "味华", "佐餐王", "一品江南", "金顶", "玉河",
            "巧媳妇", "齐鲁", "梁山好汉", "王家园子", "食圣", "山口", "川鹰", "德通", "新汶", "四海",
            "德馨斋", "玉兔", "灯塔", "仙鹤", "宏林", "贵族王中王", "万和", "口珍", "同福永", "威极",
            "嘉美乐", "天浩圆", "铁鸟", "恒裕", "周太", "海鸥", "太阳岛", "百花", "小神厨", "龙菲",
            "太和", "天一", "美乐", "三汇", "通海", "黑珍珠", "百乐", "吉鹤村", "岭桥", "瓦缸",
            "味莼园", "百花串", "锦酿", "福香居", "铁石", "石桥", "清华", "味邦", "光华", "罕王",
            "营宝", "非常", "大有丰", "沙陀", "味味晓", "云晓", "巧妈妈", "振龙", "乾缘", "稻香园",
            "一品斋", "孔雀", "武大郎", "绿芳", "天赐", "益彰", "建洛", "天口", "一品江南", "机轮",
            "溢美堂", "山乡", "榕江", "嘉乐美", "万路通", "肖大妈", "争荣", "仙源", "敬义泰", "昆湖",
            "鼎兴", "临江寺", "迈进", "玉和", "通德", "民天", "胡玉美", "楼茂记", "鼎丰", "古灯",
            "槐茂", "榕城", "BB", "汉记", "松城", "森江", "美狮", "龙华", "启航", "隆邦", "新汶",
            "四海", "龙之味", "北康", "金玉兰", "小二黑", "吉成"
        ]
        
    def getPage(self, url):
        position = 1
        i = 1
       
        i_url = url
        refers = self.home_url
        max_page = 10
        size_page = 48
        while i <= max_page:
            page = self.crawler.getData(i_url, refers)
            refers = i_url
            i_url = url + '&bcoffset=1&s=%s' % str(i*size_page)
            i += 1
            if not page:
                print '# no data for url:', i_url
                time.sleep(4)
                continue
            m = re.search(r'<script>\s+g_page_config = ({.+?});.+?</script>', page, flags=re.S)
            if m:
                page_config = m.group(1)
                page_config_s = re.sub(r'\n+','',page_config)
                data = json.loads(page_config_s)
                mods = data.get("mods", {})
                itemlist_data = mods.get("itemlist", {}).get("data", {})
                for item in itemlist_data.get("auctions", []):
                    item_id = position
                    m = re.search(r'id=(\d+)', item["detail_url"], flags=re.S)
                    if m:
                        item_id = m.group(1)
                    item_sales = item["view_sales"]
                    m = re.search(r'(\d+)', item["view_sales"], flags=re.S)
                    if m:
                        item_sales = m.group(1)
                    print Common.time_s(Common.now()), position, item_id, \
                        item["raw_title"], item["view_price"], item_sales, \
                        item["user_id"], item["nick"], \
                        "http:" + item["detail_url"], "http:" + item["shopLink"]
                    self.mysqlAccess.insert_item((
                        Common.time_s(Common.now()), str(item_id), str(position),
                        str(item["raw_title"]), str(item["view_price"]),
                        str(item_sales), "http:" + item["detail_url"],
                        item["user_id"], str(item["nick"]),
                        "http:" + item["shopLink"]))
                    position += 1
            time.sleep(4)


    def getItems(self):
        #for link in self.link_list: self.itemPage(link)
        max_th = 10
        #if len(self.link_list) > max_th:
        #    m_itemsObj = BagItemM(self.home_url,self.brand_type, max_th)
        #else:
        #    m_itemsObj = BagItemM(self.home_url,self.brand_type, len(self.link_list))
        #m_itemsObj.createthread()
        #m_itemsObj.putItems(self.link_list)
        #m_itemsObj.run()
        #self.items.extend(m_itemsObj.items)

    def itemPage(self, val):
        # unpack the item tuple before printing; the original referenced
        # undefined names and would raise NameError
        serie_title, i_title, i_name, i_price, i_unit, i_size, i_url, i_img = val
        print '# itemPage :', serie_title, i_title, i_name, i_price, i_unit, i_size, i_url, i_img

    def outItems(self, f):
        # output header, kept in Chinese because it is written verbatim to the
        # data file: series|label|name|price|currency unit|size|link|image|item id
        s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号'
        with open(f, 'w') as f_item:
            self.items.insert(0, s)
            f_item.write('\n'.join(self.items))
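
# A hedged driver sketch for taobaoSearch (Python 2). The search URL format is
# an assumption; note that getPage writes each parsed row straight to MySQL via
# insert_item, so self.items stays empty and outItems would only emit the
# header line here.
if __name__ == '__main__':
    search = taobaoSearch()
    search_url = 'http://s.taobao.com/search?q=%s' % 'sauce'    # placeholder
    search.getPage(search_url)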