def __init__(self):
    """Initialize DB access and the hard-coded list of condiment brand names."""
    # db
    self.mysqlAccess = MysqlAccess()  # mysql access
    # NOTE(review): this list contains duplicate entries (e.g. "四海" appears
    # three times; "德馨斋", "海鸥", "龙菲", "楼茂记", "新汶", "一品江南",
    # "吉成" twice) — presumably merged from several sources; confirm whether
    # duplicates are intentional before relying on list order/length.
    self.brand_names = [ "艾沃", "保宁", "红太阳", "常润", "厨邦", "大华", "大同", "大王", "德馨斋", "德阳", "东古", "凤球唛", "福临门", "高真", "古龙", "冠生园", "广味源", "广祥泰", "龟甲万", "国味威", "海鸥", "海天", "好伴", "禾然", "和田宽", "恒顺", "湖西岛", "湖羊", "黄花园", "南食召", "吉成", "济美", "加加", "金冠园", "金兰", "金狮", "有味家", "金苏", "荆楚", "景山", "居易", "聚百鲜", "科沁万佳", "孔膳坊", "快鹿", "阆中", "老才臣", "食味的初相", "老蔡", "老恒和", "老潘头", "李锦记", "利民", "六月鲜", "春峰", "龙菲", "秋生饭堂", "龙牌", "隆昌", "楼茂记", "鲁花自然鲜", "云上", "禄荣", "麻旺", "美富达", "美极", "美味源", "蒙古真", "渔山隐", "米吉真味", "酿一村", "盘溪", "彭万春", "浦源", "奇峰", "千禾", "千鹤屋", "粮赞", "钱万隆", "清净园", "清香园", "仁昌记", "三不加", "悦意", "三和四美", "博爱酵园", "山古坊", "膳府", "膳之堂", "盛田", "四海", "寺冈", "苏美", "太太乐", "泰康", "唐人基", "唐世家", "淘大", "腾安", "同珍", "妥甸", "拓东", "丸天", "万通", "万字", "味事达", "五天", "犀浦", "仙家", "先市", "鲜渡", "咸亨", "香满园", "小东字", "笑厨", "新宇", "星湖", "徐同泰", "薛泰丰", "扬名", "尧记", "肴易食", "一统原创", "一休屋", "伊例家", "宜赤必", "优和", "鱼味鲜", "禹谟", "玉堂", "御酿坊", "缘木记", "粤香园", "灶基", "詹王", "张家三嫂", "长寿结", "珍极", "正信", "正阳河", "至味", "致美斋", "中邦", "中冷泉", "中调", "珠江桥", "梓山", "自然集", "佐参王", "佐香园", "中坝", "天府", "南吉", "清湖", "味华", "佐餐王", "一品江南", "金顶", "玉河", "巧媳妇", "齐鲁", "梁山好汉", "王家园子", "食圣", "山口", "川鹰", "德通", "新汶", "四海", "德馨斋", "玉兔", "灯塔", "仙鹤", "宏林", "贵族王中王", "万和", "口珍", "同福永", "威极", "嘉美乐", "天浩圆", "铁鸟", "恒裕", "周太", "海鸥", "太阳岛", "百花", "小神厨", "龙菲", "太和", "天一", "美乐", "三汇", "通海", "黑珍珠", "百乐", "吉鹤村", "岭桥", "瓦缸", "味莼园", "百花串", "锦酿", "福香居", "铁石", "石桥", "清华", "味邦", "光华", "罕王", "营宝", "非常", "大有丰", "沙陀", "味味晓", "云晓", "巧妈妈", "振龙", "乾缘", "稻香园", "一品斋", "孔雀", "武大郎", "绿芳", "天赐", "益彰", "建洛", "天口", "一品江南", "机轮", "溢美堂", "山乡", "榕江", "嘉乐美", "万路通", "肖大妈", "争荣", "仙源", "敬义泰", "昆湖", "鼎兴", "临江寺", "迈进", "玉和", "通德", "民天", "胡玉美", "楼茂记", "鼎丰", "古灯", "槐茂", "榕城", "BB", "汉记", "松城", "森江", "美狮", "龙华", "启航", "隆邦", "新汶", "四海", "龙之味", "北康", "金玉兰", "小二黑", "吉成" ]
def __init__(self):
    """Set up the retrying crawler, DB access and per-run crawl state."""
    # crawl settings: use the crawler variant that retries failed fetches
    self.crawler = RetryCrawler()
    # MySQL access layer
    self.mysqlAccess = MysqlAccess()
    # brand official-site entry point, used as the initial referer
    self.home_url = 'http://www.taobao.com'
    self.refers = None
    # collected item links and parsed items for this run
    self.link_list, self.items = [], []
    # timestamp marking the start of this crawl run
    self.begin_time = Common.now()
def __init__(self, home_url, brand_type, thread_num=10):
    """Thread-pool manager state: pool size goes to the MyThread base,
    home_url/brand_type are kept for the workers, and two shared result
    lists are guarded by a lock."""
    MyThread.__init__(self, thread_num)  # parent construct
    # db access
    self.mysqlAccess = MysqlAccess()
    # lock guarding the shared result lists below
    self.mutex = threading.Lock()
    self.home_url = home_url
    self.brand_type = brand_type
    # successfully crawled items
    self.items = []
    # items given up on after too many retries
    self.giveup_items = []
class updatetaobaoBrand(): '''A class of update taobao Brand''' def __init__(self): # db self.mysqlAccess = MysqlAccess() # mysql access self.brand_names = ["艾沃","保宁","红太阳","常润","厨邦","大华","大同","大王","德馨斋","德阳","东古","凤球唛","福临门","高真","古龙","冠生园","广味源","广祥泰","龟甲万","国味威","海鸥","海天","好伴","禾然","和田宽","恒顺","湖西岛","湖羊","黄花园","南食召","吉成","济美","加加","金冠园","金兰","金狮","有味家","金苏","荆楚","景山","居易","聚百鲜","科沁万佳","孔膳坊","快鹿","阆中","老才臣","食味的初相","老蔡","老恒和","老潘头","李锦记","利民","六月鲜","春峰","龙菲","秋生饭堂","龙牌","隆昌","楼茂记","鲁花自然鲜","云上","禄荣","麻旺","美富达","美极","美味源","蒙古真","渔山隐","米吉真味","酿一村","盘溪","彭万春","浦源","奇峰","千禾","千鹤屋","粮赞","钱万隆","清净园","清香园","仁昌记","三不加","悦意","三和四美","博爱酵园","山古坊","膳府","膳之堂","盛田","四海","寺冈","苏美","太太乐","泰康","唐人基","唐世家","淘大","腾安","同珍","妥甸","拓东","丸天","万通","万字","味事达","五天","犀浦","仙家","先市","鲜渡","咸亨","香满园","小东字","笑厨","新宇","星湖","徐同泰","薛泰丰","扬名","尧记","肴易食","一统原创","一休屋","伊例家","宜赤必","优和","鱼味鲜","禹谟","玉堂","御酿坊","缘木记","粤香园","灶基","詹王","张家三嫂","长寿结","珍极","正信","正阳河","至味","致美斋","中邦","中冷泉","中调","珠江桥","梓山","自然集","佐参王","佐香园","中坝","天府","南吉","清湖","味华","佐餐王","一品江南","金顶","玉河","巧媳妇","齐鲁","梁山好汉","王家园子","食圣","山口","川鹰","德通","新汶","四海","德馨斋","玉兔","灯塔","仙鹤","宏林","贵族王中王","万和","口珍","同福永","威极","嘉美乐","天浩圆","铁鸟","恒裕","周太","海鸥","太阳岛","百花","小神厨","龙菲","太和","天一","美乐","三汇","通海","黑珍珠","百乐","吉鹤村","岭桥","瓦缸","味莼园","百花串","锦酿","福香居","铁石","石桥","清华","味邦","光华","罕王","营宝","非常","大有丰","沙陀","味味晓","云晓","巧妈妈","振龙","乾缘","稻香园","一品斋","孔雀","武大郎","绿芳","天赐","益彰","建洛","天口","一品江南","机轮","溢美堂","山乡","榕江","嘉乐美","万路通","肖大妈","争荣","仙源","敬义泰","昆湖","鼎兴","临江寺","迈进","玉和","通德","民天","胡玉美","楼茂记","鼎丰","古灯","槐茂","榕城","BB","汉记","松城","森江","美狮","龙华","启航","隆邦","新汶","四海","龙之味","北康","金玉兰","小二黑","吉成"] def update_item_brand(self): items = self.mysqlAccess.get_allitems() for item in items: brandnames = '' item_id, item_name = item for brandname in self.brand_names: if item_name.find(brandname) != -1: brandnames += brandname + '|' if brandnames != '': print item_id, item_name, brandnames[:-1] self.mysqlAccess.update_item_brand((brandnames[:-1],str(item_id)))
def __init__(self):
    """Initialize crawler, DB access, crawl state and the brand-name list."""
    # crawl settings
    #self.crawler = MyCrawler()
    self.crawler = RetryCrawler()
    # db
    self.mysqlAccess = MysqlAccess()  # mysql access
    # brand official-site link, used as the initial referer
    self.home_url = 'http://www.taobao.com'
    self.refers = None
    # crawled item list
    self.link_list = []
    self.items = []
    # NOTE(review): this list contains duplicate entries (e.g. "四海" appears
    # three times) — presumably merged from several sources; confirm intent.
    self.brand_names = ["艾沃","保宁","红太阳","常润","厨邦","大华","大同","大王","德馨斋","德阳","东古","凤球唛","福临门","高真","古龙","冠生园","广味源","广祥泰","龟甲万","国味威","海鸥","海天","好伴","禾然","和田宽","恒顺","湖西岛","湖羊","黄花园","南食召","吉成","济美","加加","金冠园","金兰","金狮","有味家","金苏","荆楚","景山","居易","聚百鲜","科沁万佳","孔膳坊","快鹿","阆中","老才臣","食味的初相","老蔡","老恒和","老潘头","李锦记","利民","六月鲜","春峰","龙菲","秋生饭堂","龙牌","隆昌","楼茂记","鲁花自然鲜","云上","禄荣","麻旺","美富达","美极","美味源","蒙古真","渔山隐","米吉真味","酿一村","盘溪","彭万春","浦源","奇峰","千禾","千鹤屋","粮赞","钱万隆","清净园","清香园","仁昌记","三不加","悦意","三和四美","博爱酵园","山古坊","膳府","膳之堂","盛田","四海","寺冈","苏美","太太乐","泰康","唐人基","唐世家","淘大","腾安","同珍","妥甸","拓东","丸天","万通","万字","味事达","五天","犀浦","仙家","先市","鲜渡","咸亨","香满园","小东字","笑厨","新宇","星湖","徐同泰","薛泰丰","扬名","尧记","肴易食","一统原创","一休屋","伊例家","宜赤必","优和","鱼味鲜","禹谟","玉堂","御酿坊","缘木记","粤香园","灶基","詹王","张家三嫂","长寿结","珍极","正信","正阳河","至味","致美斋","中邦","中冷泉","中调","珠江桥","梓山","自然集","佐参王","佐香园","中坝","天府","南吉","清湖","味华","佐餐王","一品江南","金顶","玉河","巧媳妇","齐鲁","梁山好汉","王家园子","食圣","山口","川鹰","德通","新汶","四海","德馨斋","玉兔","灯塔","仙鹤","宏林","贵族王中王","万和","口珍","同福永","威极","嘉美乐","天浩圆","铁鸟","恒裕","周太","海鸥","太阳岛","百花","小神厨","龙菲","太和","天一","美乐","三汇","通海","黑珍珠","百乐","吉鹤村","岭桥","瓦缸","味莼园","百花串","锦酿","福香居","铁石","石桥","清华","味邦","光华","罕王","营宝","非常","大有丰","沙陀","味味晓","云晓","巧妈妈","振龙","乾缘","稻香园","一品斋","孔雀","武大郎","绿芳","天赐","益彰","建洛","天口","一品江南","机轮","溢美堂","山乡","榕江","嘉乐美","万路通","肖大妈","争荣","仙源","敬义泰","昆湖","鼎兴","临江寺","迈进","玉和","通德","民天","胡玉美","楼茂记","鼎丰","古灯","槐茂","榕城","BB","汉记","松城","森江","美狮","龙华","启航","隆邦","新汶","四海","龙之味","北康","金玉兰","小二黑","吉成"]
class TMCrawler():
    '''A class of TMall shop'''

    def __init__(self):
        """Set up the retrying crawler, DB access and per-run crawl state."""
        # crawl settings
        #self.crawler = MyCrawler()
        self.crawler = RetryCrawler()
        # db
        self.mysqlAccess = MysqlAccess()  # mysql access
        # brand official-site link, used as the initial referer
        self.home_url = 'http://www.taobao.com'
        self.refers = None
        # crawled item list
        self.link_list = []
        self.items = []
        # timestamp marking the start of this crawl run
        self.begin_time = Common.now()

    def getPage(self, url, shop_home_url):
        """Walk a TMall shop's paginated search results starting at `url`,
        parse each item entry with a regex and insert it into MySQL.

        `shop_home_url` is both the first referer and the base used by
        get_asyn_data to build the async-search URL.
        """
        position = 1
        i = 1
        max_page = 0
        asyn_url = ''
        i_url = url
        refers = shop_home_url
        # first fetch; also tells us the total page count
        result_s = self.get_asyn_data(i_url, refers, shop_home_url)
        m = re.search(r'<b class="ui-page-s-len">\d+/(\d+)</b>', result_s, flags=re.S)
        if m:
            max_page = int(m.group(1))
        print '# page num:', max_page
        while i <= max_page:
            # item listing sits between the J_TItems div and the pagination div
            m = re.search(r'<div class="J_TItems">(.+?)<div class="pagination">', result_s, flags=re.S)
            if m:
                items_s = m.group(1)
                p = re.compile(r'<dl class=".+?".+?data-id="(.+?)">.+?<dd class="detail">\s*<a class="item-name".+?href="(.+?)".+?>(.+?)</a>\s*<div class="attribute">\s*<div class="cprice-area">\s*<span class="symbol">(.+?)</span>\s*<span\s*class="c-price">(.+?)</span>\s*</div>.+?</dl>')
                j = 1
                for item in p.finditer(items_s):
                    # groups: id, relative url, html-encoded name, currency symbol, price
                    item_id, url_s, item_name, price_symbol, price = item.group(1), item.group(2), Common.htmlDecode(item.group(3).strip()), item.group(4).strip(), item.group(5).strip()
                    # protocol-relative links need an explicit scheme
                    if url_s.find('http') == -1:
                        item_url = 'http:' + url_s
                    else:
                        item_url = url_s
                    print '### item ###'
                    print '# item val:', item_id, item_name, price, item_url
                    # NOTE(review): `item` (the regex match) is rebound to an Item()
                    # object here — works, but shadows the loop variable.
                    item = Item()
                    item.parserTM((item_id, item_name, price, item_url, i_url, self.begin_time))
                    print '# item info:', item.outItemSql()
                    self.mysqlAccess.insert_parser_item_info(item.outItemSql())
                    time.sleep(2)  # throttle per-item DB/crawl pace
            # advance to the next page by rewriting the pageNo parameter
            refers = i_url
            if i_url.find('pageNo=') == -1:
                i_url = re.sub(r'&tsearch=y', '&pageNo=%d&tsearch=y#anchor' % i, refers)
            else:
                i_url = re.sub(r'&pageNo=\d+&', '&pageNo=%d&' % i, refers)
            i += 1
            time.sleep(2)  # throttle between pages
            result_s = self.get_asyn_data(i_url, refers, shop_home_url)

    def get_asyn_data(self, i_url, refers, shop_home_url):
        """Fetch the shop page at `i_url`, extract its async-search URL and
        return the unescaped jsonp135 payload ('' when anything is missing)."""
        result = ''
        result_s = ''
        page = self.crawler.getData(i_url, refers)
        m = re.search(r'<input id="J_ShopAsynSearchURL".+?value="(.+?)"\s*/>', page, flags=re.S)
        if m:
            # cache-busting timestamp + jsonp callback, mimicking the page's own request
            ts = '?_ksTS=%s&callback=jsonp135&' % (str(int(time.time()*1000)) + '_' + str(random.randint(100,999)))
            a_url = shop_home_url + Common.htmlDecode(m.group(1))
            asyn_url = re.sub('\?', ts, a_url)
            result = self.crawler.getData(asyn_url, i_url)
            m = re.search(r'jsonp135\("(.+?)"\)', result, flags=re.S)
            if m:
                # un-escape the \" sequences inside the jsonp string argument
                result_s = re.sub(r'\\"', '"', m.group(1))
        return result_s

    def getItems(self):
        """Placeholder: the threaded item-crawl driver is commented out."""
        #for link in self.link_list: self.itemPage(link)
        max_th = 10
        #if len(self.link_list) > max_th:
        #    m_itemsObj = BagItemM(self.home_url,self.brand_type, max_th)
        #else:
        #    m_itemsObj = BagItemM(self.home_url,self.brand_type, len(self.link_list))
        #m_itemsObj.createthread()
        #m_itemsObj.putItems(self.link_list)
        #m_itemsObj.run()
        #self.items.extend(m_itemsObj.items)

    def itemPage(self, val):
        # NOTE(review): none of these names (serie_title, i_title, ...) are
        # defined in this scope — calling this raises NameError; presumably
        # they were meant to be unpacked from `val`. Confirm before use.
        print '# itemPage :', serie_title, i_title, i_name, i_price, i_unit, i_size, i_url, i_img
class BagItemM(MyThread):
    '''A class of jhs item thread manager'''

    def __init__(self, home_url, brand_type, thread_num=10):
        """Thread-pool manager: `thread_num` sizes the MyThread pool;
        home_url/brand_type are stored for the workers."""
        # parent construct
        MyThread.__init__(self, thread_num)
        # db
        self.mysqlAccess = MysqlAccess()  # mysql access
        # thread lock guarding the shared result lists
        self.mutex = threading.Lock()
        self.home_url = home_url
        self.brand_type = brand_type
        # activity items (successfully crawled)
        self.items = []
        # give up item, retry too many times
        self.giveup_items = []

    def push_back(self, L, v):
        """Append `v` to shared list `L` under the mutex."""
        if self.mutex.acquire(1):
            L.append(v)
            self.mutex.release()

    def putItem(self, _item):
        # queue entries are (retry_count, value) pairs; fresh items start at 0
        self.put_q((0, _item))

    def putItems(self, _items):
        for _item in _items:
            self.put_q((0, _item))

    # To crawl retry
    def crawlRetry(self, _data):
        """Requeue a failed (retry_count, value) pair, or move it to
        giveup_items once Config.crawl_retry attempts are exhausted."""
        if not _data:
            return
        _retry, _val = _data
        _retry += 1
        if _retry < Config.crawl_retry:
            _data = (_retry, _val)
            self.put_q(_data)
        else:
            self.push_back(self.giveup_items, _val)
            print "# retry too many times, no get item:", _val

    # To crawl item
    def crawl(self):
        """Worker loop: drain the queue, crawl each item via BagItem, store
        results in MySQL and the shared list; requeue on unexpected errors."""
        while True:
            _data = None
            try:
                try:
                    # fetch the next queue message
                    _data = self.get_q()
                except Empty as e:
                    # queue is empty -> worker exits
                    #print '# queue is empty', e
                    break
                _val = _data[1]
                item = BagItem(self.home_url, self.brand_type)
                item.antPage(_val)
                self.push_back(self.items, item.outItem())
                sql = item.outTuple()
                self.mysqlAccess.insert_item(sql)
                # small delay to throttle crawl pace
                time.sleep(0.1)
                # notify the queue that this task is done
                self.queue.task_done()
            except Exception as e:
                print 'Unknown exception crawl item :', e
                Common.traceback_log()
                # push the item back for another attempt (bounded by crawlRetry)
                self.crawlRetry(_data)
                # notify the queue that this task is done
                self.queue.task_done()
                time.sleep(5)  # back off after a failure
class TMCrawler():
    '''A class of TMall shop'''

    def __init__(self):
        """Set up the retrying crawler, DB access and per-run crawl state."""
        # crawl settings
        #self.crawler = MyCrawler()
        self.crawler = RetryCrawler()
        # db
        self.mysqlAccess = MysqlAccess()  # mysql access
        # brand official-site link, used as the initial referer
        self.home_url = 'http://www.taobao.com'
        self.refers = None
        # crawled item list
        self.link_list = []
        self.items = []
        # timestamp marking the start of this crawl run
        self.begin_time = Common.now()

    def getPage(self, url, shop_home_url):
        """Walk a TMall shop's paginated search results starting at `url`,
        parse each item entry with a regex and insert it into MySQL.

        NOTE(review): this class duplicates the earlier TMCrawler in this
        file — confirm which copy is live and remove the other.
        """
        position = 1
        i = 1
        max_page = 0
        asyn_url = ''
        i_url = url
        refers = shop_home_url
        # first fetch; also tells us the total page count
        result_s = self.get_asyn_data(i_url, refers, shop_home_url)
        m = re.search(r'<b class="ui-page-s-len">\d+/(\d+)</b>', result_s, flags=re.S)
        if m:
            max_page = int(m.group(1))
        print '# page num:', max_page
        while i <= max_page:
            # item listing sits between the J_TItems div and the pagination div
            m = re.search(
                r'<div class="J_TItems">(.+?)<div class="pagination">',
                result_s,
                flags=re.S)
            if m:
                items_s = m.group(1)
                p = re.compile(
                    r'<dl class=".+?".+?data-id="(.+?)">.+?<dd class="detail">\s*<a class="item-name".+?href="(.+?)".+?>(.+?)</a>\s*<div class="attribute">\s*<div class="cprice-area">\s*<span class="symbol">(.+?)</span>\s*<span\s*class="c-price">(.+?)</span>\s*</div>.+?</dl>'
                )
                j = 1
                for item in p.finditer(items_s):
                    # groups: id, relative url, html-encoded name, currency symbol, price
                    item_id, url_s, item_name, price_symbol, price = item.group(
                        1), item.group(2), Common.htmlDecode(
                            item.group(3).strip()), item.group(
                                4).strip(), item.group(5).strip()
                    # protocol-relative links need an explicit scheme
                    if url_s.find('http') == -1:
                        item_url = 'http:' + url_s
                    else:
                        item_url = url_s
                    print '### item ###'
                    print '# item val:', item_id, item_name, price, item_url
                    # NOTE(review): the regex match `item` is rebound to an
                    # Item() object here — works, but shadows the loop variable.
                    item = Item()
                    item.parserTM((item_id, item_name, price, item_url, i_url,
                                   self.begin_time))
                    print '# item info:', item.outItemSql()
                    self.mysqlAccess.insert_parser_item_info(item.outItemSql())
                    time.sleep(2)  # throttle per-item DB/crawl pace
            # advance to the next page by rewriting the pageNo parameter
            refers = i_url
            if i_url.find('pageNo=') == -1:
                i_url = re.sub(r'&tsearch=y',
                               '&pageNo=%d&tsearch=y#anchor' % i, refers)
            else:
                i_url = re.sub(r'&pageNo=\d+&', '&pageNo=%d&' % i, refers)
            i += 1
            time.sleep(2)  # throttle between pages
            result_s = self.get_asyn_data(i_url, refers, shop_home_url)

    def get_asyn_data(self, i_url, refers, shop_home_url):
        """Fetch the shop page at `i_url`, extract its async-search URL and
        return the unescaped jsonp135 payload ('' when anything is missing)."""
        result = ''
        result_s = ''
        page = self.crawler.getData(i_url, refers)
        m = re.search(r'<input id="J_ShopAsynSearchURL".+?value="(.+?)"\s*/>',
                      page,
                      flags=re.S)
        if m:
            # cache-busting timestamp + jsonp callback, mimicking the page's own request
            ts = '?_ksTS=%s&callback=jsonp135&' % (str(int(
                time.time() * 1000)) + '_' + str(random.randint(100, 999)))
            a_url = shop_home_url + Common.htmlDecode(m.group(1))
            asyn_url = re.sub('\?', ts, a_url)
            result = self.crawler.getData(asyn_url, i_url)
            m = re.search(r'jsonp135\("(.+?)"\)', result, flags=re.S)
            if m:
                # un-escape the \" sequences inside the jsonp string argument
                result_s = re.sub(r'\\"', '"', m.group(1))
        return result_s

    def getItems(self):
        """Placeholder: the threaded item-crawl driver is commented out."""
        #for link in self.link_list: self.itemPage(link)
        max_th = 10
        #if len(self.link_list) > max_th:
        #    m_itemsObj = BagItemM(self.home_url,self.brand_type, max_th)
        #else:
        #    m_itemsObj = BagItemM(self.home_url,self.brand_type, len(self.link_list))
        #m_itemsObj.createthread()
        #m_itemsObj.putItems(self.link_list)
        #m_itemsObj.run()
        #self.items.extend(m_itemsObj.items)

    def itemPage(self, val):
        # NOTE(review): none of these names (serie_title, i_title, ...) are
        # defined in this scope — calling this raises NameError; presumably
        # they were meant to be unpacked from `val`. Confirm before use.
        print '# itemPage :', serie_title, i_title, i_name, i_price, i_unit, i_size, i_url, i_img
class taobaoSearch():
    '''A class of taobao search page'''

    def __init__(self):
        """Initialize crawler, DB access, crawl state and the brand-name list."""
        # crawl settings
        #self.crawler = MyCrawler()
        self.crawler = RetryCrawler()
        # db
        self.mysqlAccess = MysqlAccess()  # mysql access
        # brand official-site link, used as the initial referer
        self.home_url = 'http://www.taobao.com'
        self.refers = None
        # crawled item list
        self.link_list = []
        self.items = []
        # NOTE(review): this list contains duplicate entries (e.g. "四海"
        # appears three times) — presumably merged from several sources.
        self.brand_names = ["艾沃","保宁","红太阳","常润","厨邦","大华","大同","大王","德馨斋","德阳","东古","凤球唛","福临门","高真","古龙","冠生园","广味源","广祥泰","龟甲万","国味威","海鸥","海天","好伴","禾然","和田宽","恒顺","湖西岛","湖羊","黄花园","南食召","吉成","济美","加加","金冠园","金兰","金狮","有味家","金苏","荆楚","景山","居易","聚百鲜","科沁万佳","孔膳坊","快鹿","阆中","老才臣","食味的初相","老蔡","老恒和","老潘头","李锦记","利民","六月鲜","春峰","龙菲","秋生饭堂","龙牌","隆昌","楼茂记","鲁花自然鲜","云上","禄荣","麻旺","美富达","美极","美味源","蒙古真","渔山隐","米吉真味","酿一村","盘溪","彭万春","浦源","奇峰","千禾","千鹤屋","粮赞","钱万隆","清净园","清香园","仁昌记","三不加","悦意","三和四美","博爱酵园","山古坊","膳府","膳之堂","盛田","四海","寺冈","苏美","太太乐","泰康","唐人基","唐世家","淘大","腾安","同珍","妥甸","拓东","丸天","万通","万字","味事达","五天","犀浦","仙家","先市","鲜渡","咸亨","香满园","小东字","笑厨","新宇","星湖","徐同泰","薛泰丰","扬名","尧记","肴易食","一统原创","一休屋","伊例家","宜赤必","优和","鱼味鲜","禹谟","玉堂","御酿坊","缘木记","粤香园","灶基","詹王","张家三嫂","长寿结","珍极","正信","正阳河","至味","致美斋","中邦","中冷泉","中调","珠江桥","梓山","自然集","佐参王","佐香园","中坝","天府","南吉","清湖","味华","佐餐王","一品江南","金顶","玉河","巧媳妇","齐鲁","梁山好汉","王家园子","食圣","山口","川鹰","德通","新汶","四海","德馨斋","玉兔","灯塔","仙鹤","宏林","贵族王中王","万和","口珍","同福永","威极","嘉美乐","天浩圆","铁鸟","恒裕","周太","海鸥","太阳岛","百花","小神厨","龙菲","太和","天一","美乐","三汇","通海","黑珍珠","百乐","吉鹤村","岭桥","瓦缸","味莼园","百花串","锦酿","福香居","铁石","石桥","清华","味邦","光华","罕王","营宝","非常","大有丰","沙陀","味味晓","云晓","巧妈妈","振龙","乾缘","稻香园","一品斋","孔雀","武大郎","绿芳","天赐","益彰","建洛","天口","一品江南","机轮","溢美堂","山乡","榕江","嘉乐美","万路通","肖大妈","争荣","仙源","敬义泰","昆湖","鼎兴","临江寺","迈进","玉和","通德","民天","胡玉美","楼茂记","鼎丰","古灯","槐茂","榕城","BB","汉记","松城","森江","美狮","龙华","启航","隆邦","新汶","四海","龙之味","北康","金玉兰","小二黑","吉成"]

    def getPage(self, url):
        """Crawl up to 10 result pages of a taobao search (48 items per page),
        parse the embedded g_page_config JSON and insert each auction row
        into MySQL."""
        position = 1
        i = 1
        i_url = url
        refers = self.home_url
        max_page = 10   # hard-coded page limit
        size_page = 48  # items per result page, used for the `s` offset
        while i <= max_page:
            page = self.crawler.getData(i_url, refers)
            refers = i_url
            # next page: taobao paginates via the `s` (start offset) parameter
            i_url = url + '&bcoffset=1&s=%s' % str(i*size_page)
            i += 1
            if not page or page == '':
                print 'not find data url:',i_url
                time.sleep(4)
                continue
            # search results are embedded as a JSON blob in a <script> tag
            m = re.search(r'<script>\s+g_page_config = ({.+?});.+?</script>', page, flags=re.S)
            if m:
                page_config = m.group(1)
                # strip newlines so json.loads gets one clean line
                page_config_s = re.sub(r'\n+','',page_config)
                data = json.loads(page_config_s)
                if data.has_key("mods"):
                    if data["mods"].has_key("itemlist"):
                        itemlist = data["mods"]["itemlist"]
                        if itemlist.has_key("data"):
                            itemlist_data = itemlist["data"]
                            if itemlist_data.has_key("auctions"):
                                for item in itemlist_data["auctions"]:
                                    # fall back to the list position when no id is found
                                    item_id = position
                                    m = re.search(r'id=(\d+)', item["detail_url"], flags=re.S)
                                    if m:
                                        item_id = m.group(1)
                                    # view_sales is a display string; pull out the leading number
                                    item_sales = item["view_sales"]
                                    m = re.search(r'(\d+)', item["view_sales"], flags=re.S)
                                    if m:
                                        item_sales = m.group(1)
                                    print Common.time_s(Common.now()), position, item_id, item["raw_title"], item["view_price"], item_sales, item["user_id"], item["nick"], "http:" + item["detail_url"], "http:" + item["shopLink"]
                                    self.mysqlAccess.insert_item((Common.time_s(Common.now()), str(item_id), str(position), str(item["raw_title"]), str(item["view_price"]), str(item_sales), "http:" + item["detail_url"], item["user_id"], str(item["nick"]), "http:" + item["shopLink"]))
                                    position += 1
            time.sleep(4)  # throttle between result pages

    def getItems(self):
        """Placeholder: the threaded item-crawl driver is commented out."""
        #for link in self.link_list: self.itemPage(link)
        max_th = 10
        #if len(self.link_list) > max_th:
        #    m_itemsObj = BagItemM(self.home_url,self.brand_type, max_th)
        #else:
        #    m_itemsObj = BagItemM(self.home_url,self.brand_type, len(self.link_list))
        #m_itemsObj.createthread()
        #m_itemsObj.putItems(self.link_list)
        #m_itemsObj.run()
        #self.items.extend(m_itemsObj.items)

    def itemPage(self, val):
        # NOTE(review): none of these names (serie_title, i_title, ...) are
        # defined in this scope — calling this raises NameError; presumably
        # they were meant to be unpacked from `val`. Confirm before use.
        print '# itemPage :', serie_title, i_title, i_name, i_price, i_unit, i_size, i_url, i_img

    def outItems(self, f):
        """Write the collected items to file `f`, prefixed with a header row.

        NOTE(review): inserts the header into self.items in place, so calling
        this twice writes the header twice.
        """
        s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号'
        with open(f, 'w') as f_item:
            self.items.insert(0, s)
            f_item.write('\n'.join(self.items))