i_size += m.group(1) + ":" + m.group(2) + ";" i_number m = re.search(r' <div class="modelFabricColorWrapper">\s*<div class="inner".*?>\s*<span class="modelTitle">.+?</span>.+?<span.*?class="value">(.+?)</span>\s*</div>\s*</div>\s*</div>', page, flags=re.S) if m: i_number = m.group(1) i = BagItem(self.brand_type) i.initItem(serie_title, '', i_name, '', '', i_size, i_url, i_img, i_number) print '# itemPage:',i.outItem() #self.items.append(i.outItem()) #print '# itemPage :', serie_title, i_name, i_url, i_img, i_size def outItems(self, f): s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号' with open(f, 'w') as f_item: self.items.insert(0, s) f_item.write('\n'.join(self.items)) if __name__ == '__main__': print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) b = BottegavenetaBag() b_url = "http://www.bottegaveneta.com/wy/%E5%A5%B3%E5%A3%AB/onlineboutique/%E6%89%8B%E8%A2%8B" b.bagPage(b_url) b.bagItems() f = Config.dataPath + 'bottegaveneta_%s.txt' %Common.today_ss() b.outItems(f) print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
m = re.search( r'<div id="itemTechSheet">.+?<p class="prodCode">(.+?)</p>', page, flags=re.S) if m: i_number = m.group(1).split(':')[1].strip() i = BagItem(self.brand_type) i.initItem('', item_title, i_name, i_price, i_unit, i_size, i_url, i_img, i_number) print '# itemPage:', i.outItem() #self.items.append(i.outItem()) def outItems(self, f): s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号' with open(f, 'w') as f_item: self.items.insert(0, s) f_item.write('\n'.join(self.items)) if __name__ == '__main__': print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) b = DolcegabbanaBag() b_url = "http://www.dolcegabbana.com.cn/cn/dolce-gabbana/%E5%A5%B3%E5%A3%AB/onlinestore/%E5%8C%85%E8%A2%8B" b.bagPage(b_url) b.bagItems() f = Config.dataPath + 'dolcegabbana_%s.txt' % Common.today_ss() b.outItems(f) print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
# NOTE: the statements up to outItems are the tail of an item-parsing routine
# whose beginning lies outside this fragment; they are only re-laid one per line.
# NOTE(review): the nesting of the if/else below is reconstructed from the
# collapsed text (the `else` is paired with the outer `if`) -- confirm.
m = re.search(r'"currency-symbol":"(.+?)"', data, flags=re.S)
if m:
    unit = m.group(1)
if self.item_price != '':
    if price:
        i_price += '-' + price
else:
    if price:
        i_price = price
if unit:
    i_unit = unit
i = BagItem(self.brand_type)
i.initItem(serie_title, i_title, i_name, i_price, i_unit, i_size, i_url, i_img, i_number)
#print '# itemPage :', serie_title, i_title, i_name, i_price, i_unit, i_size, i_url, i_img


def outItems(self, f):
    """Write the collected item rows to the file at path ``f``.

    A '#'-prefixed header line naming the columns is written first, followed
    by one entry of ``self.items`` per line (no trailing newline).
    """
    # Columns: series name | product tag | product name | price |
    # currency unit | size | link | image | product number.
    s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号'
    # Build the rows separately instead of inserting the header into
    # self.items, so repeated calls neither duplicate the header nor alter
    # the collected items.
    rows = [s] + list(self.items)
    with open(f, 'w') as f_item:
        f_item.write('\n'.join(rows))


if __name__ == '__main__':
    # Single-argument print(...) behaves the same as the Python 2 statement
    # and the Python 3 function. A timestamp is printed before and after.
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
    b = ChanelBag()
    b_url = 'http://www.chanel.com/zh_CN/fashion/products/handbags.html'
    b.bagPage(b_url)
    b.bagItems()
    f = Config.dataPath + 'chanel_%s.txt' % Common.today_ss()
    b.outItems(f)
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
def bagItems(self):
    """Fetch every link in ``self.link_list`` through a BagItemM helper.

    The helper (defined elsewhere) is built with ``self.home_url``,
    ``self.brand_type`` and a count capped at 10; its ``items`` are appended
    to ``self.items`` once it has run. Nothing is done for an empty link list.
    """
    #for link in self.link_list: self.itemPage(link)
    if not self.link_list:
        # No links collected: avoid building a helper with a count of zero.
        return
    max_th = 10
    # Third argument is the smaller of the link count and max_th
    # (presumably the number of worker threads -- confirm against BagItemM).
    m_itemsObj = BagItemM(self.home_url, self.brand_type,
                          min(len(self.link_list), max_th))
    m_itemsObj.createthread()
    m_itemsObj.putItems(self.link_list)
    m_itemsObj.run()
    self.items.extend(m_itemsObj.items)


def outItems(self, f):
    """Write the collected item rows to the file at path ``f``.

    A '#'-prefixed header line naming the columns is written first, followed
    by one entry of ``self.items`` per line (no trailing newline).
    """
    # Columns: series name | product tag | product name | price |
    # currency unit | size | link | image | product number.
    s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号'
    # Build the rows separately instead of inserting the header into
    # self.items, so repeated calls neither duplicate the header nor alter
    # the collected items.
    rows = [s] + list(self.items)
    with open(f, 'w') as f_item:
        f_item.write('\n'.join(rows))


if __name__ == '__main__':
    # Single-argument print(...) behaves the same as the Python 2 statement
    # and the Python 3 function. A timestamp is printed before and after.
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
    b = GivenchyBag()
    b_url = 'http://www.givenchy.com/cn/'
    b.bagPage(b_url)
    b.bagItems()
    f = Config.dataPath + 'givenchy_%s.txt' % Common.today_ss()
    b.outItems(f)
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
m = re.search( r'<div class="base">\s*<div class="sku-brand">.+?<dl class="hidden"><dt>商品货号: </dt><dd>(.+?)</dd></dl>\s*</div>', page, flags=re.S) if m: i_number = m.group(1) i = BagItem(self.brand_type) i.initItem(serie_title, '', i_name, i_price, i_unit, i_size, i_url, i_img, i_number) print '# itemPage:', i.outItem() #self.items.append(i.outItem()) def outItems(self, f): s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号' with open(f, 'w') as f_item: self.items.insert(0, s) f_item.write('\n'.join(self.items)) if __name__ == '__main__': print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) b = BossBag() b_url = "http://store.hugoboss.cn/category.php?id=3835&form_nav" b.bagPage(b_url) b.bagItems() f = Config.dataPath + 'boss_%s.txt' % Common.today_ss() b.outItems(f) print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
#i_size = "".join(size_str.split()) i_size = re.sub(r'\s*','',size_str) print "".join(i_size.split()) i_number = '' m = re.search(r' <div class="modelFabricColorWrapper">\s*<div class="inner".*?>\s*<span class="modelTitle">.+?</span>.+?<span.*?class="value">(.+?)</span>\s*</div>\s*</div>\s*</div>', page, flags=re.S) if m: i_number = m.group(1) i = BagItem() i.initItem(serie_title, '', i_name, i_price, i_unit, i_size, i_url, i_img, i_number) self.items.append(i.outItem) #print '# itemPage :', serie_title, i_name, i_price, i_unit, i_size, i_url, i_img def outItems(self, f): s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号' with open(f, 'w') as f_item: self.items.insert(0, s) f_item.write('\n'.join(self.items)) if __name__ == '__main__': print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) b = ArmaniBag() b.bagPage() b.bagItems() f = Config.dataPath + 'armani_%s.txt' %Common.today_ss() b.outItems(f) print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
# NOTE: the statements up to outItems are the tail of an item-parsing routine
# whose beginning lies outside this fragment; they are only re-laid one per
# line, apart from the outItem() call fix marked below.
i_number = ''
m = re.search(
    r' <div class="modelFabricColorWrapper">\s*<div class="inner".*?>\s*<span class="modelTitle">.+?</span>.+?<span.*?class="value">(.+?)</span>\s*</div>\s*</div>\s*</div>', page, flags=re.S)
if m:
    i_number = m.group(1)
i = BagItem()
i.initItem(serie_title, '', i_name, i_price, i_unit, i_size, i_url, i_img, i_number)
# Append the rendered row (call outItem) rather than the bound method itself:
# outItems joins self.items with '\n'.join, which only accepts strings.
self.items.append(i.outItem())
#print '# itemPage :', serie_title, i_name, i_price, i_unit, i_size, i_url, i_img


def outItems(self, f):
    """Write the collected item rows to the file at path ``f``.

    A '#'-prefixed header line naming the columns is written first, followed
    by one entry of ``self.items`` per line (no trailing newline).
    """
    # Columns: series name | product tag | product name | price |
    # currency unit | size | link | image | product number.
    s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号'
    # Build the rows separately instead of inserting the header into
    # self.items, so repeated calls neither duplicate the header nor alter
    # the collected items.
    rows = [s] + list(self.items)
    with open(f, 'w') as f_item:
        f_item.write('\n'.join(rows))


if __name__ == '__main__':
    # Single-argument print(...) behaves the same as the Python 2 statement
    # and the Python 3 function. A timestamp is printed before and after.
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
    b = ArmaniBag()
    b.bagPage()
    b.bagItems()
    f = Config.dataPath + 'armani_%s.txt' % Common.today_ss()
    b.outItems(f)
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
if m: i_size = m.group(1).strip() i_number = '' m = re.search(r'<div class="columns-wrapper">.+?<div class="column">.*?<div class="reference">\s*<p>(.+?)</p>\s*</div>', page, flags=re.S) if m: s_number = m.group(1) i_number = s_number.split('-')[1].strip() i = BagItem() i.initItem(serie_title, i_title, i_name, i_price, i_unit, i_size, i_url, i_img, i_number) self.items.append(i.outItem) print '# itemPage :', serie_title, i_title, i_name, i_price, i_unit, i_size, i_url, i_img def outItems(self, f): s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号' with open(f, 'w') as f_item: self.items.insert(0, s) f_item.write('\n'.join(self.items)) if __name__ == '__main__': print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) b = DiorBag() b_url = 'http://www.dior.cn/couture/zh_cn/%E5%A5%B3%E5%A3%AB%E6%97%B6%E8%A3%85/%E7%9A%AE%E5%85%B7' b.bagPage(b_url) b.bagItems() f = Config.dataPath + 'dior_%s.txt' %Common.today_ss() b.outItems(f) print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
r' <div class="modelFabricColorWrapper">\s*<div class="inner".*?>\s*<span class="modelTitle">.+?</span>.+?<span.*?class="value">(.+?)</span>\s*</div>\s*</div>\s*</div>', page, flags=re.S) if m: i_number = m.group(1) i = BagItem(self.brand_type) i.initItem(serie_title, '', i_name, '', '', i_size, i_url, i_img, i_number) print '# itemPage:', i.outItem() #self.items.append(i.outItem()) #print '# itemPage :', serie_title, i_name, i_url, i_img, i_size def outItems(self, f): s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号' with open(f, 'w') as f_item: self.items.insert(0, s) f_item.write('\n'.join(self.items)) if __name__ == '__main__': print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) b = BottegavenetaBag() b_url = "http://www.bottegaveneta.com/wy/%E5%A5%B3%E5%A3%AB/onlineboutique/%E6%89%8B%E8%A2%8B" b.bagPage(b_url) b.bagItems() f = Config.dataPath + 'bottegaveneta_%s.txt' % Common.today_ss() b.outItems(f) print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
self.crawler = MyCrawler()
# NOTE: the assignments up to bagPage are the tail of an initialiser whose
# beginning lies outside this fragment; they are only re-laid one per line.
# Brand website links
self.home_url = 'http://www.mcmworldwide.com'
self.women_url = self.home_url + '/en/women'
self.bag_url = self.women_url + '/bags'
self.backpack_url = self.women_url + '/backpacks'
self.leather_url = self.women_url + '/small-leather-goods'
self.refers = None
# Crawled product list
self.links = []
self.items = []


def bagPage(self):
    """Fetch the bag listing page via ``self.crawler.getData``.

    Returns None immediately when nothing was fetched; no further processing
    of the page is visible in this fragment.
    """
    # Was `self.bug_url`, an attribute that is never assigned (the URL set
    # above is `bag_url`), so the original raised AttributeError here.
    # NOTE(review): `url` is built but not passed to getData -- confirm
    # whether the paged/sorted URL was meant to be fetched instead.
    url = self.bag_url + '#start=0&sz=32&srule=New'
    page = self.crawler.getData(self.bag_url, self.women_url)
    # `not page` already covers the empty string.
    if not page:
        return


if __name__ == '__main__':
    b = ChanelBag()
    b_url = 'http://www.chanel.com/zh_CN/fashion/products/handbags/g.spring-summer-2015.c.15S.html'
    b.bagPage(b_url)
    b.bagItems()
    f = Config.dataPath + 'chanel_%s.txt' % Common.today_ss()
    # Single-argument print(...) behaves the same as the Python 2 statement
    # and the Python 3 function.
    print(f)
    b.outItems(f)
m = re.search(r'<h2 class="sku reading-and-link-text">(.+?)</h2>', page, flags=re.S) if m: i_number = m.group(1).strip() else: m = re.search(r'<meta itemprop="identifier" content="sku:(.+?)"/>', page, flags=re.S) if m: i_number = m.group(1).strip() i = BagItem(self.brand_type) i.initItem(serie_title, '', i_name, i_price, i_unit, i_size, i_url, i_img, i_number) print '# itemPage:',i.outItem() #self.items.append(i.outItem()) def outItems(self, f): s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号' with open(f, 'w') as f_item: self.items.insert(0, s) f_item.write('\n'.join(self.items)) if __name__ == '__main__': print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) b = LouisvuittonBag() b_url = "http://www.louisvuitton.cn/zhs-cn/homepage" b.bagPage(b_url) b.bagItems() f = Config.dataPath + 'louisvuitton_%s.txt' %Common.today_ss() b.outItems(f) print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
r'<div class="columns-wrapper">.+?<div class="column">.*?<div class="reference">\s*<p>(.+?)</p>\s*</div>', page, flags=re.S) if m: s_number = m.group(1) i_number = s_number.split('-')[1].strip() i = BagItem() i.initItem(serie_title, i_title, i_name, i_price, i_unit, i_size, i_url, i_img, i_number) self.items.append(i.outItem) print '# itemPage :', serie_title, i_title, i_name, i_price, i_unit, i_size, i_url, i_img def outItems(self, f): s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号' with open(f, 'w') as f_item: self.items.insert(0, s) f_item.write('\n'.join(self.items)) if __name__ == '__main__': print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) b = DiorBag() b_url = 'http://www.dior.cn/couture/zh_cn/%E5%A5%B3%E5%A3%AB%E6%97%B6%E8%A3%85/%E7%9A%AE%E5%85%B7' b.bagPage(b_url) b.bagItems() f = Config.dataPath + 'dior_%s.txt' % Common.today_ss() b.outItems(f) print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
i_number = '' m = re.search(r'<div class="product-code">(.+?)型号代码(.+?)</div>', page, flags=re.S) if m: i_size, i_number = m.group(1).strip(), m.group(2).strip() i = BagItem(self.brand_type) i.initItem(serie_title, '', i_name, i_price, i_unit, i_size, i_url, i_img, i_number) print '# itemPage:', i.outItem() #self.items.append(i.outItem()) def outItems(self, f): s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号' with open(f, 'w') as f_item: self.items.insert(0, s) f_item.write('\n'.join(self.items)) if __name__ == '__main__': print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) b = FerragamoBag() b_url = "http://www.ferragamo.cn/woman/handbags/" b.bagPage(b_url) b.bagItems() f = Config.dataPath + 'ferragamo_%s.txt' % Common.today_ss() b.outItems(f) print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
i_img = m.group(1) i_size = "" i_number = "" m = re.search(r'<div class="product-code">(.+?)型号代码(.+?)</div>', page, flags=re.S) if m: i_size, i_number = m.group(1).strip(), m.group(2).strip() i = BagItem(self.brand_type) i.initItem(serie_title, "", i_name, i_price, i_unit, i_size, i_url, i_img, i_number) print "# itemPage:", i.outItem() # self.items.append(i.outItem()) def outItems(self, f): s = "#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号" with open(f, "w") as f_item: self.items.insert(0, s) f_item.write("\n".join(self.items)) if __name__ == "__main__": print time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())) b = FerragamoBag() b_url = "http://www.ferragamo.cn/woman/handbags/" b.bagPage(b_url) b.bagItems() f = Config.dataPath + "ferragamo_%s.txt" % Common.today_ss() b.outItems(f) print time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))
def bagItems(self):
    """Fetch every link in ``self.link_list`` through a BagItemM helper.

    The helper (defined elsewhere) is built with ``self.home_url``,
    ``self.brand_type`` and a count capped at 10; its ``items`` are appended
    to ``self.items`` once it has run. Nothing is done for an empty link list.
    """
    #for link in self.link_list: self.itemPage(link)
    if not self.link_list:
        # No links collected: avoid building a helper with a count of zero.
        return
    max_th = 10
    # Third argument is the smaller of the link count and max_th
    # (presumably the number of worker threads -- confirm against BagItemM).
    m_itemsObj = BagItemM(self.home_url, self.brand_type,
                          min(len(self.link_list), max_th))
    m_itemsObj.createthread()
    m_itemsObj.putItems(self.link_list)
    m_itemsObj.run()
    self.items.extend(m_itemsObj.items)


def outItems(self, f):
    """Write the collected item rows to the file at path ``f``.

    A '#'-prefixed header line naming the columns is written first, followed
    by one entry of ``self.items`` per line (no trailing newline).
    """
    # Columns: series name | product tag | product name | price |
    # currency unit | size | link | image | product number.
    s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号'
    # Build the rows separately instead of inserting the header into
    # self.items, so repeated calls neither duplicate the header nor alter
    # the collected items.
    rows = [s] + list(self.items)
    with open(f, 'w') as f_item:
        f_item.write('\n'.join(rows))


if __name__ == '__main__':
    # Single-argument print(...) behaves the same as the Python 2 statement
    # and the Python 3 function. A timestamp is printed before and after.
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
    b = GivenchyBag()
    b_url = 'http://www.givenchy.com/cn/'
    b.bagPage(b_url)
    b.bagItems()
    f = Config.dataPath + 'givenchy_%s.txt' % Common.today_ss()
    b.outItems(f)
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
m = re.search(r'<div class="itemDimensions">.+?<span class="dimensions">(.+?)</span></div>', page, flags=re.S) if m: i_size = m.group(1) i_number m = re.search(r'<div class="styleIdDescription">货号.+?<span.*?>(.+?)</span></div>', page, flags=re.S) if m: i_number = m.group(1) i = BagItem(self.brand_type) i.initItem(serie_title, '', i_name, i_price, i_unit, i_size, i_url, i_img, i_number) print '# itemPage:',i.outItem() #self.items.append(i.outItem()) def outItems(self, f): s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号' with open(f, 'w') as f_item: self.items.insert(0, s) f_item.write('\n'.join(self.items)) if __name__ == '__main__': print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) b = YslBag() b_url = "http://www.ysl.com/wy/shop-product/%E5%A5%B3%E5%A3%AB" b.bagPage(b_url) b.bagItems() f = Config.dataPath + 'ysl_%s.txt' %Common.today_ss() b.outItems(f) print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
m = re.search(r'<span.+?>尺寸大小:</span>(.+?)</span>', page, flags=re.S) if m: i_size = re.sub(r'<.+?>','',m.group(1)) i_number = '' m = re.search(r'<div class="base">\s*<div class="sku-brand">.+?<dl class="hidden"><dt>商品货号: </dt><dd>(.+?)</dd></dl>\s*</div>', page, flags=re.S) if m: i_number = m.group(1) i = BagItem(self.brand_type) i.initItem(serie_title, '', i_name, i_price, i_unit, i_size, i_url, i_img, i_number) print '# itemPage:',i.outItem() #self.items.append(i.outItem()) def outItems(self, f): s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号' with open(f, 'w') as f_item: self.items.insert(0, s) f_item.write('\n'.join(self.items)) if __name__ == '__main__': print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) b = BossBag() b_url = "http://store.hugoboss.cn/category.php?id=3835&form_nav" b.bagPage(b_url) b.bagItems() f = Config.dataPath + 'boss_%s.txt' %Common.today_ss() b.outItems(f) print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
m = re.search(r'<div class="scrollCnt">\s*<ul>.+?<li>(尺码.+?)</li>', page, flags=re.S) if m: i_size = m.group(1) i_number = '' m = re.search(r'<div id="itemTechSheet">.+?<p class="prodCode">(.+?)</p>', page, flags=re.S) if m: i_number = m.group(1).split(':')[1].strip() i = BagItem(self.brand_type) i.initItem('', item_title, i_name, i_price, i_unit, i_size, i_url, i_img, i_number) print '# itemPage:',i.outItem() #self.items.append(i.outItem()) def outItems(self, f): s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号' with open(f, 'w') as f_item: self.items.insert(0, s) f_item.write('\n'.join(self.items)) if __name__ == '__main__': print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) b = DolcegabbanaBag() b_url = "http://www.dolcegabbana.com.cn/cn/dolce-gabbana/%E5%A5%B3%E5%A3%AB/onlinestore/%E5%8C%85%E8%A2%8B" b.bagPage(b_url) b.bagItems() f = Config.dataPath + 'dolcegabbana_%s.txt' %Common.today_ss() b.outItems(f) print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))