예제 #1
0
            i_size += m.group(1) + ":" + m.group(2) + ";"

        i_number
        m = re.search(r' <div class="modelFabricColorWrapper">\s*<div class="inner".*?>\s*<span class="modelTitle">.+?</span>.+?<span.*?class="value">(.+?)</span>\s*</div>\s*</div>\s*</div>', page, flags=re.S)
        if m:
            i_number = m.group(1)

        i = BagItem(self.brand_type)
        i.initItem(serie_title, '', i_name, '', '', i_size, i_url, i_img, i_number)
        print '# itemPage:',i.outItem()
        #self.items.append(i.outItem())    
        #print '# itemPage :', serie_title, i_name, i_url, i_img, i_size

    def outItems(self, f):
        """Write the header row followed by every collected item row to *f*.

        Rows are joined with newlines (no trailing newline). The header is
        prepended to a temporary list, so ``self.items`` is left untouched
        and repeated calls never accumulate duplicate header rows.

        f -- path of the output file; it is opened in 'w' mode.
        """
        # Columns: series | tag | name | price | currency | size | link | image | product number
        s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号'
        rows = [s] + list(self.items)
        with open(f, 'w') as f_item:
            f_item.write('\n'.join(rows))

if __name__ == '__main__':
    # Print the wall-clock time at start and end of the run.
    time_fmt = '%Y-%m-%d %H:%M:%S'
    print(time.strftime(time_fmt, time.localtime()))

    # Drive the scraper: bagPage on the listing URL, then bagItems.
    b_url = "http://www.bottegaveneta.com/wy/%E5%A5%B3%E5%A3%AB/onlineboutique/%E6%89%8B%E8%A2%8B"
    b = BottegavenetaBag()
    b.bagPage(b_url)
    b.bagItems()

    # Output file lives under Config.dataPath, suffixed by Common.today_ss().
    file_name = 'bottegaveneta_%s.txt' % Common.today_ss()
    f = Config.dataPath + file_name
    b.outItems(f)
    print(time.strftime(time_fmt, time.localtime()))
    
예제 #2
0
        m = re.search(
            r'<div id="itemTechSheet">.+?<p class="prodCode">(.+?)</p>',
            page,
            flags=re.S)
        if m:
            i_number = m.group(1).split(':')[1].strip()

        i = BagItem(self.brand_type)
        i.initItem('', item_title, i_name, i_price, i_unit, i_size, i_url,
                   i_img, i_number)
        print '# itemPage:', i.outItem()
        #self.items.append(i.outItem())

    def outItems(self, f):
        """Write the header row followed by every collected item row to *f*.

        Rows are joined with newlines (no trailing newline). The header is
        prepended to a temporary list, so ``self.items`` is left untouched
        and repeated calls never accumulate duplicate header rows.

        f -- path of the output file; it is opened in 'w' mode.
        """
        # Columns: series | tag | name | price | currency | size | link | image | product number
        s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号'
        rows = [s] + list(self.items)
        with open(f, 'w') as f_item:
            f_item.write('\n'.join(rows))


if __name__ == '__main__':
    # Print the wall-clock time at start and end of the run.
    time_fmt = '%Y-%m-%d %H:%M:%S'
    print(time.strftime(time_fmt, time.localtime()))

    # Drive the scraper: bagPage on the listing URL, then bagItems.
    b_url = "http://www.dolcegabbana.com.cn/cn/dolce-gabbana/%E5%A5%B3%E5%A3%AB/onlinestore/%E5%8C%85%E8%A2%8B"
    b = DolcegabbanaBag()
    b.bagPage(b_url)
    b.bagItems()

    # Output file lives under Config.dataPath, suffixed by Common.today_ss().
    file_name = 'dolcegabbana_%s.txt' % Common.today_ss()
    f = Config.dataPath + file_name
    b.outItems(f)
    print(time.strftime(time_fmt, time.localtime()))
예제 #3
0
                    m = re.search(r'"currency-symbol":"(.+?)"', data, flags=re.S)
                    if m: unit = m.group(1)
                if self.item_price != '':
                    if price: i_price += '-' + price
                else:
                    if price: i_price = price
                    if unit: i_unit  = unit
        
        i = BagItem(self.brand_type)
        i.initItem(serie_title, i_title, i_name, i_price, i_unit, i_size, i_url, i_img, i_number)
        #print '# itemPage :', serie_title, i_title, i_name, i_price, i_unit, i_size, i_url, i_img

    def outItems(self, f):
        """Write the header row followed by every collected item row to *f*.

        Rows are joined with newlines (no trailing newline). The header is
        prepended to a temporary list, so ``self.items`` is left untouched
        and repeated calls never accumulate duplicate header rows.

        f -- path of the output file; it is opened in 'w' mode.
        """
        # Columns: series | tag | name | price | currency | size | link | image | product number
        s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号'
        rows = [s] + list(self.items)
        with open(f, 'w') as f_item:
            f_item.write('\n'.join(rows))


if __name__ == '__main__':
    # Print the wall-clock time at start and end of the run.
    time_fmt = '%Y-%m-%d %H:%M:%S'
    print(time.strftime(time_fmt, time.localtime()))

    # Drive the scraper: bagPage on the listing URL, then bagItems.
    b_url = 'http://www.chanel.com/zh_CN/fashion/products/handbags.html'
    b = ChanelBag()
    b.bagPage(b_url)
    b.bagItems()

    # Output file lives under Config.dataPath, suffixed by Common.today_ss().
    file_name = 'chanel_%s.txt' % Common.today_ss()
    f = Config.dataPath + file_name
    b.outItems(f)
    print(time.strftime(time_fmt, time.localtime()))
    
예제 #4
0
                
    def bagItems(self):
        """Hand ``self.link_list`` to a BagItemM helper and collect its items.

        The third BagItemM argument is ``len(self.link_list)`` capped at 10
        -- presumably a worker-thread count; confirm against BagItemM.
        After ``run()`` the helper's ``items`` are appended to ``self.items``.
        """
        thread_cap = 10
        worker_count = min(thread_cap, len(self.link_list))
        worker = BagItemM(self.home_url, self.brand_type, worker_count)
        worker.createthread()
        worker.putItems(self.link_list)
        worker.run()
        self.items.extend(worker.items)

    def outItems(self, f):
        """Write the header row followed by every collected item row to *f*.

        Rows are joined with newlines (no trailing newline). The header is
        prepended to a temporary list, so ``self.items`` is left untouched
        and repeated calls never accumulate duplicate header rows.

        f -- path of the output file; it is opened in 'w' mode.
        """
        # Columns: series | tag | name | price | currency | size | link | image | product number
        s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号'
        rows = [s] + list(self.items)
        with open(f, 'w') as f_item:
            f_item.write('\n'.join(rows))

if __name__ == '__main__':
    # Print the wall-clock time at start and end of the run.
    time_fmt = '%Y-%m-%d %H:%M:%S'
    print(time.strftime(time_fmt, time.localtime()))

    # Drive the scraper: bagPage on the site URL, then bagItems.
    b_url = 'http://www.givenchy.com/cn/'
    b = GivenchyBag()
    b.bagPage(b_url)
    b.bagItems()

    # Output file lives under Config.dataPath, suffixed by Common.today_ss().
    file_name = 'givenchy_%s.txt' % Common.today_ss()
    f = Config.dataPath + file_name
    b.outItems(f)
    print(time.strftime(time_fmt, time.localtime()))
 
예제 #5
0
파일: bossBage.py 프로젝트: xzhoutxd/brand
        m = re.search(
            r'<div class="base">\s*<div class="sku-brand">.+?<dl class="hidden"><dt>商品货号: </dt><dd>(.+?)</dd></dl>\s*</div>',
            page,
            flags=re.S)
        if m:
            i_number = m.group(1)

        i = BagItem(self.brand_type)
        i.initItem(serie_title, '', i_name, i_price, i_unit, i_size, i_url,
                   i_img, i_number)
        print '# itemPage:', i.outItem()
        #self.items.append(i.outItem())

    def outItems(self, f):
        """Write the header row followed by every collected item row to *f*.

        Rows are joined with newlines (no trailing newline). The header is
        prepended to a temporary list, so ``self.items`` is left untouched
        and repeated calls never accumulate duplicate header rows.

        f -- path of the output file; it is opened in 'w' mode.
        """
        # Columns: series | tag | name | price | currency | size | link | image | product number
        s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号'
        rows = [s] + list(self.items)
        with open(f, 'w') as f_item:
            f_item.write('\n'.join(rows))


if __name__ == '__main__':
    # Print the wall-clock time at start and end of the run.
    time_fmt = '%Y-%m-%d %H:%M:%S'
    print(time.strftime(time_fmt, time.localtime()))

    # Drive the scraper: bagPage on the category URL, then bagItems.
    b_url = "http://store.hugoboss.cn/category.php?id=3835&form_nav"
    b = BossBag()
    b.bagPage(b_url)
    b.bagItems()

    # Output file lives under Config.dataPath, suffixed by Common.today_ss().
    file_name = 'boss_%s.txt' % Common.today_ss()
    f = Config.dataPath + file_name
    b.outItems(f)
    print(time.strftime(time_fmt, time.localtime()))
예제 #6
0
파일: armaniBag.py 프로젝트: xzhoutxd/brand
            #i_size = "".join(size_str.split())
            i_size = re.sub(r'\s*','',size_str)
            print "".join(i_size.split())

        i_number = ''
        m = re.search(r' <div class="modelFabricColorWrapper">\s*<div class="inner".*?>\s*<span class="modelTitle">.+?</span>.+?<span.*?class="value">(.+?)</span>\s*</div>\s*</div>\s*</div>', page, flags=re.S)
        if m:
            i_number = m.group(1)

        i = BagItem()
        i.initItem(serie_title, '', i_name, i_price, i_unit, i_size, i_url, i_img, i_number)
        self.items.append(i.outItem)    
        #print '# itemPage :', serie_title, i_name, i_price, i_unit, i_size, i_url, i_img

    def outItems(self, f):
        """Write the header row followed by every collected item row to *f*.

        Rows are joined with newlines (no trailing newline). The header is
        prepended to a temporary list, so ``self.items`` is left untouched
        and repeated calls never accumulate duplicate header rows.

        f -- path of the output file; it is opened in 'w' mode.
        """
        # Columns: series | tag | name | price | currency | size | link | image | product number
        s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号'
        rows = [s] + list(self.items)
        with open(f, 'w') as f_item:
            f_item.write('\n'.join(rows))

if __name__ == '__main__':
    # Print the wall-clock time at start and end of the run.
    time_fmt = '%Y-%m-%d %H:%M:%S'
    print(time.strftime(time_fmt, time.localtime()))

    # Drive the scraper: bagPage (no URL argument here), then bagItems.
    b = ArmaniBag()
    b.bagPage()
    b.bagItems()

    # Output file lives under Config.dataPath, suffixed by Common.today_ss().
    file_name = 'armani_%s.txt' % Common.today_ss()
    f = Config.dataPath + file_name
    b.outItems(f)
    print(time.strftime(time_fmt, time.localtime()))
    
예제 #7
0
파일: armaniBag.py 프로젝트: xzhoutxd/brand
        i_number = ''
        m = re.search(
            r' <div class="modelFabricColorWrapper">\s*<div class="inner".*?>\s*<span class="modelTitle">.+?</span>.+?<span.*?class="value">(.+?)</span>\s*</div>\s*</div>\s*</div>',
            page,
            flags=re.S)
        if m:
            i_number = m.group(1)

        i = BagItem()
        i.initItem(serie_title, '', i_name, i_price, i_unit, i_size, i_url,
                   i_img, i_number)
        self.items.append(i.outItem)
        #print '# itemPage :', serie_title, i_name, i_price, i_unit, i_size, i_url, i_img

    def outItems(self, f):
        """Write the header row followed by every collected item row to *f*.

        Rows are joined with newlines (no trailing newline). The header is
        prepended to a temporary list, so ``self.items`` is left untouched
        and repeated calls never accumulate duplicate header rows.

        f -- path of the output file; it is opened in 'w' mode.
        """
        # Columns: series | tag | name | price | currency | size | link | image | product number
        s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号'
        rows = [s] + list(self.items)
        with open(f, 'w') as f_item:
            f_item.write('\n'.join(rows))


if __name__ == '__main__':
    # Print the wall-clock time at start and end of the run.
    time_fmt = '%Y-%m-%d %H:%M:%S'
    print(time.strftime(time_fmt, time.localtime()))

    # Drive the scraper: bagPage (no URL argument here), then bagItems.
    b = ArmaniBag()
    b.bagPage()
    b.bagItems()

    # Output file lives under Config.dataPath, suffixed by Common.today_ss().
    file_name = 'armani_%s.txt' % Common.today_ss()
    f = Config.dataPath + file_name
    b.outItems(f)
    print(time.strftime(time_fmt, time.localtime()))
예제 #8
0
파일: diorBag.py 프로젝트: xzhoutxd/brand
                if m: i_size = m.group(1).strip()

        i_number = ''
        m = re.search(r'<div class="columns-wrapper">.+?<div class="column">.*?<div class="reference">\s*<p>(.+?)</p>\s*</div>', page, flags=re.S)
        if m:
            s_number = m.group(1)
            i_number = s_number.split('-')[1].strip()
                
        i = BagItem()
        i.initItem(serie_title, i_title, i_name, i_price, i_unit, i_size, i_url, i_img, i_number)
        self.items.append(i.outItem)    
        print '# itemPage :', serie_title, i_title, i_name, i_price, i_unit, i_size, i_url, i_img

    def outItems(self, f):
        """Write the header row followed by every collected item row to *f*.

        Rows are joined with newlines (no trailing newline). The header is
        prepended to a temporary list, so ``self.items`` is left untouched
        and repeated calls never accumulate duplicate header rows.

        f -- path of the output file; it is opened in 'w' mode.
        """
        # Columns: series | tag | name | price | currency | size | link | image | product number
        s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号'
        rows = [s] + list(self.items)
        with open(f, 'w') as f_item:
            f_item.write('\n'.join(rows))

if __name__ == '__main__':
    # Print the wall-clock time at start and end of the run.
    time_fmt = '%Y-%m-%d %H:%M:%S'
    print(time.strftime(time_fmt, time.localtime()))

    # Drive the scraper: bagPage on the listing URL, then bagItems.
    b_url = 'http://www.dior.cn/couture/zh_cn/%E5%A5%B3%E5%A3%AB%E6%97%B6%E8%A3%85/%E7%9A%AE%E5%85%B7'
    b = DiorBag()
    b.bagPage(b_url)
    b.bagItems()

    # Output file lives under Config.dataPath, suffixed by Common.today_ss().
    file_name = 'dior_%s.txt' % Common.today_ss()
    f = Config.dataPath + file_name
    b.outItems(f)
    print(time.strftime(time_fmt, time.localtime()))
    
예제 #9
0
            r' <div class="modelFabricColorWrapper">\s*<div class="inner".*?>\s*<span class="modelTitle">.+?</span>.+?<span.*?class="value">(.+?)</span>\s*</div>\s*</div>\s*</div>',
            page,
            flags=re.S)
        if m:
            i_number = m.group(1)

        i = BagItem(self.brand_type)
        i.initItem(serie_title, '', i_name, '', '', i_size, i_url, i_img,
                   i_number)
        print '# itemPage:', i.outItem()
        #self.items.append(i.outItem())
        #print '# itemPage :', serie_title, i_name, i_url, i_img, i_size

    def outItems(self, f):
        """Write the header row followed by every collected item row to *f*.

        Rows are joined with newlines (no trailing newline). The header is
        prepended to a temporary list, so ``self.items`` is left untouched
        and repeated calls never accumulate duplicate header rows.

        f -- path of the output file; it is opened in 'w' mode.
        """
        # Columns: series | tag | name | price | currency | size | link | image | product number
        s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号'
        rows = [s] + list(self.items)
        with open(f, 'w') as f_item:
            f_item.write('\n'.join(rows))


if __name__ == '__main__':
    # Print the wall-clock time at start and end of the run.
    time_fmt = '%Y-%m-%d %H:%M:%S'
    print(time.strftime(time_fmt, time.localtime()))

    # Drive the scraper: bagPage on the listing URL, then bagItems.
    b_url = "http://www.bottegaveneta.com/wy/%E5%A5%B3%E5%A3%AB/onlineboutique/%E6%89%8B%E8%A2%8B"
    b = BottegavenetaBag()
    b.bagPage(b_url)
    b.bagItems()

    # Output file lives under Config.dataPath, suffixed by Common.today_ss().
    file_name = 'bottegaveneta_%s.txt' % Common.today_ss()
    f = Config.dataPath + file_name
    b.outItems(f)
    print(time.strftime(time_fmt, time.localtime()))
예제 #10
0
        self.crawler = MyCrawler()

        # 品牌官网链接
        self.home_url = 'http://www.mcmworldwide.com'
        self.women_url = self.home_url + '/en/women'
        self.bag_url = self.women_url + '/bags'
        self.backpack_url = self.women_url + '/backpacks'
        self.leather_url = self.women_url + '/small-leather-goods'
        self.refers = None

        # 抓取商品列表
        self.links = []
        self.items = []

    def bagPage(self):
        """Request the bags listing page through the crawler.

        Calls ``self.crawler.getData(self.bag_url, self.women_url)`` and
        returns None; an empty or missing response returns early. No
        further processing of the page is present in this block.
        """
        # Uses ``bag_url`` (the attribute assigned next to home_url); the
        # previous ``bug_url`` was never set and raised AttributeError.
        # NOTE(review): ``url`` is built but not passed to getData -- confirm
        # whether the paged/sorted URL was meant to be fetched instead.
        url = self.bag_url + '#start=0&sz=32&srule=New'
        page = self.crawler.getData(self.bag_url, self.women_url)
        if not page:
            return


if __name__ == '__main__':
    # Drive the scraper: bagPage on the listing URL, then bagItems.
    b_url = 'http://www.chanel.com/zh_CN/fashion/products/handbags/g.spring-summer-2015.c.15S.html'
    b = ChanelBag()
    b.bagPage(b_url)
    b.bagItems()

    # Output file lives under Config.dataPath, suffixed by Common.today_ss();
    # the path is echoed before the items are written.
    file_name = 'chanel_%s.txt' % Common.today_ss()
    f = Config.dataPath + file_name
    print(f)
    b.outItems(f)
예제 #11
0
        m = re.search(r'<h2 class="sku reading-and-link-text">(.+?)</h2>', page, flags=re.S)
        if m:
            i_number = m.group(1).strip()
        else:
            m = re.search(r'<meta itemprop="identifier" content="sku:(.+?)"/>', page, flags=re.S)
            if m:
                i_number = m.group(1).strip()

        i = BagItem(self.brand_type)
        i.initItem(serie_title, '', i_name, i_price, i_unit, i_size, i_url, i_img, i_number)
        print '# itemPage:',i.outItem()
        #self.items.append(i.outItem()) 

    def outItems(self, f):
        """Write the header row followed by every collected item row to *f*.

        Rows are joined with newlines (no trailing newline). The header is
        prepended to a temporary list, so ``self.items`` is left untouched
        and repeated calls never accumulate duplicate header rows.

        f -- path of the output file; it is opened in 'w' mode.
        """
        # Columns: series | tag | name | price | currency | size | link | image | product number
        s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号'
        rows = [s] + list(self.items)
        with open(f, 'w') as f_item:
            f_item.write('\n'.join(rows))

if __name__ == '__main__':
    # Print the wall-clock time at start and end of the run.
    time_fmt = '%Y-%m-%d %H:%M:%S'
    print(time.strftime(time_fmt, time.localtime()))

    # Drive the scraper: bagPage on the homepage URL, then bagItems.
    b_url = "http://www.louisvuitton.cn/zhs-cn/homepage"
    b = LouisvuittonBag()
    b.bagPage(b_url)
    b.bagItems()

    # Output file lives under Config.dataPath, suffixed by Common.today_ss().
    file_name = 'louisvuitton_%s.txt' % Common.today_ss()
    f = Config.dataPath + file_name
    b.outItems(f)
    print(time.strftime(time_fmt, time.localtime()))
    
예제 #12
0
            r'<div class="columns-wrapper">.+?<div class="column">.*?<div class="reference">\s*<p>(.+?)</p>\s*</div>',
            page,
            flags=re.S)
        if m:
            s_number = m.group(1)
            i_number = s_number.split('-')[1].strip()

        i = BagItem()
        i.initItem(serie_title, i_title, i_name, i_price, i_unit, i_size,
                   i_url, i_img, i_number)
        self.items.append(i.outItem)
        print '# itemPage :', serie_title, i_title, i_name, i_price, i_unit, i_size, i_url, i_img

    def outItems(self, f):
        """Write the header row followed by every collected item row to *f*.

        Rows are joined with newlines (no trailing newline). The header is
        prepended to a temporary list, so ``self.items`` is left untouched
        and repeated calls never accumulate duplicate header rows.

        f -- path of the output file; it is opened in 'w' mode.
        """
        # Columns: series | tag | name | price | currency | size | link | image | product number
        s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号'
        rows = [s] + list(self.items)
        with open(f, 'w') as f_item:
            f_item.write('\n'.join(rows))


if __name__ == '__main__':
    # Print the wall-clock time at start and end of the run.
    time_fmt = '%Y-%m-%d %H:%M:%S'
    print(time.strftime(time_fmt, time.localtime()))

    # Drive the scraper: bagPage on the listing URL, then bagItems.
    b_url = 'http://www.dior.cn/couture/zh_cn/%E5%A5%B3%E5%A3%AB%E6%97%B6%E8%A3%85/%E7%9A%AE%E5%85%B7'
    b = DiorBag()
    b.bagPage(b_url)
    b.bagItems()

    # Output file lives under Config.dataPath, suffixed by Common.today_ss().
    file_name = 'dior_%s.txt' % Common.today_ss()
    f = Config.dataPath + file_name
    b.outItems(f)
    print(time.strftime(time_fmt, time.localtime()))
예제 #13
0
        i_number = ''
        m = re.search(r'<div class="product-code">(.+?)型号代码(.+?)</div>',
                      page,
                      flags=re.S)
        if m:
            i_size, i_number = m.group(1).strip(), m.group(2).strip()

        i = BagItem(self.brand_type)
        i.initItem(serie_title, '', i_name, i_price, i_unit, i_size, i_url,
                   i_img, i_number)
        print '# itemPage:', i.outItem()
        #self.items.append(i.outItem())

    def outItems(self, f):
        """Write the header row followed by every collected item row to *f*.

        Rows are joined with newlines (no trailing newline). The header is
        prepended to a temporary list, so ``self.items`` is left untouched
        and repeated calls never accumulate duplicate header rows.

        f -- path of the output file; it is opened in 'w' mode.
        """
        # Columns: series | tag | name | price | currency | size | link | image | product number
        s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号'
        rows = [s] + list(self.items)
        with open(f, 'w') as f_item:
            f_item.write('\n'.join(rows))


if __name__ == '__main__':
    # Print the wall-clock time at start and end of the run.
    time_fmt = '%Y-%m-%d %H:%M:%S'
    print(time.strftime(time_fmt, time.localtime()))

    # Drive the scraper: bagPage on the listing URL, then bagItems.
    b_url = "http://www.ferragamo.cn/woman/handbags/"
    b = FerragamoBag()
    b.bagPage(b_url)
    b.bagItems()

    # Output file lives under Config.dataPath, suffixed by Common.today_ss().
    file_name = 'ferragamo_%s.txt' % Common.today_ss()
    f = Config.dataPath + file_name
    b.outItems(f)
    print(time.strftime(time_fmt, time.localtime()))
예제 #14
0
            i_img = m.group(1)

        i_size = ""
        i_number = ""
        m = re.search(r'<div class="product-code">(.+?)型号代码(.+?)</div>', page, flags=re.S)
        if m:
            i_size, i_number = m.group(1).strip(), m.group(2).strip()

        i = BagItem(self.brand_type)
        i.initItem(serie_title, "", i_name, i_price, i_unit, i_size, i_url, i_img, i_number)
        print "# itemPage:", i.outItem()
        # self.items.append(i.outItem())

    def outItems(self, f):
        """Write the header row followed by every collected item row to *f*.

        Rows are joined with newlines (no trailing newline). The header is
        prepended to a temporary list, so ``self.items`` is left untouched
        and repeated calls never accumulate duplicate header rows.

        f -- path of the output file; it is opened in "w" mode.
        """
        # Columns: series | tag | name | price | currency | size | link | image | product number
        s = "#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号"
        rows = [s] + list(self.items)
        with open(f, "w") as f_item:
            f_item.write("\n".join(rows))


if __name__ == "__main__":
    # Print the wall-clock time at start and end of the run.
    time_fmt = "%Y-%m-%d %H:%M:%S"
    print(time.strftime(time_fmt, time.localtime()))

    # Drive the scraper: bagPage on the listing URL, then bagItems.
    b_url = "http://www.ferragamo.cn/woman/handbags/"
    b = FerragamoBag()
    b.bagPage(b_url)
    b.bagItems()

    # Output file lives under Config.dataPath, suffixed by Common.today_ss().
    file_name = "ferragamo_%s.txt" % Common.today_ss()
    f = Config.dataPath + file_name
    b.outItems(f)
    print(time.strftime(time_fmt, time.localtime()))
예제 #15
0
    def bagItems(self):
        """Hand ``self.link_list`` to a BagItemM helper and collect its items.

        The third BagItemM argument is ``len(self.link_list)`` capped at 10
        -- presumably a worker-thread count; confirm against BagItemM.
        After ``run()`` the helper's ``items`` are appended to ``self.items``.
        """
        thread_cap = 10
        worker_count = min(thread_cap, len(self.link_list))
        worker = BagItemM(self.home_url, self.brand_type, worker_count)
        worker.createthread()
        worker.putItems(self.link_list)
        worker.run()
        self.items.extend(worker.items)

    def outItems(self, f):
        """Write the header row followed by every collected item row to *f*.

        Rows are joined with newlines (no trailing newline). The header is
        prepended to a temporary list, so ``self.items`` is left untouched
        and repeated calls never accumulate duplicate header rows.

        f -- path of the output file; it is opened in 'w' mode.
        """
        # Columns: series | tag | name | price | currency | size | link | image | product number
        s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号'
        rows = [s] + list(self.items)
        with open(f, 'w') as f_item:
            f_item.write('\n'.join(rows))


if __name__ == '__main__':
    # Print the wall-clock time at start and end of the run.
    time_fmt = '%Y-%m-%d %H:%M:%S'
    print(time.strftime(time_fmt, time.localtime()))

    # Drive the scraper: bagPage on the site URL, then bagItems.
    b_url = 'http://www.givenchy.com/cn/'
    b = GivenchyBag()
    b.bagPage(b_url)
    b.bagItems()

    # Output file lives under Config.dataPath, suffixed by Common.today_ss().
    file_name = 'givenchy_%s.txt' % Common.today_ss()
    f = Config.dataPath + file_name
    b.outItems(f)
    print(time.strftime(time_fmt, time.localtime()))
예제 #16
0
        m = re.search(r'<div class="itemDimensions">.+?<span class="dimensions">(.+?)</span></div>', page, flags=re.S)
        if m:
            i_size = m.group(1)

        i_number
        m = re.search(r'<div class="styleIdDescription">货号.+?<span.*?>(.+?)</span></div>', page, flags=re.S)
        if m:
            i_number = m.group(1)

        i = BagItem(self.brand_type)
        i.initItem(serie_title, '', i_name, i_price, i_unit, i_size, i_url, i_img, i_number)
        print '# itemPage:',i.outItem()
        #self.items.append(i.outItem()) 

    def outItems(self, f):
        """Write the header row followed by every collected item row to *f*.

        Rows are joined with newlines (no trailing newline). The header is
        prepended to a temporary list, so ``self.items`` is left untouched
        and repeated calls never accumulate duplicate header rows.

        f -- path of the output file; it is opened in 'w' mode.
        """
        # Columns: series | tag | name | price | currency | size | link | image | product number
        s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号'
        rows = [s] + list(self.items)
        with open(f, 'w') as f_item:
            f_item.write('\n'.join(rows))

if __name__ == '__main__':
    # Print the wall-clock time at start and end of the run.
    time_fmt = '%Y-%m-%d %H:%M:%S'
    print(time.strftime(time_fmt, time.localtime()))

    # Drive the scraper: bagPage on the shop URL, then bagItems.
    b_url = "http://www.ysl.com/wy/shop-product/%E5%A5%B3%E5%A3%AB"
    b = YslBag()
    b.bagPage(b_url)
    b.bagItems()

    # Output file lives under Config.dataPath, suffixed by Common.today_ss().
    file_name = 'ysl_%s.txt' % Common.today_ss()
    f = Config.dataPath + file_name
    b.outItems(f)
    print(time.strftime(time_fmt, time.localtime()))
예제 #17
0
파일: bossBage.py 프로젝트: xzhoutxd/brand
            m = re.search(r'<span.+?>尺寸大小:</span>(.+?)</span>', page, flags=re.S)
            if m:
                i_size = re.sub(r'<.+?>','',m.group(1))

        i_number = ''
        m = re.search(r'<div class="base">\s*<div class="sku-brand">.+?<dl class="hidden"><dt>商品货号: </dt><dd>(.+?)</dd></dl>\s*</div>', page, flags=re.S)
        if m:
            i_number = m.group(1)

        i = BagItem(self.brand_type)
        i.initItem(serie_title, '', i_name, i_price, i_unit, i_size, i_url, i_img, i_number)
        print '# itemPage:',i.outItem()
        #self.items.append(i.outItem()) 

    def outItems(self, f):
        """Write the header row followed by every collected item row to *f*.

        Rows are joined with newlines (no trailing newline). The header is
        prepended to a temporary list, so ``self.items`` is left untouched
        and repeated calls never accumulate duplicate header rows.

        f -- path of the output file; it is opened in 'w' mode.
        """
        # Columns: series | tag | name | price | currency | size | link | image | product number
        s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号'
        rows = [s] + list(self.items)
        with open(f, 'w') as f_item:
            f_item.write('\n'.join(rows))

if __name__ == '__main__':
    # Print the wall-clock time at start and end of the run.
    time_fmt = '%Y-%m-%d %H:%M:%S'
    print(time.strftime(time_fmt, time.localtime()))

    # Drive the scraper: bagPage on the category URL, then bagItems.
    b_url = "http://store.hugoboss.cn/category.php?id=3835&form_nav"
    b = BossBag()
    b.bagPage(b_url)
    b.bagItems()

    # Output file lives under Config.dataPath, suffixed by Common.today_ss().
    file_name = 'boss_%s.txt' % Common.today_ss()
    f = Config.dataPath + file_name
    b.outItems(f)
    print(time.strftime(time_fmt, time.localtime()))
예제 #18
0
            m = re.search(r'<div class="scrollCnt">\s*<ul>.+?<li>(尺码.+?)</li>', page, flags=re.S)
            if m:
                i_size = m.group(1)

        i_number = ''
        m = re.search(r'<div id="itemTechSheet">.+?<p class="prodCode">(.+?)</p>', page, flags=re.S)
        if m:
            i_number = m.group(1).split(':')[1].strip()

        i = BagItem(self.brand_type)
        i.initItem('', item_title, i_name, i_price, i_unit, i_size, i_url, i_img, i_number)
        print '# itemPage:',i.outItem()
        #self.items.append(i.outItem()) 

    def outItems(self, f):
        """Write the header row followed by every collected item row to *f*.

        Rows are joined with newlines (no trailing newline). The header is
        prepended to a temporary list, so ``self.items`` is left untouched
        and repeated calls never accumulate duplicate header rows.

        f -- path of the output file; it is opened in 'w' mode.
        """
        # Columns: series | tag | name | price | currency | size | link | image | product number
        s = '#系列名称|商品标签|商品名称|商品价格|金额单位|商品尺寸|商品链接|商品图片|商品编号'
        rows = [s] + list(self.items)
        with open(f, 'w') as f_item:
            f_item.write('\n'.join(rows))

if __name__ == '__main__':
    # Print the wall-clock time at start and end of the run.
    time_fmt = '%Y-%m-%d %H:%M:%S'
    print(time.strftime(time_fmt, time.localtime()))

    # Drive the scraper: bagPage on the listing URL, then bagItems.
    b_url = "http://www.dolcegabbana.com.cn/cn/dolce-gabbana/%E5%A5%B3%E5%A3%AB/onlinestore/%E5%8C%85%E8%A2%8B"
    b = DolcegabbanaBag()
    b.bagPage(b_url)
    b.bagItems()

    # Output file lives under Config.dataPath, suffixed by Common.today_ss().
    file_name = 'dolcegabbana_%s.txt' % Common.today_ss()
    f = Config.dataPath + file_name
    b.outItems(f)
    print(time.strftime(time_fmt, time.localtime()))